From d11afdf1e30f61c17a6fec657695657c85bf939d Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 25 Jan 2022 09:30:19 +0100 Subject: [PATCH 01/20] Add ref and omp submatrix from IndexSet kernels + Some additional helpers for IndexSet class were also added --- core/base/index_set.cpp | 14 +++++ core/device_hooks/common_kernels.inc.cpp | 4 ++ core/matrix/csr.cpp | 48 ++++++++++++++++ core/matrix/csr_kernels.hpp | 24 ++++++++ cuda/matrix/csr_kernels.cu | 24 ++++++++ dpcpp/matrix/csr_kernels.dp.cpp | 24 ++++++++ hip/matrix/csr_kernels.hip.cpp | 24 ++++++++ include/ginkgo/core/base/index_set.hpp | 42 ++++++++++++++ include/ginkgo/core/matrix/csr.hpp | 5 ++ omp/matrix/csr_kernels.cpp | 71 ++++++++++++++++++++++++ omp/test/matrix/csr_kernels.cpp | 61 ++++++++++++++++++++ reference/matrix/csr_kernels.cpp | 71 ++++++++++++++++++++++++ reference/test/base/index_set.cpp | 14 +++++ reference/test/matrix/csr_kernels.cpp | 65 ++++++++++++++++++++++ 14 files changed, 491 insertions(+) diff --git a/core/base/index_set.cpp b/core/base/index_set.cpp index e95de867398..98165d943a9 100644 --- a/core/base/index_set.cpp +++ b/core/base/index_set.cpp @@ -105,6 +105,20 @@ IndexType IndexSet::get_local_index(const IndexType index) const } +template +IndexType IndexSet::get_subset_id(const IndexType index) const +{ + auto exec = this->get_executor(); + auto ss_end_host = Array(exec->get_master(), this->subsets_end_); + for (size_type id = 0; id < this->get_num_subsets(); ++id) { + if (index < ss_end_host.get_const_data()[id]) { /* end is exclusive */ + return id; + } + } + return -1; +} + + template Array IndexSet::to_global_indices() const { diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index cdc7953c61d..bcc89a0a948 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -494,9 +494,13 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX); 
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_CHECK_DIAGONAL_ENTRIES_EXIST); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL); template GKO_DECLARE_CSR_SCALE_KERNEL(ValueType, IndexType) diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp index bfc2f408524..a6c3a2a0431 100644 --- a/core/matrix/csr.cpp +++ b/core/matrix/csr.cpp @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -83,7 +84,11 @@ GKO_REGISTER_OPERATION(compute_hybrid_coo_row_ptrs, GKO_REGISTER_OPERATION(convert_to_hybrid, csr::convert_to_hybrid); GKO_REGISTER_OPERATION(calculate_nonzeros_per_row_in_span, csr::calculate_nonzeros_per_row_in_span); +GKO_REGISTER_OPERATION(calculate_nonzeros_per_row_in_index_set, + csr::calculate_nonzeros_per_row_in_index_set); GKO_REGISTER_OPERATION(compute_submatrix, csr::compute_submatrix); +GKO_REGISTER_OPERATION(compute_submatrix_from_index_set, + csr::compute_submatrix_from_index_set); GKO_REGISTER_OPERATION(transpose, csr::transpose); GKO_REGISTER_OPERATION(conj_transpose, csr::conj_transpose); GKO_REGISTER_OPERATION(inv_symm_permute, csr::inv_symm_permute); @@ -612,6 +617,49 @@ Csr::create_submatrix(const gko::span& row_span, } +template +std::unique_ptr> +Csr::create_submatrix( + const IndexSet& row_index_set, + const IndexSet& col_index_set) const +{ + using Mat = Csr; + auto exec = this->get_executor(); + if (row_index_set.is_contiguous() && 
col_index_set.is_contiguous()) { + auto row_st = row_index_set.get_executor()->copy_val_to_host( + row_index_set.get_subsets_begin()); + auto row_end = row_index_set.get_executor()->copy_val_to_host( + row_index_set.get_subsets_end()); + auto col_st = col_index_set.get_executor()->copy_val_to_host( + col_index_set.get_subsets_begin()); + auto col_end = col_index_set.get_executor()->copy_val_to_host( + col_index_set.get_subsets_end()); + + return this->create_submatrix(span(row_st, row_end), + span(col_st, col_end)); + } else { + auto submat_num_rows = row_index_set.get_num_elems(); + auto submat_num_cols = col_index_set.get_num_elems(); + auto sub_mat_size = gko::dim<2>(submat_num_rows, submat_num_cols); + Array row_ptrs(exec, submat_num_rows + 1); + exec->run(csr::make_calculate_nonzeros_per_row_in_index_set( + this, row_index_set, col_index_set, &row_ptrs)); + exec->run( + csr::make_prefix_sum(row_ptrs.get_data(), submat_num_rows + 1)); + auto num_nnz = + exec->copy_val_to_host(row_ptrs.get_data() + sub_mat_size[0]); + auto sub_mat = Mat::create(exec, sub_mat_size, + std::move(Array(exec, num_nnz)), + std::move(Array(exec, num_nnz)), + std::move(row_ptrs), this->get_strategy()); + exec->run(csr::make_compute_submatrix_from_index_set( + this, row_index_set, col_index_set, sub_mat.get())); + sub_mat->make_srow(); + return sub_mat; + } +} + + template std::unique_ptr> Csr::extract_diagonal() const diff --git a/core/matrix/csr_kernels.hpp b/core/matrix/csr_kernels.hpp index c67d5e0730d..14f7f491f24 100644 --- a/core/matrix/csr_kernels.hpp +++ b/core/matrix/csr_kernels.hpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include #include #include @@ -165,12 +166,29 @@ namespace kernels { const matrix::Csr* source, const span& row_span, \ const span& col_span, Array* row_nnz) +#define GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL(ValueType, \ + IndexType) \ + void calculate_nonzeros_per_row_in_index_set( \ + std::shared_ptr exec, \ + const matrix::Csr* source, \ + const IndexSet& row_index_set, \ + const IndexSet& col_index_set, Array* row_nnz) + #define GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL(ValueType, IndexType) \ void compute_submatrix(std::shared_ptr exec, \ const matrix::Csr* source, \ gko::span row_span, gko::span col_span, \ matrix::Csr* result) +#define GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL(ValueType, \ + IndexType) \ + void compute_submatrix_from_index_set( \ + std::shared_ptr exec, \ + const matrix::Csr* source, \ + const IndexSet& row_index_set, \ + const IndexSet& col_index_set, \ + matrix::Csr* result) + #define GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) \ void sort_by_column_index(std::shared_ptr exec, \ matrix::Csr* to_sort) @@ -247,6 +265,12 @@ namespace kernels { template \ GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL(ValueType, IndexType); \ template \ + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL(ValueType, \ + IndexType); \ + template \ + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL(ValueType, \ + IndexType); \ + template \ GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType); \ template \ GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType); \ diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu index b1e892ebf48..7ab812b50fc 100644 --- a/cuda/matrix/csr_kernels.cu +++ b/cuda/matrix/csr_kernels.cu @@ -1158,6 +1158,30 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); +template +void calculate_nonzeros_per_row_in_index_set( + std::shared_ptr exec, + const matrix::Csr* source, + const IndexSet& 
row_index_set, + const IndexSet& col_index_set, + Array* row_nnz) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); + + +template +void compute_submatrix_from_index_set( + std::shared_ptr exec, + const matrix::Csr* source, + const IndexSet& row_index_set, + const IndexSet& col_index_set, + matrix::Csr* result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL); + + template void sort_by_column_index(std::shared_ptr exec, matrix::Csr* to_sort) diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index d475a104dbf..835ba943a26 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -1385,6 +1385,18 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); +template +void calculate_nonzeros_per_row_in_index_set( + std::shared_ptr exec, + const matrix::Csr* source, + const IndexSet& row_index_set, + const IndexSet& col_index_set, + Array* row_nnz) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); + + template void compute_submatrix(std::shared_ptr exec, const matrix::Csr* source, @@ -1412,6 +1424,18 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); +template +void compute_submatrix_from_index_set( + std::shared_ptr exec, + const matrix::Csr* source, + const IndexSet& row_index_set, + const IndexSet& col_index_set, + matrix::Csr* result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL); + + namespace { diff --git a/hip/matrix/csr_kernels.hip.cpp b/hip/matrix/csr_kernels.hip.cpp index d1d980c60a5..f1d09c1a163 100644 --- a/hip/matrix/csr_kernels.hip.cpp +++ b/hip/matrix/csr_kernels.hip.cpp @@ -945,6 +945,30 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); +template +void calculate_nonzeros_per_row_in_index_set( + std::shared_ptr exec, + const matrix::Csr* source, + const IndexSet& row_index_set, + const IndexSet& col_index_set, + Array* row_nnz) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); + + +template +void compute_submatrix_from_index_set( + std::shared_ptr exec, + const matrix::Csr* source, + const IndexSet& row_index_set, + const IndexSet& col_index_set, + matrix::Csr* result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL); + + template void sort_by_column_index(std::shared_ptr exec, matrix::Csr* to_sort) diff --git a/include/ginkgo/core/base/index_set.hpp b/include/ginkgo/core/base/index_set.hpp index 4a0c08fee72..0725ba73f22 100644 --- a/include/ginkgo/core/base/index_set.hpp +++ b/include/ginkgo/core/base/index_set.hpp @@ -100,6 +100,26 @@ class IndexSet : public EnablePolymorphicObject> { : EnablePolymorphicObject(std::move(exec)) {} + /** + * Creates an index set on the specified executor from the initializer list. + * + * @param executor the Executor where the index set data will be allocated + * @param init_list the indices that the index set should hold. The + * size of the index space is set to the number of entries + * in this list, not to the largest index (TODO confirm intent). + * @param is_sorted a parameter that specifies if the initializer list is + * sorted or not. `true` if sorted. 
+ */ + IndexSet(std::shared_ptr executor, + std::initializer_list init_list, + const bool is_sorted = false) + : EnablePolymorphicObject(std::move(executor)), + index_space_size_(init_list.size()) + { + this->populate_subsets( + Array(this->get_executor(), init_list), is_sorted); + } + /** * Creates an index set on the specified executor and the given size * @@ -196,6 +216,28 @@ class IndexSet : public EnablePolymorphicObject> { */ index_type get_local_index(index_type global_index) const; + /** + * Return which subset the global index belongs to. + * + * Consider the set idx_set = (0, 1, 2, 4, 6, 7, 8, 9). This function + * returns the subset id in the index set of the input global index. For + * example, `idx_set.get_subset_id(0) == 0`, `idx_set.get_subset_id(4) + * == 1` and `idx_set.get_subset_id(6) == 2`. + * + * @note This function returns a scalar value and needs a scalar value. + * For repeated queries, it is more efficient to use the Array + * functions that take and return arrays which allow for more + * throughput. + * + * @param global_index the global index. + * + * @return the id of the matching subset, or -1 if past the last subset. + * + * @warning This single entry query copies the subset bounds to the host on + * every call and should be avoided if possible. + */ + index_type get_subset_id(index_type global_index) const; + /** * This is an array version of the scalar function above. * diff --git a/include/ginkgo/core/matrix/csr.hpp b/include/ginkgo/core/matrix/csr.hpp index 14f88156b6c..9bfa9335945 100644 --- a/include/ginkgo/core/matrix/csr.hpp +++ b/include/ginkgo/core/matrix/csr.hpp @@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include #include @@ -792,6 +793,10 @@ class Csr : public EnableLinOp>, std::unique_ptr> extract_diagonal() const override; + std::unique_ptr> create_submatrix( + const gko::IndexSet& row_index_set, + const gko::IndexSet& column_index_set) const; + std::unique_ptr> create_submatrix( const gko::span& row_span, const gko::span& column_span) const; diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 53d0e7ee4e5..9344a236066 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -742,6 +742,36 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); +template +void calculate_nonzeros_per_row_in_index_set( + std::shared_ptr exec, + const matrix::Csr* source, + const IndexSet& row_index_set, + const IndexSet& col_index_set, Array* row_nnz) +{ + size_type res_row = 0; + auto num_row_subsets = row_index_set.get_num_subsets(); + auto row_subset_begin = row_index_set.get_subsets_begin(); + auto row_subset_end = row_index_set.get_subsets_end(); + for (size_type set = 0; set < num_row_subsets; ++set) { + for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; + ++row) { + row_nnz->get_data()[res_row] = zero(); + for (size_type nnz = source->get_const_row_ptrs()[row]; + nnz < source->get_const_row_ptrs()[row + 1]; ++nnz) { + if (col_index_set.contains(source->get_const_col_idxs()[nnz])) { + row_nnz->get_data()[res_row]++; + } + } + res_row++; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); + + template void compute_submatrix(std::shared_ptr exec, const matrix::Csr* source, @@ -775,6 +805,47 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); +template +void compute_submatrix_from_index_set( + std::shared_ptr exec, + const matrix::Csr* source, + const IndexSet& row_index_set, + const IndexSet& col_index_set, + matrix::Csr* result) +{ + auto num_rows 
= result->get_size()[0]; + auto num_cols = result->get_size()[1]; + auto num_row_subsets = row_index_set.get_num_subsets(); + auto row_subset_begin = row_index_set.get_subsets_begin(); + auto row_subset_end = row_index_set.get_subsets_end(); + auto res_row_ptrs = result->get_row_ptrs(); + auto res_col_idxs = result->get_col_idxs(); + auto res_values = result->get_values(); + const auto src_row_ptrs = source->get_const_row_ptrs(); + const auto src_col_idxs = source->get_const_col_idxs(); + const auto src_values = source->get_const_values(); + + size_type res_nnz = 0; + for (size_type set = 0; set < num_row_subsets; ++set) { + for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; + ++row) { + for (size_type nnz = src_row_ptrs[row]; nnz < src_row_ptrs[row + 1]; + ++nnz) { + if (col_index_set.contains(src_col_idxs[nnz])) { + res_col_idxs[res_nnz] = + col_index_set.get_local_index(src_col_idxs[nnz]); + res_values[res_nnz] = src_values[nnz]; + res_nnz++; + } + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL); + + template void inv_symm_permute(std::shared_ptr exec, const IndexType* perm, diff --git a/omp/test/matrix/csr_kernels.cpp b/omp/test/matrix/csr_kernels.cpp index d7a8a55bf4c..bb977241209 100644 --- a/omp/test/matrix/csr_kernels.cpp +++ b/omp/test/matrix/csr_kernels.cpp @@ -755,6 +755,48 @@ TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) } +TEST_F(Csr, ComputeSubmatrixFromIndexSetIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + using IndexType = int; + using ValueType = double; + set_up_mat_data(); + gko::IndexSet rset{ + this->ref, {42, 7, 8, 9, 10, 22, 25, 26, 34, 35, 36, 51}}; + gko::IndexSet cset{this->ref, + {42, 22, 24, 26, 28, 30, 81, 82, 83, 88}}; + gko::IndexSet drset(this->omp, rset); + gko::IndexSet dcset(this->omp, cset); + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rset.get_num_elems() + 1); + row_nnz.fill(gko::zero()); + 
gko::kernels::reference::csr::calculate_nonzeros_per_row_in_index_set( + this->ref, this->mtx2.get(), rset, cset, &row_nnz); + gko::kernels::reference::components::prefix_sum( + this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); + auto num_nnz = row_nnz.get_data()[rset.get_num_elems()]; + auto drow_nnz = gko::Array(this->omp, row_nnz); + auto smat1 = Mtx::create( + this->ref, gko::dim<2>(rset.get_num_elems(), cset.get_num_elems()), + std::move(gko::Array(this->ref, num_nnz)), + std::move(gko::Array(this->ref, num_nnz)), + std::move(row_nnz)); + auto sdmat1 = Mtx::create( + this->omp, gko::dim<2>(rset.get_num_elems(), cset.get_num_elems()), + std::move(gko::Array(this->omp, num_nnz)), + std::move(gko::Array(this->omp, num_nnz)), + std::move(drow_nnz)); + + + gko::kernels::reference::csr::compute_submatrix_from_index_set( + this->ref, this->mtx2.get(), rset, cset, smat1.get()); + gko::kernels::omp::csr::compute_submatrix_from_index_set( + this->omp, this->dmtx2.get(), drset, dcset, sdmat1.get()); + + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); +} + + TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) { set_up_mat_data(); @@ -816,4 +858,23 @@ TEST_F(Csr, AddScaledIdentityToNonSquare) } +TEST_F(Csr, CreateSubMatrixFromIndexSetIsEquivalentToRef) +{ + using IndexType = int; + using ValueType = double; + set_up_mat_data(); + + gko::IndexSet rset{ + this->ref, {42, 7, 8, 9, 10, 22, 25, 26, 34, 35, 36, 51}}; + gko::IndexSet cset{this->ref, + {42, 22, 24, 26, 28, 30, 81, 82, 83, 88}}; + gko::IndexSet drset(this->omp, rset); + gko::IndexSet dcset(this->omp, cset); + auto smat1 = this->mtx2->create_submatrix(rset, cset); + auto sdmat1 = this->dmtx2->create_submatrix(drset, dcset); + + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); +} + + } // namespace diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 2e913b128a3..9ffa78f7d45 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -623,6 +623,36 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); +template +void calculate_nonzeros_per_row_in_index_set( + std::shared_ptr exec, + const matrix::Csr* source, + const IndexSet& row_index_set, + const IndexSet& col_index_set, Array* row_nnz) +{ + size_type res_row = 0; + auto num_row_subsets = row_index_set.get_num_subsets(); + auto row_subset_begin = row_index_set.get_subsets_begin(); + auto row_subset_end = row_index_set.get_subsets_end(); + for (size_type set = 0; set < num_row_subsets; ++set) { + for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; + ++row) { + row_nnz->get_data()[res_row] = zero(); + for (size_type nnz = source->get_const_row_ptrs()[row]; + nnz < source->get_const_row_ptrs()[row + 1]; ++nnz) { + if (col_index_set.contains(source->get_const_col_idxs()[nnz])) { + row_nnz->get_data()[res_row]++; + } + } + res_row++; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); + + template void compute_submatrix(std::shared_ptr exec, const matrix::Csr* source, @@ -657,6 +687,47 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); +template +void compute_submatrix_from_index_set( + std::shared_ptr exec, + const matrix::Csr* source, + const IndexSet& row_index_set, + const IndexSet& col_index_set, + matrix::Csr* result) +{ + auto num_rows = result->get_size()[0]; + auto num_cols = result->get_size()[1]; + auto num_row_subsets = row_index_set.get_num_subsets(); + auto row_subset_begin = row_index_set.get_subsets_begin(); + auto row_subset_end = row_index_set.get_subsets_end(); + auto res_row_ptrs = result->get_row_ptrs(); + auto res_col_idxs = result->get_col_idxs(); + auto res_values = result->get_values(); + const auto src_row_ptrs = source->get_const_row_ptrs(); + const auto src_col_idxs = source->get_const_col_idxs(); + const auto src_values = source->get_const_values(); + + size_type 
res_nnz = 0; + for (size_type set = 0; set < num_row_subsets; ++set) { + for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; + ++row) { + for (size_type nnz = src_row_ptrs[row]; nnz < src_row_ptrs[row + 1]; + ++nnz) { + if (col_index_set.contains(src_col_idxs[nnz])) { + res_col_idxs[res_nnz] = + col_index_set.get_local_index(src_col_idxs[nnz]); + res_values[res_nnz] = src_values[nnz]; + res_nnz++; + } + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL); + + template void convert_to_hybrid(std::shared_ptr exec, const matrix::Csr* source, diff --git a/reference/test/base/index_set.cpp b/reference/test/base/index_set.cpp index c47ae9809a1..b82313aa0f3 100644 --- a/reference/test/base/index_set.cpp +++ b/reference/test/base/index_set.cpp @@ -321,6 +321,20 @@ TYPED_TEST(IndexSet, CanGetLocalIndex) } +TYPED_TEST(IndexSet, CanGetSubsetId) +{ + auto idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; + auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + + ASSERT_EQ(idx_set.get_num_elems(), 8); + EXPECT_EQ(idx_set.get_subset_id(6), 2); + EXPECT_EQ(idx_set.get_subset_id(7), 2); + EXPECT_EQ(idx_set.get_subset_id(0), 0); + EXPECT_EQ(idx_set.get_subset_id(8), 2); + EXPECT_EQ(idx_set.get_subset_id(4), 1); +} + + TYPED_TEST(IndexSet, CanDetectNonExistentIndices) { auto idx_arr = gko::Array{ diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index 7341c6347fe..08b1d87e235 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -1760,4 +1760,69 @@ TYPED_TEST(Csr, CanGetSubmatrix2) } +TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) +{ + using Vec = typename TestFixture::Vec; + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto mat = gko::initialize( + { + I{1.0, 3.0, 4.5, 0.0, 2.0}, // 0 + I{1.0, 0.0, 4.5, 
7.5, 3.0}, // 1 + I{0.0, 3.0, 4.5, 0.0, 2.0}, // 2 + I{0.0, -1.0, 2.5, 0.0, 2.0}, // 3 + I{1.0, 0.0, -1.0, 3.5, 1.0}, // 4 + I{0.0, 1.0, 0.0, 0.0, 2.0}, // 5 + I{0.0, 3.0, 0.0, 7.5, 1.0} // 6 + }, + this->exec); + ASSERT_EQ(mat->get_num_stored_elements(), 23); + { + auto row_set = gko::IndexSet(this->exec, {0, 1}); + auto col_set = gko::IndexSet(this->exec, {0, 1}); + auto sub_mat1 = mat->create_submatrix(row_set, col_set); + auto ref1 = + gko::initialize({I{1.0, 3.0}, I{1.0, 0.0}}, this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0); + } + + { + auto row_set = gko::IndexSet(this->exec, {1, 2, 3, 4}); + auto col_set = gko::IndexSet(this->exec, {1, 3}); + auto sub_mat1 = mat->create_submatrix(row_set, col_set); + auto ref1 = gko::initialize( + {I{0.0, 7.5}, I{3.0, 0.0}, I{-1.0, 0.0}, I{0.0, 3.5}}, + this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0); + } + + { + auto row_set = gko::IndexSet(this->exec, {1, 3, 4}); + auto col_set = gko::IndexSet(this->exec, {1, 3, 0}); + auto sub_mat1 = mat->create_submatrix(row_set, col_set); + auto ref1 = gko::initialize( + {I{1.0, 0.0, 7.5}, I{0.0, -1.0, 0.0}, I{1.0, 0.0, 3.5}}, + this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0); + } + + { + auto row_set = gko::IndexSet(this->exec, {1, 4, 5, 6}); + auto col_set = gko::IndexSet(this->exec, {4, 3, 0, 1}); + auto sub_mat1 = mat->create_submatrix(row_set, col_set); + auto ref1 = gko::initialize({I{1.0, 0.0, 7.5, 3.0}, // 1 + I{1.0, 0.0, 3.5, 1.0}, // 4 + I{0.0, 1.0, 0.0, 2.0}, // 5 + I{0.0, 3.0, 7.5, 1.0}}, // 6 + this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0); + } +} + + } // namespace From 7230a1d91a9fcdf35d5b8f03c7cabd39b7c187dd Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 27 Jan 2022 16:54:00 +0100 Subject: [PATCH 02/20] Reorganize index-set kernels --- core/base/index_set.cpp | 21 ++++--- core/base/index_set_kernels.hpp | 49 +++++++-------- cuda/base/index_set_kernels.cpp | 29 ++++----- 
dpcpp/base/index_set_kernels.dp.cpp | 29 ++++----- hip/base/index_set_kernels.hip.cpp | 29 ++++----- omp/base/index_set_kernels.cpp | 88 ++++++++++++--------------- omp/test/base/index_set.cpp | 28 ++++++--- reference/base/index_set_kernels.cpp | 89 ++++++++++++---------------- 8 files changed, 179 insertions(+), 183 deletions(-) diff --git a/core/base/index_set.cpp b/core/base/index_set.cpp index 98165d943a9..dbc31c77cfe 100644 --- a/core/base/index_set.cpp +++ b/core/base/index_set.cpp @@ -128,8 +128,9 @@ Array IndexSet::to_global_indices() const this->superset_cumulative_indices_.get_num_elems() - 1); auto decomp_indices = gko::Array(exec, num_elems); exec->run(index_set::make_to_global_indices( - this->index_space_size_, &this->subsets_begin_, &this->subsets_end_, - &this->superset_cumulative_indices_, &decomp_indices)); + this->index_space_size_, this->get_num_subsets(), + this->get_subsets_begin(), this->get_subsets_end(), + this->get_superset_indices(), decomp_indices.get_data())); return decomp_indices; } @@ -145,9 +146,11 @@ Array IndexSet::map_local_to_global( GKO_ASSERT(this->get_num_subsets() >= 1); exec->run(index_set::make_local_to_global( - this->index_space_size_, &this->subsets_begin_, &this->subsets_end_, - &this->superset_cumulative_indices_, &local_indices, &global_indices, - is_sorted)); + this->index_space_size_, this->get_num_subsets(), + this->get_subsets_begin(), this->get_subsets_end(), + this->get_superset_indices(), + static_cast(local_indices.get_num_elems()), + local_indices.get_const_data(), global_indices.get_data(), is_sorted)); return global_indices; } @@ -162,9 +165,11 @@ Array IndexSet::map_global_to_local( GKO_ASSERT(this->get_num_subsets() >= 1); exec->run(index_set::make_global_to_local( - this->index_space_size_, &this->subsets_begin_, &this->subsets_end_, - &this->superset_cumulative_indices_, &global_indices, &local_indices, - is_sorted)); + this->index_space_size_, this->get_num_subsets(), + this->get_subsets_begin(), 
this->get_subsets_end(), + this->get_superset_indices(), + static_cast(local_indices.get_num_elems()), + global_indices.get_const_data(), local_indices.get_data(), is_sorted)); return local_indices; } diff --git a/core/base/index_set_kernels.hpp b/core/base/index_set_kernels.hpp index c40fb567a02..4ae214c6e4b 100644 --- a/core/base/index_set_kernels.hpp +++ b/core/base/index_set_kernels.hpp @@ -52,13 +52,12 @@ namespace kernels { const Array* local_indices, \ Array* validity_array) -#define GKO_DECLARE_INDEX_SET_TO_GLOBAL_INDICES_KERNEL(IndexType) \ - void to_global_indices(std::shared_ptr exec, \ - const IndexType index_space_size, \ - const Array* subset_begin, \ - const Array* subset_end, \ - const Array* superset_indices, \ - Array* decomp_indices) +#define GKO_DECLARE_INDEX_SET_TO_GLOBAL_INDICES_KERNEL(IndexType) \ + void to_global_indices( \ + std::shared_ptr exec, \ + const IndexType index_space_size, const IndexType num_subsets, \ + const IndexType* subset_begin, const IndexType* subset_end, \ + const IndexType* superset_indices, IndexType* decomp_indices) #define GKO_DECLARE_INDEX_SET_POPULATE_KERNEL(IndexType) \ void populate_subsets( \ @@ -67,25 +66,23 @@ namespace kernels { Array* subset_begin, Array* subset_end, \ Array* superset_indices, const bool is_sorted) -#define GKO_DECLARE_INDEX_SET_GLOBAL_TO_LOCAL_KERNEL(IndexType) \ - void global_to_local(std::shared_ptr exec, \ - const IndexType index_space_size, \ - const Array* subset_begin, \ - const Array* subset_end, \ - const Array* superset_indices, \ - const Array* global_indices, \ - Array* local_indices, \ - const bool is_sorted) - -#define GKO_DECLARE_INDEX_SET_LOCAL_TO_GLOBAL_KERNEL(IndexType) \ - void local_to_global(std::shared_ptr exec, \ - const IndexType index_space_size, \ - const Array* subset_begin, \ - const Array* subset_end, \ - const Array* superset_indices, \ - const Array* local_indices, \ - Array* global_indices, \ - const bool is_sorted) +#define 
GKO_DECLARE_INDEX_SET_GLOBAL_TO_LOCAL_KERNEL(IndexType) \ + void global_to_local( \ + std::shared_ptr exec, \ + const IndexType index_space_size, const IndexType num_subsets, \ + const IndexType* subset_begin, const IndexType* subset_end, \ + const IndexType* superset_indices, const IndexType num_indices, \ + const IndexType* global_indices, IndexType* local_indices, \ + const bool is_sorted) + +#define GKO_DECLARE_INDEX_SET_LOCAL_TO_GLOBAL_KERNEL(IndexType) \ + void local_to_global( \ + std::shared_ptr exec, \ + const IndexType index_space_size, const IndexType num_subsets, \ + const IndexType* subset_begin, const IndexType* subset_end, \ + const IndexType* superset_indices, const IndexType num_indices, \ + const IndexType* local_indices, IndexType* global_indices, \ + const bool is_sorted) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/cuda/base/index_set_kernels.cpp b/cuda/base/index_set_kernels.cpp index 3dcfd2a8140..32e0e433586 100644 --- a/cuda/base/index_set_kernels.cpp +++ b/cuda/base/index_set_kernels.cpp @@ -60,10 +60,11 @@ namespace index_set { template void to_global_indices(std::shared_ptr exec, const IndexType index_space_size, - const Array* subset_begin, - const Array* subset_end, - const Array* superset_indices, - Array* decomp_indices) GKO_NOT_IMPLEMENTED; + const IndexType num_subsets, + const IndexType* subset_begin, + const IndexType* subset_end, + const IndexType* superset_indices, + IndexType* decomp_indices) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_INDEX_SET_TO_GLOBAL_INDICES_KERNEL); @@ -84,11 +85,11 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INDEX_SET_POPULATE_KERNEL); template void global_to_local(std::shared_ptr exec, const IndexType index_space_size, - const Array* subset_begin, - const Array* subset_end, - const Array* superset_indices, - const Array* global_indices, - Array* local_indices, + const IndexType num_subsets, const IndexType* subset_begin, + const IndexType* subset_end, + const 
IndexType* superset_indices, + const IndexType num_indices, + const IndexType* global_indices, IndexType* local_indices, const bool is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( @@ -98,11 +99,11 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void local_to_global(std::shared_ptr exec, const IndexType index_space_size, - const Array* subset_begin, - const Array* subset_end, - const Array* superset_indices, - const Array* local_indices, - Array* global_indices, + const IndexType num_subsets, const IndexType* subset_begin, + const IndexType* subset_end, + const IndexType* superset_indices, + const IndexType num_indices, + const IndexType* local_indices, IndexType* global_indices, const bool is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( diff --git a/dpcpp/base/index_set_kernels.dp.cpp b/dpcpp/base/index_set_kernels.dp.cpp index 446a062b3fc..3b70ac349ed 100644 --- a/dpcpp/base/index_set_kernels.dp.cpp +++ b/dpcpp/base/index_set_kernels.dp.cpp @@ -60,10 +60,11 @@ namespace index_set { template void to_global_indices(std::shared_ptr exec, const IndexType index_space_size, - const Array* subset_begin, - const Array* subset_end, - const Array* superset_indices, - Array* decomp_indices) GKO_NOT_IMPLEMENTED; + const IndexType num_subsets, + const IndexType* subset_begin, + const IndexType* subset_end, + const IndexType* superset_indices, + IndexType* decomp_indices) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_INDEX_SET_TO_GLOBAL_INDICES_KERNEL); @@ -84,11 +85,11 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INDEX_SET_POPULATE_KERNEL); template void global_to_local(std::shared_ptr exec, const IndexType index_space_size, - const Array* subset_begin, - const Array* subset_end, - const Array* superset_indices, - const Array* global_indices, - Array* local_indices, + const IndexType num_subsets, const IndexType* subset_begin, + const IndexType* subset_end, + const IndexType* superset_indices, + const 
IndexType num_indices, + const IndexType* global_indices, IndexType* local_indices, const bool is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( @@ -98,11 +99,11 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void local_to_global(std::shared_ptr exec, const IndexType index_space_size, - const Array* subset_begin, - const Array* subset_end, - const Array* superset_indices, - const Array* local_indices, - Array* global_indices, + const IndexType num_subsets, const IndexType* subset_begin, + const IndexType* subset_end, + const IndexType* superset_indices, + const IndexType num_indices, + const IndexType* local_indices, IndexType* global_indices, const bool is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( diff --git a/hip/base/index_set_kernels.hip.cpp b/hip/base/index_set_kernels.hip.cpp index 1eb041b921e..b528c02b297 100644 --- a/hip/base/index_set_kernels.hip.cpp +++ b/hip/base/index_set_kernels.hip.cpp @@ -60,10 +60,11 @@ namespace index_set { template void to_global_indices(std::shared_ptr exec, const IndexType index_space_size, - const Array* subset_begin, - const Array* subset_end, - const Array* superset_indices, - Array* decomp_indices) GKO_NOT_IMPLEMENTED; + const IndexType num_subsets, + const IndexType* subset_begin, + const IndexType* subset_end, + const IndexType* superset_indices, + IndexType* decomp_indices) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_INDEX_SET_TO_GLOBAL_INDICES_KERNEL); @@ -84,11 +85,11 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INDEX_SET_POPULATE_KERNEL); template void global_to_local(std::shared_ptr exec, const IndexType index_space_size, - const Array* subset_begin, - const Array* subset_end, - const Array* superset_indices, - const Array* global_indices, - Array* local_indices, + const IndexType num_subsets, const IndexType* subset_begin, + const IndexType* subset_end, + const IndexType* superset_indices, + const IndexType num_indices, + const IndexType* 
global_indices, IndexType* local_indices, const bool is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( @@ -98,11 +99,11 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void local_to_global(std::shared_ptr exec, const IndexType index_space_size, - const Array* subset_begin, - const Array* subset_end, - const Array* superset_indices, - const Array* local_indices, - Array* global_indices, + const IndexType num_subsets, const IndexType* subset_begin, + const IndexType* subset_end, + const IndexType* superset_indices, + const IndexType num_indices, + const IndexType* local_indices, IndexType* global_indices, const bool is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( diff --git a/omp/base/index_set_kernels.cpp b/omp/base/index_set_kernels.cpp index 173e759b831..c8f0da0e735 100644 --- a/omp/base/index_set_kernels.cpp +++ b/omp/base/index_set_kernels.cpp @@ -64,20 +64,18 @@ namespace index_set { template void to_global_indices(std::shared_ptr exec, const IndexType index_space_size, - const Array* subset_begin, - const Array* subset_end, - const Array* superset_indices, - Array* decomp_indices) + const IndexType num_subsets, + const IndexType* subset_begin, + const IndexType* subset_end, + const IndexType* superset_indices, + IndexType* decomp_indices) { - auto indices = decomp_indices->get_data(); - auto num_subsets = superset_indices->get_num_elems() - 1; - auto ss_indices = superset_indices->get_const_data(); #pragma omp parallel for for (size_type subset = 0; subset < num_subsets; ++subset) { - for (size_type i = 0; i < ss_indices[subset + 1] - ss_indices[subset]; - ++i) { - indices[ss_indices[subset] + i] = - subset_begin->get_const_data()[subset] + i; + for (size_type i = 0; + i < superset_indices[subset + 1] - superset_indices[subset]; ++i) { + decomp_indices[superset_indices[subset] + i] = + subset_begin[subset] + i; } } } @@ -148,29 +146,26 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INDEX_SET_POPULATE_KERNEL); 
template void global_to_local(std::shared_ptr exec, const IndexType index_space_size, - const Array* subset_begin, - const Array* subset_end, - const Array* superset_indices, - const Array* global_indices, - Array* local_indices, const bool is_sorted) + const IndexType num_subsets, const IndexType* subset_begin, + const IndexType* subset_end, + const IndexType* superset_indices, + const IndexType num_indices, + const IndexType* global_indices, IndexType* local_indices, + const bool is_sorted) { #pragma omp parallel for - for (size_type i = 0; i < global_indices->get_num_elems(); ++i) { - auto index = global_indices->get_const_data()[i]; + for (size_type i = 0; i < num_indices; ++i) { + auto index = global_indices[i]; GKO_ASSERT(index < index_space_size); - auto bucket = - std::distance(subset_begin->get_const_data(), - std::upper_bound(subset_begin->get_const_data(), - subset_begin->get_const_data() + - subset_begin->get_num_elems(), - index)); + const auto bucket = std::distance( + subset_begin, + std::upper_bound(subset_begin, subset_begin + num_subsets, index)); auto shifted_bucket = bucket == 0 ? 
0 : (bucket - 1); - if (subset_end->get_const_data()[shifted_bucket] <= index) { - local_indices->get_data()[i] = invalid_index(); + if (subset_end[shifted_bucket] <= index) { + local_indices[i] = invalid_index(); } else { - local_indices->get_data()[i] = - index - subset_begin->get_const_data()[shifted_bucket] + - superset_indices->get_const_data()[shifted_bucket]; + local_indices[i] = index - subset_begin[shifted_bucket] + + superset_indices[shifted_bucket]; } } } @@ -182,29 +177,24 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void local_to_global(std::shared_ptr exec, const IndexType index_space_size, - const Array* subset_begin, - const Array* subset_end, - const Array* superset_indices, - const Array* local_indices, - Array* global_indices, const bool is_sorted) + const IndexType num_subsets, const IndexType* subset_begin, + const IndexType* subset_end, + const IndexType* superset_indices, + const IndexType num_indices, + const IndexType* local_indices, IndexType* global_indices, + const bool is_sorted) { #pragma omp parallel for - for (size_type i = 0; i < local_indices->get_num_elems(); ++i) { - auto index = local_indices->get_const_data()[i]; - GKO_ASSERT( - index <= - (superset_indices - ->get_const_data()[superset_indices->get_num_elems() - 1])); - auto bucket = std::distance( - superset_indices->get_const_data(), - std::upper_bound(superset_indices->get_const_data(), - superset_indices->get_const_data() + - superset_indices->get_num_elems(), - index)); + for (size_type i = 0; i < num_indices; ++i) { + auto index = local_indices[i]; + GKO_ASSERT(index <= (superset_indices[num_subsets])); + const auto bucket = std::distance( + superset_indices, + std::upper_bound(superset_indices, + superset_indices + num_subsets + 1, index)); auto shifted_bucket = bucket == 0 ? 
0 : (bucket - 1); - global_indices->get_data()[i] = - subset_begin->get_const_data()[shifted_bucket] + index - - superset_indices->get_const_data()[shifted_bucket]; + global_indices[i] = subset_begin[shifted_bucket] + index - + superset_indices[shifted_bucket]; } } diff --git a/omp/test/base/index_set.cpp b/omp/test/base/index_set.cpp index e0585a4c1b1..f18a4f60487 100644 --- a/omp/test/base/index_set.cpp +++ b/omp/test/base/index_set.cpp @@ -188,13 +188,19 @@ TYPED_TEST(IndexSet, GetGlobalIndicesIsEquivalentToReference) auto ref_local_arr = gko::Array{this->ref, rand_global_arr.get_num_elems()}; gko::kernels::reference::index_set::global_to_local( - this->ref, TypeParam(520), &ref_begin_comp, &ref_end_comp, - &ref_superset_comp, &rand_global_arr, &ref_local_arr, false); + this->ref, TypeParam(520), ref_idx_set.get_num_subsets(), + ref_idx_set.get_subsets_begin(), ref_idx_set.get_subsets_end(), + ref_idx_set.get_superset_indices(), + static_cast(rand_global_arr.get_num_elems()), + rand_global_arr.get_const_data(), ref_local_arr.get_data(), false); auto omp_local_arr = gko::Array{this->omp, rand_global_arr.get_num_elems()}; gko::kernels::omp::index_set::global_to_local( - this->omp, TypeParam(520), &omp_begin_comp, &omp_end_comp, - &omp_superset_comp, &rand_global_arr, &omp_local_arr, false); + this->omp, TypeParam(520), omp_idx_set.get_num_subsets(), + omp_idx_set.get_subsets_begin(), omp_idx_set.get_subsets_end(), + omp_idx_set.get_superset_indices(), + static_cast(rand_global_arr.get_num_elems()), + rand_global_arr.get_const_data(), omp_local_arr.get_data(), false); ASSERT_EQ(rand_global_arr.get_num_elems(), omp_local_arr.get_num_elems()); GKO_ASSERT_ARRAY_EQ(ref_local_arr, omp_local_arr); @@ -229,13 +235,19 @@ TYPED_TEST(IndexSet, GetLocalIndicesIsEquivalentToReference) auto ref_global_arr = gko::Array{this->ref, rand_local_arr.get_num_elems()}; gko::kernels::reference::index_set::local_to_global( - this->ref, TypeParam(520), &ref_begin_comp, &ref_end_comp, - 
&ref_superset_comp, &rand_local_arr, &ref_global_arr, false); + this->ref, TypeParam(520), ref_idx_set.get_num_subsets(), + ref_idx_set.get_subsets_begin(), ref_idx_set.get_subsets_end(), + ref_idx_set.get_superset_indices(), + static_cast(rand_local_arr.get_num_elems()), + rand_local_arr.get_const_data(), ref_global_arr.get_data(), false); auto omp_global_arr = gko::Array{this->omp, rand_local_arr.get_num_elems()}; gko::kernels::omp::index_set::local_to_global( - this->omp, TypeParam(520), &omp_begin_comp, &omp_end_comp, - &omp_superset_comp, &rand_local_arr, &omp_global_arr, false); + this->omp, TypeParam(520), omp_idx_set.get_num_subsets(), + omp_idx_set.get_subsets_begin(), omp_idx_set.get_subsets_end(), + omp_idx_set.get_superset_indices(), + static_cast(rand_local_arr.get_num_elems()), + rand_local_arr.get_const_data(), omp_global_arr.get_data(), false); ASSERT_EQ(rand_local_arr.get_num_elems(), omp_global_arr.get_num_elems()); GKO_ASSERT_ARRAY_EQ(ref_global_arr, omp_global_arr); diff --git a/reference/base/index_set_kernels.cpp b/reference/base/index_set_kernels.cpp index a26421d88e0..d480fc7e290 100644 --- a/reference/base/index_set_kernels.cpp +++ b/reference/base/index_set_kernels.cpp @@ -82,19 +82,17 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void to_global_indices(std::shared_ptr exec, const IndexType index_space_size, - const Array* subset_begin, - const Array* subset_end, - const Array* superset_indices, - Array* decomp_indices) + const IndexType num_subsets, + const IndexType* subset_begin, + const IndexType* subset_end, + const IndexType* superset_indices, + IndexType* decomp_indices) { - auto indices = decomp_indices->get_data(); - auto num_subsets = superset_indices->get_num_elems() - 1; - auto ss_indices = superset_indices->get_const_data(); for (size_type subset = 0; subset < num_subsets; ++subset) { - for (size_type i = 0; i < ss_indices[subset + 1] - ss_indices[subset]; - ++i) { - indices[ss_indices[subset] + i] = - 
subset_begin->get_const_data()[subset] + i; + for (size_type i = 0; + i < superset_indices[subset + 1] - superset_indices[subset]; ++i) { + decomp_indices[superset_indices[subset] + i] = + subset_begin[subset] + i; } } } @@ -165,37 +163,34 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INDEX_SET_POPULATE_KERNEL); template void global_to_local(std::shared_ptr exec, const IndexType index_space_size, - const Array* subset_begin, - const Array* subset_end, - const Array* superset_indices, - const Array* global_indices, - Array* local_indices, const bool is_sorted) + const IndexType num_subsets, const IndexType* subset_begin, + const IndexType* subset_end, + const IndexType* superset_indices, + const IndexType num_indices, + const IndexType* global_indices, IndexType* local_indices, + const bool is_sorted) { IndexType shifted_bucket = 0; // Loop over all the query indices. - for (size_type i = 0; i < global_indices->get_num_elems(); ++i) { + for (size_type i = 0; i < num_indices; ++i) { // If the query indices are sorted, then we dont need to search in the // entire set, but can search only in the successive complement set of // the previous search if (!is_sorted) { shifted_bucket = 0; } - auto index = global_indices->get_const_data()[i]; + auto index = global_indices[i]; GKO_ASSERT(index < index_space_size); - auto shifted_subset = &subset_begin->get_const_data()[shifted_bucket]; - auto bucket = - std::distance(subset_begin->get_const_data(), - std::upper_bound(shifted_subset, - subset_begin->get_const_data() + - subset_begin->get_num_elems(), - index)); + const auto shifted_subset = &subset_begin[shifted_bucket]; + auto bucket = std::distance( + subset_begin, std::upper_bound(shifted_subset, + subset_begin + num_subsets, index)); shifted_bucket = bucket == 0 ? 
0 : (bucket - 1); - if (subset_end->get_const_data()[shifted_bucket] <= index) { - local_indices->get_data()[i] = invalid_index(); + if (subset_end[shifted_bucket] <= index) { + local_indices[i] = invalid_index(); } else { - local_indices->get_data()[i] = - index - subset_begin->get_const_data()[shifted_bucket] + - superset_indices->get_const_data()[shifted_bucket]; + local_indices[i] = index - subset_begin[shifted_bucket] + + superset_indices[shifted_bucket]; } } } @@ -207,37 +202,31 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void local_to_global(std::shared_ptr exec, const IndexType index_space_size, - const Array* subset_begin, - const Array* subset_end, - const Array* superset_indices, - const Array* local_indices, - Array* global_indices, const bool is_sorted) + const IndexType num_subsets, const IndexType* subset_begin, + const IndexType* subset_end, + const IndexType* superset_indices, + const IndexType num_indices, + const IndexType* local_indices, IndexType* global_indices, + const bool is_sorted) { IndexType shifted_bucket = 0; - for (size_type i = 0; i < local_indices->get_num_elems(); ++i) { + for (size_type i = 0; i < num_indices; ++i) { // If the query indices are sorted, then we dont need to search in the // entire set, but can search only in the successive complement set of // the previous search if (!is_sorted) { shifted_bucket = 0; } - auto index = local_indices->get_const_data()[i]; - GKO_ASSERT( - index <= - (superset_indices - ->get_const_data()[superset_indices->get_num_elems() - 1])); - auto shifted_superset = - &superset_indices->get_const_data()[shifted_bucket]; + auto index = local_indices[i]; + GKO_ASSERT(index <= (superset_indices[num_subsets])); + const auto shifted_superset = &superset_indices[shifted_bucket]; auto bucket = std::distance( - superset_indices->get_const_data(), + superset_indices, std::upper_bound(shifted_superset, - superset_indices->get_const_data() + - superset_indices->get_num_elems(), - index)); + 
superset_indices + num_subsets + 1, index)); shifted_bucket = bucket == 0 ? 0 : (bucket - 1); - global_indices->get_data()[i] = - subset_begin->get_const_data()[shifted_bucket] + index - - superset_indices->get_const_data()[shifted_bucket]; + global_indices[i] = subset_begin[shifted_bucket] + index - + superset_indices[shifted_bucket]; } } From 0b2183cd94577578356add0378ebaa9df5f4a397 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 28 Jan 2022 13:44:01 +0100 Subject: [PATCH 03/20] Workaround for calling core funcs from kernels --- omp/matrix/csr_kernels.cpp | 63 ++++++++++++++++++++++++++++++-- reference/matrix/csr_kernels.cpp | 63 ++++++++++++++++++++++++++++++-- 2 files changed, 118 insertions(+), 8 deletions(-) diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 9344a236066..4a5b7fb9bd7 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include @@ -51,6 +52,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/base/allocator.hpp" +#include "core/base/index_set_kernels.hpp" #include "core/base/iterator_factory.hpp" #include "core/base/utils.hpp" #include "core/components/fill_array_kernels.hpp" @@ -742,6 +744,55 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); +// TODO: FIXME +namespace index_set { + + +template +Array map_global_to_local(const IndexSet& index_set, + const Array& global_indices, + const bool is_sorted) +{ + auto exec = index_set.get_executor(); + auto local_indices = + gko::Array(exec, global_indices.get_num_elems()); + + GKO_ASSERT(index_set.get_num_subsets() >= 1); + gko::kernels::omp::index_set::global_to_local( + as(exec), index_set.get_size(), + index_set.get_num_subsets(), index_set.get_subsets_begin(), + index_set.get_subsets_end(), index_set.get_superset_indices(), + static_cast(local_indices.get_num_elems()), + global_indices.get_const_data(), local_indices.get_data(), is_sorted); + return local_indices; +} + + +template +IndexType get_local_index(const IndexSet& index_set, + const IndexType index) +{ + auto exec = index_set.get_executor(); + const auto global_idx = + Array(exec, std::initializer_list{index}); + auto local_idx = Array( + exec, index_set::map_global_to_local(index_set, global_idx, true)); + + return exec->copy_val_to_host(local_idx.get_data()); +} + + +template +bool contains(const IndexSet& index_set, const IndexType input_index) +{ + auto local_index = index_set::get_local_index(index_set, input_index); + return local_index != invalid_index(); +} + + +} // namespace index_set + + template void calculate_nonzeros_per_row_in_index_set( std::shared_ptr exec, @@ -759,7 +810,8 @@ void calculate_nonzeros_per_row_in_index_set( row_nnz->get_data()[res_row] = zero(); for (size_type nnz = source->get_const_row_ptrs()[row]; nnz < source->get_const_row_ptrs()[row + 1]; ++nnz) { - if (col_index_set.contains(source->get_const_col_idxs()[nnz])) { + if 
(index_set::contains(col_index_set, + source->get_const_col_idxs()[nnz])) { row_nnz->get_data()[res_row]++; } } @@ -829,15 +881,18 @@ void compute_submatrix_from_index_set( for (size_type set = 0; set < num_row_subsets; ++set) { for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { + auto local_map = std::vector( + src_row_ptrs[row + 1] - src_row_ptrs[row], 0); for (size_type nnz = src_row_ptrs[row]; nnz < src_row_ptrs[row + 1]; ++nnz) { - if (col_index_set.contains(src_col_idxs[nnz])) { - res_col_idxs[res_nnz] = - col_index_set.get_local_index(src_col_idxs[nnz]); + if (index_set::contains(col_index_set, src_col_idxs[nnz])) { + res_col_idxs[res_nnz] = index_set::get_local_index( + col_index_set, src_col_idxs[nnz]); res_values[res_nnz] = src_values[nnz]; res_nnz++; } } + // res_nnz = res_row_ptrs[row_index_set.get_local_index(row)]; } } } diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 9ffa78f7d45..c511df62ab1 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include @@ -50,6 +51,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/base/allocator.hpp" +#include "core/base/index_set_kernels.hpp" #include "core/base/iterator_factory.hpp" #include "core/components/fill_array_kernels.hpp" #include "core/components/format_conversion_kernels.hpp" @@ -623,6 +625,55 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); +// TODO: FIXME +namespace index_set { + + +template +Array map_global_to_local(const IndexSet& index_set, + const Array& global_indices, + const bool is_sorted) +{ + auto exec = index_set.get_executor(); + auto local_indices = + gko::Array(exec, global_indices.get_num_elems()); + + GKO_ASSERT(index_set.get_num_subsets() >= 1); + gko::kernels::reference::index_set::global_to_local( + as(exec), index_set.get_size(), + index_set.get_num_subsets(), index_set.get_subsets_begin(), + index_set.get_subsets_end(), index_set.get_superset_indices(), + static_cast(local_indices.get_num_elems()), + global_indices.get_const_data(), local_indices.get_data(), is_sorted); + return local_indices; +} + + +template +IndexType get_local_index(const IndexSet& index_set, + const IndexType index) +{ + auto exec = index_set.get_executor(); + const auto global_idx = + Array(exec, std::initializer_list{index}); + auto local_idx = Array( + exec, index_set::map_global_to_local(index_set, global_idx, true)); + + return exec->copy_val_to_host(local_idx.get_data()); +} + + +template +bool contains(const IndexSet& index_set, const IndexType input_index) +{ + auto local_index = index_set::get_local_index(index_set, input_index); + return local_index != invalid_index(); +} + + +} // namespace index_set + + template void calculate_nonzeros_per_row_in_index_set( std::shared_ptr exec, @@ -640,7 +691,8 @@ void calculate_nonzeros_per_row_in_index_set( row_nnz->get_data()[res_row] = zero(); for (size_type nnz = source->get_const_row_ptrs()[row]; nnz < source->get_const_row_ptrs()[row + 1]; ++nnz) { - if (col_index_set.contains(source->get_const_col_idxs()[nnz])) { 
+ if (index_set::contains(col_index_set, + source->get_const_col_idxs()[nnz])) { row_nnz->get_data()[res_row]++; } } @@ -711,15 +763,18 @@ void compute_submatrix_from_index_set( for (size_type set = 0; set < num_row_subsets; ++set) { for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { + auto local_map = std::vector( + src_row_ptrs[row + 1] - src_row_ptrs[row], 0); for (size_type nnz = src_row_ptrs[row]; nnz < src_row_ptrs[row + 1]; ++nnz) { - if (col_index_set.contains(src_col_idxs[nnz])) { - res_col_idxs[res_nnz] = - col_index_set.get_local_index(src_col_idxs[nnz]); + if (index_set::contains(col_index_set, src_col_idxs[nnz])) { + res_col_idxs[res_nnz] = index_set::get_local_index( + col_index_set, src_col_idxs[nnz]); res_values[res_nnz] = src_values[nnz]; res_nnz++; } } + // res_nnz = res_row_ptrs[row_index_set.get_local_index(row)]; } } } From bb3785455947ffc7109d97ee8a58ffab388bd0a2 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 28 Jan 2022 17:11:46 +0100 Subject: [PATCH 04/20] Fix for init_list space size detection --- include/ginkgo/core/base/index_set.hpp | 4 ++- omp/matrix/csr_kernels.cpp | 40 +++++++++++++++----------- reference/matrix/csr_kernels.cpp | 40 +++++++++++++++----------- 3 files changed, 51 insertions(+), 33 deletions(-) diff --git a/include/ginkgo/core/base/index_set.hpp b/include/ginkgo/core/base/index_set.hpp index 0725ba73f22..cc38df5f5f2 100644 --- a/include/ginkgo/core/base/index_set.hpp +++ b/include/ginkgo/core/base/index_set.hpp @@ -114,7 +114,9 @@ class IndexSet : public EnablePolymorphicObject> { std::initializer_list init_list, const bool is_sorted = false) : EnablePolymorphicObject(std::move(executor)), - index_space_size_(init_list.size()) + index_space_size_( + *(std::max_element(std::begin(init_list), std::end(init_list))) + + 1) { this->populate_subsets( Array(this->get_executor(), init_list), is_sorted); diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 
4a5b7fb9bd7..62c191b5c53 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -749,19 +749,19 @@ namespace index_set { template -Array map_global_to_local(const IndexSet& index_set, - const Array& global_indices, - const bool is_sorted) +Array map_global_to_local( + std::shared_ptr exec, + const IndexSet& index_set, + const Array& global_indices, const bool is_sorted) { - auto exec = index_set.get_executor(); auto local_indices = gko::Array(exec, global_indices.get_num_elems()); GKO_ASSERT(index_set.get_num_subsets() >= 1); gko::kernels::omp::index_set::global_to_local( - as(exec), index_set.get_size(), - index_set.get_num_subsets(), index_set.get_subsets_begin(), - index_set.get_subsets_end(), index_set.get_superset_indices(), + exec, index_set.get_size(), index_set.get_num_subsets(), + index_set.get_subsets_begin(), index_set.get_subsets_end(), + index_set.get_superset_indices(), static_cast(local_indices.get_num_elems()), global_indices.get_const_data(), local_indices.get_data(), is_sorted); return local_indices; @@ -769,24 +769,31 @@ Array map_global_to_local(const IndexSet& index_set, template -IndexType get_local_index(const IndexSet& index_set, +IndexType get_local_index(std::shared_ptr exec, + const IndexSet& index_set, const IndexType index) { - auto exec = index_set.get_executor(); const auto global_idx = Array(exec, std::initializer_list{index}); auto local_idx = Array( - exec, index_set::map_global_to_local(index_set, global_idx, true)); + exec, + index_set::map_global_to_local(exec, index_set, global_idx, true)); return exec->copy_val_to_host(local_idx.get_data()); } template -bool contains(const IndexSet& index_set, const IndexType input_index) +bool contains(std::shared_ptr exec, + const IndexSet& index_set, const IndexType input_index) { - auto local_index = index_set::get_local_index(index_set, input_index); - return local_index != invalid_index(); + if (input_index >= index_set.get_size()) { + return false; + } else { + auto 
local_index = + index_set::get_local_index(exec, index_set, input_index); + return local_index != invalid_index(); + } } @@ -810,7 +817,7 @@ void calculate_nonzeros_per_row_in_index_set( row_nnz->get_data()[res_row] = zero(); for (size_type nnz = source->get_const_row_ptrs()[row]; nnz < source->get_const_row_ptrs()[row + 1]; ++nnz) { - if (index_set::contains(col_index_set, + if (index_set::contains(exec, col_index_set, source->get_const_col_idxs()[nnz])) { row_nnz->get_data()[res_row]++; } @@ -885,9 +892,10 @@ void compute_submatrix_from_index_set( src_row_ptrs[row + 1] - src_row_ptrs[row], 0); for (size_type nnz = src_row_ptrs[row]; nnz < src_row_ptrs[row + 1]; ++nnz) { - if (index_set::contains(col_index_set, src_col_idxs[nnz])) { + if (index_set::contains(exec, col_index_set, + src_col_idxs[nnz])) { res_col_idxs[res_nnz] = index_set::get_local_index( - col_index_set, src_col_idxs[nnz]); + exec, col_index_set, src_col_idxs[nnz]); res_values[res_nnz] = src_values[nnz]; res_nnz++; } diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index c511df62ab1..e5cca85af04 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -630,19 +630,19 @@ namespace index_set { template -Array map_global_to_local(const IndexSet& index_set, - const Array& global_indices, - const bool is_sorted) +Array map_global_to_local( + std::shared_ptr exec, + const IndexSet& index_set, + const Array& global_indices, const bool is_sorted) { - auto exec = index_set.get_executor(); auto local_indices = gko::Array(exec, global_indices.get_num_elems()); GKO_ASSERT(index_set.get_num_subsets() >= 1); gko::kernels::reference::index_set::global_to_local( - as(exec), index_set.get_size(), - index_set.get_num_subsets(), index_set.get_subsets_begin(), - index_set.get_subsets_end(), index_set.get_superset_indices(), + exec, index_set.get_size(), index_set.get_num_subsets(), + index_set.get_subsets_begin(), index_set.get_subsets_end(), + 
index_set.get_superset_indices(), static_cast(local_indices.get_num_elems()), global_indices.get_const_data(), local_indices.get_data(), is_sorted); return local_indices; @@ -650,24 +650,31 @@ Array map_global_to_local(const IndexSet& index_set, template -IndexType get_local_index(const IndexSet& index_set, +IndexType get_local_index(std::shared_ptr exec, + const IndexSet& index_set, const IndexType index) { - auto exec = index_set.get_executor(); const auto global_idx = Array(exec, std::initializer_list{index}); auto local_idx = Array( - exec, index_set::map_global_to_local(index_set, global_idx, true)); + exec, + index_set::map_global_to_local(exec, index_set, global_idx, true)); return exec->copy_val_to_host(local_idx.get_data()); } template -bool contains(const IndexSet& index_set, const IndexType input_index) +bool contains(std::shared_ptr exec, + const IndexSet& index_set, const IndexType input_index) { - auto local_index = index_set::get_local_index(index_set, input_index); - return local_index != invalid_index(); + if (input_index >= index_set.get_size()) { + return false; + } else { + auto local_index = + index_set::get_local_index(exec, index_set, input_index); + return local_index != invalid_index(); + } } @@ -691,7 +698,7 @@ void calculate_nonzeros_per_row_in_index_set( row_nnz->get_data()[res_row] = zero(); for (size_type nnz = source->get_const_row_ptrs()[row]; nnz < source->get_const_row_ptrs()[row + 1]; ++nnz) { - if (index_set::contains(col_index_set, + if (index_set::contains(exec, col_index_set, source->get_const_col_idxs()[nnz])) { row_nnz->get_data()[res_row]++; } @@ -767,9 +774,10 @@ void compute_submatrix_from_index_set( src_row_ptrs[row + 1] - src_row_ptrs[row], 0); for (size_type nnz = src_row_ptrs[row]; nnz < src_row_ptrs[row + 1]; ++nnz) { - if (index_set::contains(col_index_set, src_col_idxs[nnz])) { + if (index_set::contains(exec, col_index_set, + src_col_idxs[nnz])) { res_col_idxs[res_nnz] = index_set::get_local_index( - col_index_set, 
src_col_idxs[nnz]); + exec, col_index_set, src_col_idxs[nnz]); res_values[res_nnz] = src_values[nnz]; res_nnz++; } From 9f90b747ae396f4a472b9a9a7afc7c4659ce0a00 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 15 Feb 2022 18:20:14 +0100 Subject: [PATCH 05/20] Allow index gt index_space_size --- omp/base/index_set_kernels.cpp | 5 ++++- reference/base/index_set_kernels.cpp | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/omp/base/index_set_kernels.cpp b/omp/base/index_set_kernels.cpp index c8f0da0e735..fd9b4739432 100644 --- a/omp/base/index_set_kernels.cpp +++ b/omp/base/index_set_kernels.cpp @@ -156,7 +156,10 @@ void global_to_local(std::shared_ptr exec, #pragma omp parallel for for (size_type i = 0; i < num_indices; ++i) { auto index = global_indices[i]; - GKO_ASSERT(index < index_space_size); + if (index > index_space_size) { + local_indices[i] = invalid_index(); + continue; + } const auto bucket = std::distance( subset_begin, std::upper_bound(subset_begin, subset_begin + num_subsets, index)); diff --git a/reference/base/index_set_kernels.cpp b/reference/base/index_set_kernels.cpp index d480fc7e290..13f091e681b 100644 --- a/reference/base/index_set_kernels.cpp +++ b/reference/base/index_set_kernels.cpp @@ -180,7 +180,10 @@ void global_to_local(std::shared_ptr exec, shifted_bucket = 0; } auto index = global_indices[i]; - GKO_ASSERT(index < index_space_size); + if (index > index_space_size) { + local_indices[i] = invalid_index(); + continue; + } const auto shifted_subset = &subset_begin[shifted_bucket]; auto bucket = std::distance( subset_begin, std::upper_bound(shifted_subset, From 851026ac412bc676604ba424742fdf86005a10cb Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 15 Feb 2022 18:20:36 +0100 Subject: [PATCH 06/20] Remove workaround and call kernels directly --- omp/matrix/csr_kernels.cpp | 103 ++++++++++--------------------- reference/matrix/csr_kernels.cpp | 103 ++++++++++--------------------- 2 files changed, 66 
insertions(+), 140 deletions(-) diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 62c191b5c53..0ab8abdfbfc 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -744,62 +744,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); -// TODO: FIXME -namespace index_set { - - -template -Array map_global_to_local( - std::shared_ptr exec, - const IndexSet& index_set, - const Array& global_indices, const bool is_sorted) -{ - auto local_indices = - gko::Array(exec, global_indices.get_num_elems()); - - GKO_ASSERT(index_set.get_num_subsets() >= 1); - gko::kernels::omp::index_set::global_to_local( - exec, index_set.get_size(), index_set.get_num_subsets(), - index_set.get_subsets_begin(), index_set.get_subsets_end(), - index_set.get_superset_indices(), - static_cast(local_indices.get_num_elems()), - global_indices.get_const_data(), local_indices.get_data(), is_sorted); - return local_indices; -} - - -template -IndexType get_local_index(std::shared_ptr exec, - const IndexSet& index_set, - const IndexType index) -{ - const auto global_idx = - Array(exec, std::initializer_list{index}); - auto local_idx = Array( - exec, - index_set::map_global_to_local(exec, index_set, global_idx, true)); - - return exec->copy_val_to_host(local_idx.get_data()); -} - - -template -bool contains(std::shared_ptr exec, - const IndexSet& index_set, const IndexType input_index) -{ - if (input_index >= index_set.get_size()) { - return false; - } else { - auto local_index = - index_set::get_local_index(exec, index_set, input_index); - return local_index != invalid_index(); - } -} - - -} // namespace index_set - - template void calculate_nonzeros_per_row_in_index_set( std::shared_ptr exec, @@ -811,14 +755,26 @@ void calculate_nonzeros_per_row_in_index_set( auto num_row_subsets = row_index_set.get_num_subsets(); auto row_subset_begin = row_index_set.get_subsets_begin(); auto row_subset_end = 
row_index_set.get_subsets_end(); + auto src_ptrs = source->get_const_row_ptrs(); for (size_type set = 0; set < num_row_subsets; ++set) { for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { row_nnz->get_data()[res_row] = zero(); - for (size_type nnz = source->get_const_row_ptrs()[row]; - nnz < source->get_const_row_ptrs()[row + 1]; ++nnz) { - if (index_set::contains(exec, col_index_set, - source->get_const_col_idxs()[nnz])) { + Array l_idxs( + exec, + static_cast(src_ptrs[row + 1] - src_ptrs[row])); + gko::kernels::omp::index_set::global_to_local( + exec, col_index_set.get_size(), col_index_set.get_num_subsets(), + col_index_set.get_subsets_begin(), + col_index_set.get_subsets_end(), + col_index_set.get_superset_indices(), + static_cast(l_idxs.get_num_elems()), + source->get_const_col_idxs() + src_ptrs[row], l_idxs.get_data(), + false); + for (size_type nnz = 0; nnz < (src_ptrs[row + 1] - src_ptrs[row]); + ++nnz) { + auto l_idx = l_idxs.get_const_data()[nnz]; + if (l_idx != invalid_index()) { row_nnz->get_data()[res_row]++; } } @@ -888,19 +844,26 @@ void compute_submatrix_from_index_set( for (size_type set = 0; set < num_row_subsets; ++set) { for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { - auto local_map = std::vector( - src_row_ptrs[row + 1] - src_row_ptrs[row], 0); - for (size_type nnz = src_row_ptrs[row]; nnz < src_row_ptrs[row + 1]; - ++nnz) { - if (index_set::contains(exec, col_index_set, - src_col_idxs[nnz])) { - res_col_idxs[res_nnz] = index_set::get_local_index( - exec, col_index_set, src_col_idxs[nnz]); - res_values[res_nnz] = src_values[nnz]; + Array l_idxs( + exec, static_cast(src_row_ptrs[row + 1] - + src_row_ptrs[row])); + gko::kernels::omp::index_set::global_to_local( + exec, col_index_set.get_size(), col_index_set.get_num_subsets(), + col_index_set.get_subsets_begin(), + col_index_set.get_subsets_end(), + col_index_set.get_superset_indices(), + static_cast(l_idxs.get_num_elems()), + 
source->get_const_col_idxs() + src_row_ptrs[row], + l_idxs.get_data(), false); + for (size_type nnz = 0; + nnz < (src_row_ptrs[row + 1] - src_row_ptrs[row]); ++nnz) { + auto l_idx = l_idxs.get_const_data()[nnz]; + if (l_idx != invalid_index()) { + res_col_idxs[res_nnz] = l_idx; + res_values[res_nnz] = src_values[nnz + src_row_ptrs[row]]; res_nnz++; } } - // res_nnz = res_row_ptrs[row_index_set.get_local_index(row)]; } } } diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index e5cca85af04..4a0ba4e82fd 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -625,62 +625,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); -// TODO: FIXME -namespace index_set { - - -template -Array map_global_to_local( - std::shared_ptr exec, - const IndexSet& index_set, - const Array& global_indices, const bool is_sorted) -{ - auto local_indices = - gko::Array(exec, global_indices.get_num_elems()); - - GKO_ASSERT(index_set.get_num_subsets() >= 1); - gko::kernels::reference::index_set::global_to_local( - exec, index_set.get_size(), index_set.get_num_subsets(), - index_set.get_subsets_begin(), index_set.get_subsets_end(), - index_set.get_superset_indices(), - static_cast(local_indices.get_num_elems()), - global_indices.get_const_data(), local_indices.get_data(), is_sorted); - return local_indices; -} - - -template -IndexType get_local_index(std::shared_ptr exec, - const IndexSet& index_set, - const IndexType index) -{ - const auto global_idx = - Array(exec, std::initializer_list{index}); - auto local_idx = Array( - exec, - index_set::map_global_to_local(exec, index_set, global_idx, true)); - - return exec->copy_val_to_host(local_idx.get_data()); -} - - -template -bool contains(std::shared_ptr exec, - const IndexSet& index_set, const IndexType input_index) -{ - if (input_index >= index_set.get_size()) { - return false; - } else { - auto local_index = - 
index_set::get_local_index(exec, index_set, input_index); - return local_index != invalid_index(); - } -} - - -} // namespace index_set - - template void calculate_nonzeros_per_row_in_index_set( std::shared_ptr exec, @@ -692,14 +636,26 @@ void calculate_nonzeros_per_row_in_index_set( auto num_row_subsets = row_index_set.get_num_subsets(); auto row_subset_begin = row_index_set.get_subsets_begin(); auto row_subset_end = row_index_set.get_subsets_end(); + auto src_ptrs = source->get_const_row_ptrs(); for (size_type set = 0; set < num_row_subsets; ++set) { for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { row_nnz->get_data()[res_row] = zero(); - for (size_type nnz = source->get_const_row_ptrs()[row]; - nnz < source->get_const_row_ptrs()[row + 1]; ++nnz) { - if (index_set::contains(exec, col_index_set, - source->get_const_col_idxs()[nnz])) { + Array l_idxs( + exec, + static_cast(src_ptrs[row + 1] - src_ptrs[row])); + gko::kernels::reference::index_set::global_to_local( + exec, col_index_set.get_size(), col_index_set.get_num_subsets(), + col_index_set.get_subsets_begin(), + col_index_set.get_subsets_end(), + col_index_set.get_superset_indices(), + static_cast(l_idxs.get_num_elems()), + source->get_const_col_idxs() + src_ptrs[row], l_idxs.get_data(), + false); + for (size_type nnz = 0; nnz < (src_ptrs[row + 1] - src_ptrs[row]); + ++nnz) { + auto l_idx = l_idxs.get_const_data()[nnz]; + if (l_idx != invalid_index()) { row_nnz->get_data()[res_row]++; } } @@ -770,19 +726,26 @@ void compute_submatrix_from_index_set( for (size_type set = 0; set < num_row_subsets; ++set) { for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { - auto local_map = std::vector( - src_row_ptrs[row + 1] - src_row_ptrs[row], 0); - for (size_type nnz = src_row_ptrs[row]; nnz < src_row_ptrs[row + 1]; - ++nnz) { - if (index_set::contains(exec, col_index_set, - src_col_idxs[nnz])) { - res_col_idxs[res_nnz] = index_set::get_local_index( - exec, 
col_index_set, src_col_idxs[nnz]); - res_values[res_nnz] = src_values[nnz]; + Array l_idxs( + exec, static_cast(src_row_ptrs[row + 1] - + src_row_ptrs[row])); + gko::kernels::reference::index_set::global_to_local( + exec, col_index_set.get_size(), col_index_set.get_num_subsets(), + col_index_set.get_subsets_begin(), + col_index_set.get_subsets_end(), + col_index_set.get_superset_indices(), + static_cast(l_idxs.get_num_elems()), + source->get_const_col_idxs() + src_row_ptrs[row], + l_idxs.get_data(), false); + for (size_type nnz = 0; + nnz < (src_row_ptrs[row + 1] - src_row_ptrs[row]); ++nnz) { + auto l_idx = l_idxs.get_const_data()[nnz]; + if (l_idx != invalid_index()) { + res_col_idxs[res_nnz] = l_idx; + res_values[res_nnz] = src_values[nnz + src_row_ptrs[row]]; res_nnz++; } } - // res_nnz = res_row_ptrs[row_index_set.get_local_index(row)]; } } } From 12d694f746224cdc69a185ca864aacabd92ffeb1 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Tue, 15 Feb 2022 20:04:38 +0000 Subject: [PATCH 07/20] Format files Co-authored-by: Pratik Nayak --- cuda/base/index_set_kernels.cpp | 6 +++--- dpcpp/base/index_set_kernels.dp.cpp | 6 +++--- hip/base/index_set_kernels.hip.cpp | 6 +++--- omp/base/index_set_kernels.cpp | 4 +++- reference/base/index_set_kernels.cpp | 4 +++- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/cuda/base/index_set_kernels.cpp b/cuda/base/index_set_kernels.cpp index 32e0e433586..c8b247e07c1 100644 --- a/cuda/base/index_set_kernels.cpp +++ b/cuda/base/index_set_kernels.cpp @@ -30,6 +30,9 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ +#include "core/base/index_set_kernels.hpp" + + #include @@ -38,9 +41,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include "core/base/index_set_kernels.hpp" - - namespace gko { namespace kernels { /** diff --git a/dpcpp/base/index_set_kernels.dp.cpp b/dpcpp/base/index_set_kernels.dp.cpp index 3b70ac349ed..76994709bfc 100644 --- a/dpcpp/base/index_set_kernels.dp.cpp +++ b/dpcpp/base/index_set_kernels.dp.cpp @@ -30,6 +30,9 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ +#include "core/base/index_set_kernels.hpp" + + #include @@ -38,9 +41,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/base/index_set_kernels.hpp" - - namespace gko { namespace kernels { /** diff --git a/hip/base/index_set_kernels.hip.cpp b/hip/base/index_set_kernels.hip.cpp index b528c02b297..f45add6d6ab 100644 --- a/hip/base/index_set_kernels.hip.cpp +++ b/hip/base/index_set_kernels.hip.cpp @@ -30,6 +30,9 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ +#include "core/base/index_set_kernels.hpp" + + #include @@ -38,9 +41,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/base/index_set_kernels.hpp" - - namespace gko { namespace kernels { /** diff --git a/omp/base/index_set_kernels.cpp b/omp/base/index_set_kernels.cpp index fd9b4739432..d9f196c6568 100644 --- a/omp/base/index_set_kernels.cpp +++ b/omp/base/index_set_kernels.cpp @@ -30,6 +30,9 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ +#include "core/base/index_set_kernels.hpp" + + #include #include #include @@ -42,7 +45,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/base/allocator.hpp" -#include "core/base/index_set_kernels.hpp" namespace gko { diff --git a/reference/base/index_set_kernels.cpp b/reference/base/index_set_kernels.cpp index 13f091e681b..9e3048d0658 100644 --- a/reference/base/index_set_kernels.cpp +++ b/reference/base/index_set_kernels.cpp @@ -30,6 +30,9 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ +#include "core/base/index_set_kernels.hpp" + + #include #include #include @@ -44,7 +47,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/allocator.hpp" -#include "core/base/index_set_kernels.hpp" namespace gko { From 8d5e2eb1d1abeb22df08843cd8764f8baa2cc260 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 16 Feb 2022 09:35:36 +0100 Subject: [PATCH 08/20] Parallelize omp by subsets --- omp/matrix/csr_kernels.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 0ab8abdfbfc..76644bb3857 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -751,15 +751,16 @@ void calculate_nonzeros_per_row_in_index_set( const IndexSet& row_index_set, const IndexSet& col_index_set, Array* row_nnz) { - size_type res_row = 0; auto num_row_subsets = row_index_set.get_num_subsets(); auto row_subset_begin = row_index_set.get_subsets_begin(); auto row_subset_end = row_index_set.get_subsets_end(); auto src_ptrs = source->get_const_row_ptrs(); +#pragma omp parallel for for (size_type set = 0; set < num_row_subsets; ++set) { for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { - row_nnz->get_data()[res_row] = zero(); + row_nnz->get_data()[row - row_subset_begin[set]] = + zero(); Array l_idxs( exec, static_cast(src_ptrs[row + 1] - src_ptrs[row])); @@ -775,10 +776,9 @@ void 
calculate_nonzeros_per_row_in_index_set( ++nnz) { auto l_idx = l_idxs.get_const_data()[nnz]; if (l_idx != invalid_index()) { - row_nnz->get_data()[res_row]++; + row_nnz->get_data()[row - row_subset_begin[set]]++; } } - res_row++; } } } @@ -840,10 +840,11 @@ void compute_submatrix_from_index_set( const auto src_col_idxs = source->get_const_col_idxs(); const auto src_values = source->get_const_values(); - size_type res_nnz = 0; +#pragma omp parallel for for (size_type set = 0; set < num_row_subsets; ++set) { for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { + size_type res_nnz = res_row_ptrs[row - row_subset_begin[set]]; Array l_idxs( exec, static_cast(src_row_ptrs[row + 1] - src_row_ptrs[row])); From a09f2235d6380ee9c391d7fcb0b2b0ab1150b11d Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 16 Feb 2022 14:47:15 +0100 Subject: [PATCH 09/20] Review update. Co-authored-by: Yu-Hsiang Tsai Co-authored-by: Tobias Ribizel --- core/base/index_set.cpp | 20 +++++++++++--------- core/base/index_set_kernels.hpp | 21 ++++++++++----------- cuda/base/index_set_kernels.cpp | 2 -- dpcpp/base/index_set_kernels.dp.cpp | 2 -- hip/base/index_set_kernels.hip.cpp | 2 -- include/ginkgo/core/base/index_set.hpp | 5 ++--- omp/base/index_set_kernels.cpp | 9 +++++---- omp/test/base/index_set.cpp | 4 ++-- reference/base/index_set_kernels.cpp | 9 +++++---- reference/matrix/csr_kernels.cpp | 6 +++--- reference/test/base/index_set.cpp | 2 +- 11 files changed, 39 insertions(+), 43 deletions(-) diff --git a/core/base/index_set.cpp b/core/base/index_set.cpp index dbc31c77cfe..8e3bc8f7223 100644 --- a/core/base/index_set.cpp +++ b/core/base/index_set.cpp @@ -108,10 +108,13 @@ IndexType IndexSet::get_local_index(const IndexType index) const template IndexType IndexSet::get_subset_id(const IndexType index) const { - auto exec = this->get_executor(); - auto ss_end_host = Array(exec->get_master(), this->subsets_end_); + auto ss_end_host = make_temporary_clone>( + 
this->get_executor()->get_master(), &this->subsets_end_); + auto ss_begin_host = make_temporary_clone>( + this->get_executor()->get_master(), &this->subsets_begin_); for (size_type id = 0; id < this->get_num_subsets(); ++id) { - if (index <= ss_end_host.get_const_data()[id]) { + if (index < ss_end_host->get_const_data()[id] && + index >= ss_begin_host->get_const_data()[id]) { return id; } } @@ -128,9 +131,9 @@ Array IndexSet::to_global_indices() const this->superset_cumulative_indices_.get_num_elems() - 1); auto decomp_indices = gko::Array(exec, num_elems); exec->run(index_set::make_to_global_indices( - this->index_space_size_, this->get_num_subsets(), - this->get_subsets_begin(), this->get_subsets_end(), - this->get_superset_indices(), decomp_indices.get_data())); + this->get_num_subsets(), this->get_subsets_begin(), + this->get_subsets_end(), this->get_superset_indices(), + decomp_indices.get_data())); return decomp_indices; } @@ -146,9 +149,8 @@ Array IndexSet::map_local_to_global( GKO_ASSERT(this->get_num_subsets() >= 1); exec->run(index_set::make_local_to_global( - this->index_space_size_, this->get_num_subsets(), - this->get_subsets_begin(), this->get_subsets_end(), - this->get_superset_indices(), + this->get_num_subsets(), this->get_subsets_begin(), + this->get_subsets_end(), this->get_superset_indices(), static_cast(local_indices.get_num_elems()), local_indices.get_const_data(), global_indices.get_data(), is_sorted)); return global_indices; diff --git a/core/base/index_set_kernels.hpp b/core/base/index_set_kernels.hpp index 4ae214c6e4b..a9df7508f05 100644 --- a/core/base/index_set_kernels.hpp +++ b/core/base/index_set_kernels.hpp @@ -52,12 +52,12 @@ namespace kernels { const Array* local_indices, \ Array* validity_array) -#define GKO_DECLARE_INDEX_SET_TO_GLOBAL_INDICES_KERNEL(IndexType) \ - void to_global_indices( \ - std::shared_ptr exec, \ - const IndexType index_space_size, const IndexType num_subsets, \ - const IndexType* subset_begin, const IndexType* 
subset_end, \ - const IndexType* superset_indices, IndexType* decomp_indices) +#define GKO_DECLARE_INDEX_SET_TO_GLOBAL_INDICES_KERNEL(IndexType) \ + void to_global_indices( \ + std::shared_ptr exec, \ + const IndexType num_subsets, const IndexType* subset_begin, \ + const IndexType* subset_end, const IndexType* superset_indices, \ + IndexType* decomp_indices) #define GKO_DECLARE_INDEX_SET_POPULATE_KERNEL(IndexType) \ void populate_subsets( \ @@ -78,11 +78,10 @@ namespace kernels { #define GKO_DECLARE_INDEX_SET_LOCAL_TO_GLOBAL_KERNEL(IndexType) \ void local_to_global( \ std::shared_ptr exec, \ - const IndexType index_space_size, const IndexType num_subsets, \ - const IndexType* subset_begin, const IndexType* subset_end, \ - const IndexType* superset_indices, const IndexType num_indices, \ - const IndexType* local_indices, IndexType* global_indices, \ - const bool is_sorted) + const IndexType num_subsets, const IndexType* subset_begin, \ + const IndexType* subset_end, const IndexType* superset_indices, \ + const IndexType num_indices, const IndexType* local_indices, \ + IndexType* global_indices, const bool is_sorted) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/cuda/base/index_set_kernels.cpp b/cuda/base/index_set_kernels.cpp index c8b247e07c1..d04b5700aea 100644 --- a/cuda/base/index_set_kernels.cpp +++ b/cuda/base/index_set_kernels.cpp @@ -59,7 +59,6 @@ namespace index_set { template void to_global_indices(std::shared_ptr exec, - const IndexType index_space_size, const IndexType num_subsets, const IndexType* subset_begin, const IndexType* subset_end, @@ -98,7 +97,6 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void local_to_global(std::shared_ptr exec, - const IndexType index_space_size, const IndexType num_subsets, const IndexType* subset_begin, const IndexType* subset_end, const IndexType* superset_indices, diff --git a/dpcpp/base/index_set_kernels.dp.cpp b/dpcpp/base/index_set_kernels.dp.cpp index 76994709bfc..c8bf0f37b40 100644 --- 
a/dpcpp/base/index_set_kernels.dp.cpp +++ b/dpcpp/base/index_set_kernels.dp.cpp @@ -59,7 +59,6 @@ namespace index_set { template void to_global_indices(std::shared_ptr exec, - const IndexType index_space_size, const IndexType num_subsets, const IndexType* subset_begin, const IndexType* subset_end, @@ -98,7 +97,6 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void local_to_global(std::shared_ptr exec, - const IndexType index_space_size, const IndexType num_subsets, const IndexType* subset_begin, const IndexType* subset_end, const IndexType* superset_indices, diff --git a/hip/base/index_set_kernels.hip.cpp b/hip/base/index_set_kernels.hip.cpp index f45add6d6ab..b77b3d36f09 100644 --- a/hip/base/index_set_kernels.hip.cpp +++ b/hip/base/index_set_kernels.hip.cpp @@ -59,7 +59,6 @@ namespace index_set { template void to_global_indices(std::shared_ptr exec, - const IndexType index_space_size, const IndexType num_subsets, const IndexType* subset_begin, const IndexType* subset_end, @@ -98,7 +97,6 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void local_to_global(std::shared_ptr exec, - const IndexType index_space_size, const IndexType num_subsets, const IndexType* subset_begin, const IndexType* subset_end, const IndexType* superset_indices, diff --git a/include/ginkgo/core/base/index_set.hpp b/include/ginkgo/core/base/index_set.hpp index cc38df5f5f2..c371ac30fbc 100644 --- a/include/ginkgo/core/base/index_set.hpp +++ b/include/ginkgo/core/base/index_set.hpp @@ -104,9 +104,8 @@ class IndexSet : public EnablePolymorphicObject> { * Creates an index set on the specified executor from the initializer list. * * @param exec the Executor where the index set data will be allocated - * @param size the maximum index the index set it allowed to hold. This - * is the size of the index space. - * @param indices the indices that the index set should hold. + * @param init_list the indices that the index set should hold in an + * initializer_list. 
* @param is_sorted a parameter that specifies if the indices array is * sorted or not. `true` if sorted. */ diff --git a/omp/base/index_set_kernels.cpp b/omp/base/index_set_kernels.cpp index d9f196c6568..d8383662d70 100644 --- a/omp/base/index_set_kernels.cpp +++ b/omp/base/index_set_kernels.cpp @@ -65,7 +65,6 @@ namespace index_set { template void to_global_indices(std::shared_ptr exec, - const IndexType index_space_size, const IndexType num_subsets, const IndexType* subset_begin, const IndexType* subset_end, @@ -158,7 +157,7 @@ void global_to_local(std::shared_ptr exec, #pragma omp parallel for for (size_type i = 0; i < num_indices; ++i) { auto index = global_indices[i]; - if (index > index_space_size) { + if (index >= index_space_size) { local_indices[i] = invalid_index(); continue; } @@ -181,7 +180,6 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void local_to_global(std::shared_ptr exec, - const IndexType index_space_size, const IndexType num_subsets, const IndexType* subset_begin, const IndexType* subset_end, const IndexType* superset_indices, @@ -192,7 +190,10 @@ void local_to_global(std::shared_ptr exec, #pragma omp parallel for for (size_type i = 0; i < num_indices; ++i) { auto index = local_indices[i]; - GKO_ASSERT(index <= (superset_indices[num_subsets])); + if (index >= superset_indices[num_subsets]) { + global_indices[i] = invalid_index(); + continue; + } const auto bucket = std::distance( superset_indices, std::upper_bound(superset_indices, diff --git a/omp/test/base/index_set.cpp b/omp/test/base/index_set.cpp index f18a4f60487..452970c9ba0 100644 --- a/omp/test/base/index_set.cpp +++ b/omp/test/base/index_set.cpp @@ -235,7 +235,7 @@ TYPED_TEST(IndexSet, GetLocalIndicesIsEquivalentToReference) auto ref_global_arr = gko::Array{this->ref, rand_local_arr.get_num_elems()}; gko::kernels::reference::index_set::local_to_global( - this->ref, TypeParam(520), ref_idx_set.get_num_subsets(), + this->ref, ref_idx_set.get_num_subsets(), 
ref_idx_set.get_subsets_begin(), ref_idx_set.get_subsets_end(), ref_idx_set.get_superset_indices(), static_cast(rand_local_arr.get_num_elems()), @@ -243,7 +243,7 @@ TYPED_TEST(IndexSet, GetLocalIndicesIsEquivalentToReference) auto omp_global_arr = gko::Array{this->omp, rand_local_arr.get_num_elems()}; gko::kernels::omp::index_set::local_to_global( - this->omp, TypeParam(520), omp_idx_set.get_num_subsets(), + this->omp, omp_idx_set.get_num_subsets(), omp_idx_set.get_subsets_begin(), omp_idx_set.get_subsets_end(), omp_idx_set.get_superset_indices(), static_cast(rand_local_arr.get_num_elems()), diff --git a/reference/base/index_set_kernels.cpp b/reference/base/index_set_kernels.cpp index 9e3048d0658..6476b5619b8 100644 --- a/reference/base/index_set_kernels.cpp +++ b/reference/base/index_set_kernels.cpp @@ -83,7 +83,6 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void to_global_indices(std::shared_ptr exec, - const IndexType index_space_size, const IndexType num_subsets, const IndexType* subset_begin, const IndexType* subset_end, @@ -182,7 +181,7 @@ void global_to_local(std::shared_ptr exec, shifted_bucket = 0; } auto index = global_indices[i]; - if (index > index_space_size) { + if (index >= index_space_size) { local_indices[i] = invalid_index(); continue; } @@ -206,7 +205,6 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void local_to_global(std::shared_ptr exec, - const IndexType index_space_size, const IndexType num_subsets, const IndexType* subset_begin, const IndexType* subset_end, const IndexType* superset_indices, @@ -223,7 +221,10 @@ void local_to_global(std::shared_ptr exec, shifted_bucket = 0; } auto index = local_indices[i]; - GKO_ASSERT(index <= (superset_indices[num_subsets])); + if (index >= superset_indices[num_subsets]) { + global_indices[i] = invalid_index(); + continue; + } const auto shifted_superset = &superset_indices[shifted_bucket]; auto bucket = std::distance( superset_indices, diff --git a/reference/matrix/csr_kernels.cpp 
b/reference/matrix/csr_kernels.cpp index 4a0ba4e82fd..0d1e978ce7d 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -610,7 +610,7 @@ void calculate_nonzeros_per_row_in_span( size_type res_row = 0; for (size_type row = row_span.begin; row < row_span.end; ++row) { row_nnz->get_data()[res_row] = zero(); - for (size_type nnz = source->get_const_row_ptrs()[row]; + for (IndexType nnz = source->get_const_row_ptrs()[row]; nnz < source->get_const_row_ptrs()[row + 1]; ++nnz) { if (source->get_const_col_idxs()[nnz] < col_span.end && source->get_const_col_idxs()[nnz] >= col_span.begin) { @@ -652,7 +652,7 @@ void calculate_nonzeros_per_row_in_index_set( static_cast(l_idxs.get_num_elems()), source->get_const_col_idxs() + src_ptrs[row], l_idxs.get_data(), false); - for (size_type nnz = 0; nnz < (src_ptrs[row + 1] - src_ptrs[row]); + for (IndexType nnz = 0; nnz < (src_ptrs[row + 1] - src_ptrs[row]); ++nnz) { auto l_idx = l_idxs.get_const_data()[nnz]; if (l_idx != invalid_index()) { @@ -737,7 +737,7 @@ void compute_submatrix_from_index_set( static_cast(l_idxs.get_num_elems()), source->get_const_col_idxs() + src_row_ptrs[row], l_idxs.get_data(), false); - for (size_type nnz = 0; + for (IndexType nnz = 0; nnz < (src_row_ptrs[row + 1] - src_row_ptrs[row]); ++nnz) { auto l_idx = l_idxs.get_const_data()[nnz]; if (l_idx != invalid_index()) { diff --git a/reference/test/base/index_set.cpp b/reference/test/base/index_set.cpp index b82313aa0f3..1c954c8ce1d 100644 --- a/reference/test/base/index_set.cpp +++ b/reference/test/base/index_set.cpp @@ -189,7 +189,7 @@ TYPED_TEST(IndexSet, CanBeConstructedFromIndices) } -TYPED_TEST(IndexSet, CanBeTo_Global_IndicesedIntoIndices) +TYPED_TEST(IndexSet, CanBeConvertedToGlobalIndices) { auto idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; auto begin_comp = gko::Array{this->exec, {0, 4, 6}}; From 2efaf513bfb626350af74b946257af19307bd964 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 16 Feb 2022 
15:46:01 +0100 Subject: [PATCH 10/20] Minimize allocs in reference kernel. --- reference/matrix/csr_kernels.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 0d1e978ce7d..60c950f998f 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -637,13 +637,16 @@ void calculate_nonzeros_per_row_in_index_set( auto row_subset_begin = row_index_set.get_subsets_begin(); auto row_subset_end = row_index_set.get_subsets_end(); auto src_ptrs = source->get_const_row_ptrs(); + size_type max_row_nnz = 0; + for (size_type i = 1; i < source->get_size()[0] + 1; i++) { + max_row_nnz = + std::max(max_row_nnz, src_ptrs[i] - src_ptrs[i - 1]); + } + Array l_idxs(exec, max_row_nnz); for (size_type set = 0; set < num_row_subsets; ++set) { for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { row_nnz->get_data()[res_row] = zero(); - Array l_idxs( - exec, - static_cast(src_ptrs[row + 1] - src_ptrs[row])); gko::kernels::reference::index_set::global_to_local( exec, col_index_set.get_size(), col_index_set.get_num_subsets(), col_index_set.get_subsets_begin(), From d69f7eadcba00817aeb2644ace241a96bd545879 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 16 Feb 2022 18:12:14 +0100 Subject: [PATCH 11/20] Add scoped_trace and fix omp kernel. 
--- omp/matrix/csr_kernels.cpp | 8 ++++---- reference/test/matrix/csr_kernels.cpp | 8 ++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 76644bb3857..fff5ac1db09 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -755,12 +755,11 @@ void calculate_nonzeros_per_row_in_index_set( auto row_subset_begin = row_index_set.get_subsets_begin(); auto row_subset_end = row_index_set.get_subsets_end(); auto src_ptrs = source->get_const_row_ptrs(); -#pragma omp parallel for + size_type res_row = 0; for (size_type set = 0; set < num_row_subsets; ++set) { for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { - row_nnz->get_data()[row - row_subset_begin[set]] = - zero(); + row_nnz->get_data()[res_row] = zero(); Array l_idxs( exec, static_cast(src_ptrs[row + 1] - src_ptrs[row])); @@ -776,9 +775,10 @@ void calculate_nonzeros_per_row_in_index_set( ++nnz) { auto l_idx = l_idxs.get_const_data()[nnz]; if (l_idx != invalid_index()) { - row_nnz->get_data()[row - row_subset_begin[set]]++; + row_nnz->get_data()[res_row]++; } } + res_row++; } } } diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index 08b1d87e235..da967aac96e 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -1699,6 +1699,7 @@ TYPED_TEST(Csr, CanGetSubmatrix2) this->exec); ASSERT_EQ(mat->get_num_stored_elements(), 23); { + SCOPED_TRACE("Left top corner: Square 2x2"); auto sub_mat1 = mat->create_submatrix(gko::span(0, 2), gko::span(0, 2)); auto ref1 = gko::initialize({I{1.0, 3.0}, I{1.0, 0.0}}, this->exec); @@ -1706,6 +1707,7 @@ TYPED_TEST(Csr, CanGetSubmatrix2) GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0); } { + SCOPED_TRACE("Left boundary: Square 2x2"); auto sub_mat2 = mat->create_submatrix(gko::span(2, 4), gko::span(0, 2)); auto ref2 = gko::initialize({I{0.0, 3.0}, I{0.0, -1.0}}, this->exec); @@ 
-1713,6 +1715,7 @@ TYPED_TEST(Csr, CanGetSubmatrix2) GKO_EXPECT_MTX_NEAR(sub_mat2.get(), ref2.get(), 0.0); } { + SCOPED_TRACE("Right boundary: Square 2x2"); auto sub_mat3 = mat->create_submatrix(gko::span(0, 2), gko::span(3, 5)); auto ref3 = gko::initialize({I{0.0, 2.0}, I{7.5, 3.0}}, this->exec); @@ -1720,6 +1723,7 @@ TYPED_TEST(Csr, CanGetSubmatrix2) GKO_EXPECT_MTX_NEAR(sub_mat3.get(), ref3.get(), 0.0); } { + SCOPED_TRACE("Non-square 5x2"); auto sub_mat4 = mat->create_submatrix(gko::span(1, 6), gko::span(2, 4)); /* 4.5, 7.5 @@ -1779,6 +1783,7 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) this->exec); ASSERT_EQ(mat->get_num_stored_elements(), 23); { + SCOPED_TRACE("Small square 2x2"); auto row_set = gko::IndexSet(this->exec, {0, 1}); auto col_set = gko::IndexSet(this->exec, {0, 1}); auto sub_mat1 = mat->create_submatrix(row_set, col_set); @@ -1789,6 +1794,7 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) } { + SCOPED_TRACE("Non-square 4x2"); auto row_set = gko::IndexSet(this->exec, {1, 2, 3, 4}); auto col_set = gko::IndexSet(this->exec, {1, 3}); auto sub_mat1 = mat->create_submatrix(row_set, col_set); @@ -1800,6 +1806,7 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) } { + SCOPED_TRACE("Square 3x3"); auto row_set = gko::IndexSet(this->exec, {1, 3, 4}); auto col_set = gko::IndexSet(this->exec, {1, 3, 0}); auto sub_mat1 = mat->create_submatrix(row_set, col_set); @@ -1811,6 +1818,7 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) } { + SCOPED_TRACE("Square 4x4"); auto row_set = gko::IndexSet(this->exec, {1, 4, 5, 6}); auto col_set = gko::IndexSet(this->exec, {4, 3, 0, 1}); auto sub_mat1 = mat->create_submatrix(row_set, col_set); From b0354a6b822bea9650e2f24b18ff5e8c920901fb Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 4 Mar 2022 08:07:56 +0100 Subject: [PATCH 12/20] Remove allocs inside loops --- omp/matrix/csr_kernels.cpp | 18 ++++++++++++------ reference/matrix/csr_kernels.cpp | 9 ++++++--- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git 
a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index fff5ac1db09..601422fab0a 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -756,13 +756,16 @@ void calculate_nonzeros_per_row_in_index_set( auto row_subset_end = row_index_set.get_subsets_end(); auto src_ptrs = source->get_const_row_ptrs(); size_type res_row = 0; + size_type max_row_nnz = 0; + for (size_type i = 1; i < source->get_size()[0] + 1; i++) { + max_row_nnz = + std::max(max_row_nnz, src_ptrs[i] - src_ptrs[i - 1]); + } + Array l_idxs(exec, max_row_nnz); for (size_type set = 0; set < num_row_subsets; ++set) { for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { row_nnz->get_data()[res_row] = zero(); - Array l_idxs( - exec, - static_cast(src_ptrs[row + 1] - src_ptrs[row])); gko::kernels::omp::index_set::global_to_local( exec, col_index_set.get_size(), col_index_set.get_num_subsets(), col_index_set.get_subsets_begin(), @@ -839,15 +842,18 @@ void compute_submatrix_from_index_set( const auto src_row_ptrs = source->get_const_row_ptrs(); const auto src_col_idxs = source->get_const_col_idxs(); const auto src_values = source->get_const_values(); + size_type max_row_nnz = 0; + for (size_type i = 1; i < source->get_size()[0] + 1; i++) { + max_row_nnz = std::max( + max_row_nnz, src_row_ptrs[i] - src_row_ptrs[i - 1]); + } + Array l_idxs(exec, max_row_nnz); #pragma omp parallel for for (size_type set = 0; set < num_row_subsets; ++set) { for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { size_type res_nnz = res_row_ptrs[row - row_subset_begin[set]]; - Array l_idxs( - exec, static_cast(src_row_ptrs[row + 1] - - src_row_ptrs[row])); gko::kernels::omp::index_set::global_to_local( exec, col_index_set.get_size(), col_index_set.get_num_subsets(), col_index_set.get_subsets_begin(), diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 60c950f998f..056f126550b 100644 --- 
a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -726,12 +726,15 @@ void compute_submatrix_from_index_set( const auto src_values = source->get_const_values(); size_type res_nnz = 0; + size_type max_row_nnz = 0; + for (size_type i = 1; i < source->get_size()[0] + 1; i++) { + max_row_nnz = std::max( + max_row_nnz, src_row_ptrs[i] - src_row_ptrs[i - 1]); + } + Array l_idxs(exec, max_row_nnz); for (size_type set = 0; set < num_row_subsets; ++set) { for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { - Array l_idxs( - exec, static_cast(src_row_ptrs[row + 1] - - src_row_ptrs[row])); gko::kernels::reference::index_set::global_to_local( exec, col_index_set.get_size(), col_index_set.get_num_subsets(), col_index_set.get_subsets_begin(), From 2a3c062c4fd545e72a3d9a3606fb126233c6a01e Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 4 Mar 2022 08:08:33 +0100 Subject: [PATCH 13/20] Review update. Co-authored-by: Yuhsiang Tsai --- core/base/index_set.cpp | 2 +- core/base/index_set_kernels.hpp | 6 +++--- cuda/base/index_set_kernels.cpp | 1 - dpcpp/base/index_set_kernels.dp.cpp | 1 - hip/base/index_set_kernels.hip.cpp | 1 - include/ginkgo/core/base/index_set.hpp | 8 ++++---- omp/base/index_set_kernels.cpp | 1 - omp/matrix/csr_kernels.cpp | 8 ++++---- omp/test/base/index_set.cpp | 6 ++---- omp/test/matrix/csr_kernels.cpp | 1 + reference/base/index_set_kernels.cpp | 1 - reference/matrix/csr_kernels.cpp | 10 +++++----- 12 files changed, 20 insertions(+), 26 deletions(-) diff --git a/core/base/index_set.cpp b/core/base/index_set.cpp index 8e3bc8f7223..0c9d7416e57 100644 --- a/core/base/index_set.cpp +++ b/core/base/index_set.cpp @@ -150,7 +150,7 @@ Array IndexSet::map_local_to_global( GKO_ASSERT(this->get_num_subsets() >= 1); exec->run(index_set::make_local_to_global( this->get_num_subsets(), this->get_subsets_begin(), - this->get_subsets_end(), this->get_superset_indices(), + this->get_superset_indices(), 
static_cast(local_indices.get_num_elems()), local_indices.get_const_data(), global_indices.get_data(), is_sorted)); return global_indices; diff --git a/core/base/index_set_kernels.hpp b/core/base/index_set_kernels.hpp index a9df7508f05..c2347386af7 100644 --- a/core/base/index_set_kernels.hpp +++ b/core/base/index_set_kernels.hpp @@ -79,9 +79,9 @@ namespace kernels { void local_to_global( \ std::shared_ptr exec, \ const IndexType num_subsets, const IndexType* subset_begin, \ - const IndexType* subset_end, const IndexType* superset_indices, \ - const IndexType num_indices, const IndexType* local_indices, \ - IndexType* global_indices, const bool is_sorted) + const IndexType* superset_indices, const IndexType num_indices, \ + const IndexType* local_indices, IndexType* global_indices, \ + const bool is_sorted) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/cuda/base/index_set_kernels.cpp b/cuda/base/index_set_kernels.cpp index d04b5700aea..5efd374a3cf 100644 --- a/cuda/base/index_set_kernels.cpp +++ b/cuda/base/index_set_kernels.cpp @@ -98,7 +98,6 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void local_to_global(std::shared_ptr exec, const IndexType num_subsets, const IndexType* subset_begin, - const IndexType* subset_end, const IndexType* superset_indices, const IndexType num_indices, const IndexType* local_indices, IndexType* global_indices, diff --git a/dpcpp/base/index_set_kernels.dp.cpp b/dpcpp/base/index_set_kernels.dp.cpp index c8bf0f37b40..a21f329bad7 100644 --- a/dpcpp/base/index_set_kernels.dp.cpp +++ b/dpcpp/base/index_set_kernels.dp.cpp @@ -98,7 +98,6 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void local_to_global(std::shared_ptr exec, const IndexType num_subsets, const IndexType* subset_begin, - const IndexType* subset_end, const IndexType* superset_indices, const IndexType num_indices, const IndexType* local_indices, IndexType* global_indices, diff --git a/hip/base/index_set_kernels.hip.cpp b/hip/base/index_set_kernels.hip.cpp index 
b77b3d36f09..777ece35d63 100644 --- a/hip/base/index_set_kernels.hip.cpp +++ b/hip/base/index_set_kernels.hip.cpp @@ -98,7 +98,6 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void local_to_global(std::shared_ptr exec, const IndexType num_subsets, const IndexType* subset_begin, - const IndexType* subset_end, const IndexType* superset_indices, const IndexType num_indices, const IndexType* local_indices, IndexType* global_indices, diff --git a/include/ginkgo/core/base/index_set.hpp b/include/ginkgo/core/base/index_set.hpp index c371ac30fbc..98d19fda78c 100644 --- a/include/ginkgo/core/base/index_set.hpp +++ b/include/ginkgo/core/base/index_set.hpp @@ -190,7 +190,7 @@ class IndexSet : public EnablePolymorphicObject> { * @param local_index the local index. * @return the global index from the index set. * - * @warning This single entry query can have significant kernel lauch + * @warning This single entry query can have significant kernel launch * overheads and should be avoided if possible. */ index_type get_global_index(index_type local_index) const; @@ -212,7 +212,7 @@ class IndexSet : public EnablePolymorphicObject> { * * @return the local index of the element in the index set. * - * @warning This single entry query can have significant kernel lauch + * @warning This single entry query can have significant kernel launch * overheads and should be avoided if possible. */ index_type get_local_index(index_type global_index) const; @@ -234,7 +234,7 @@ class IndexSet : public EnablePolymorphicObject> { * * @return the local index of the element in the index set. * - * @warning This single entry query can have significant kernel lauch + * @warning This single entry query can have significant kernel launch * overheads and should be avoided if possible. */ index_type get_subset_id(index_type global_index) const; @@ -301,7 +301,7 @@ class IndexSet : public EnablePolymorphicObject> { * * @return whether the element exists in the index set. 
* - * @warning This single entry query can have significant kernel lauch + * @warning This single entry query can have significant kernel launch * overheads and should be avoided if possible. */ bool contains(const index_type global_index) const; diff --git a/omp/base/index_set_kernels.cpp b/omp/base/index_set_kernels.cpp index d8383662d70..444939e8540 100644 --- a/omp/base/index_set_kernels.cpp +++ b/omp/base/index_set_kernels.cpp @@ -181,7 +181,6 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void local_to_global(std::shared_ptr exec, const IndexType num_subsets, const IndexType* subset_begin, - const IndexType* subset_end, const IndexType* superset_indices, const IndexType num_indices, const IndexType* local_indices, IndexType* global_indices, diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 601422fab0a..7dda83eb75b 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -731,7 +731,7 @@ void calculate_nonzeros_per_row_in_span( #pragma omp parallel for for (size_type row = row_span.begin; row < row_span.end; ++row) { row_nnz->get_data()[row - row_span.begin] = zero(); - for (size_type nnz = row_ptrs[row]; nnz < row_ptrs[row + 1]; ++nnz) { + for (auto nnz = row_ptrs[row]; nnz < row_ptrs[row + 1]; ++nnz) { if (col_idxs[nnz] >= col_span.begin && col_idxs[nnz] < col_span.end) { row_nnz->get_data()[row - row_span.begin]++; @@ -763,7 +763,7 @@ void calculate_nonzeros_per_row_in_index_set( } Array l_idxs(exec, max_row_nnz); for (size_type set = 0; set < num_row_subsets; ++set) { - for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; + for (auto row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { row_nnz->get_data()[res_row] = zero(); gko::kernels::omp::index_set::global_to_local( @@ -807,7 +807,7 @@ void compute_submatrix(std::shared_ptr exec, #pragma omp parallel for for (size_type row = 0; row < num_rows; ++row) { size_type res_nnz = res_row_ptrs[row]; - for (size_type nnz = 
row_ptrs[row_offset + row]; + for (auto nnz = row_ptrs[row_offset + row]; nnz < row_ptrs[row_offset + row + 1]; ++nnz) { const auto local_col = col_idxs[nnz] - col_offset; if (local_col >= 0 && local_col < num_cols) { @@ -851,7 +851,7 @@ void compute_submatrix_from_index_set( #pragma omp parallel for for (size_type set = 0; set < num_row_subsets; ++set) { - for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; + for (auto row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { size_type res_nnz = res_row_ptrs[row - row_subset_begin[set]]; gko::kernels::omp::index_set::global_to_local( diff --git a/omp/test/base/index_set.cpp b/omp/test/base/index_set.cpp index 452970c9ba0..1e0c99e9ef6 100644 --- a/omp/test/base/index_set.cpp +++ b/omp/test/base/index_set.cpp @@ -236,16 +236,14 @@ TYPED_TEST(IndexSet, GetLocalIndicesIsEquivalentToReference) gko::Array{this->ref, rand_local_arr.get_num_elems()}; gko::kernels::reference::index_set::local_to_global( this->ref, ref_idx_set.get_num_subsets(), - ref_idx_set.get_subsets_begin(), ref_idx_set.get_subsets_end(), - ref_idx_set.get_superset_indices(), + ref_idx_set.get_subsets_begin(), ref_idx_set.get_superset_indices(), static_cast(rand_local_arr.get_num_elems()), rand_local_arr.get_const_data(), ref_global_arr.get_data(), false); auto omp_global_arr = gko::Array{this->omp, rand_local_arr.get_num_elems()}; gko::kernels::omp::index_set::local_to_global( this->omp, omp_idx_set.get_num_subsets(), - omp_idx_set.get_subsets_begin(), omp_idx_set.get_subsets_end(), - omp_idx_set.get_superset_indices(), + omp_idx_set.get_subsets_begin(), omp_idx_set.get_superset_indices(), static_cast(rand_local_arr.get_num_elems()), rand_local_arr.get_const_data(), omp_global_arr.get_data(), false); diff --git a/omp/test/matrix/csr_kernels.cpp b/omp/test/matrix/csr_kernels.cpp index bb977241209..0fdff31ed6a 100644 --- a/omp/test/matrix/csr_kernels.cpp +++ b/omp/test/matrix/csr_kernels.cpp @@ -776,6 +776,7 @@ TEST_F(Csr, 
ComputeSubmatrixFromIndexSetIsEquivalentToRef) this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); auto num_nnz = row_nnz.get_data()[rset.get_num_elems()]; auto drow_nnz = gko::Array(this->omp, row_nnz); + drow_nnz.fill(gko::one()); auto smat1 = Mtx::create( this->ref, gko::dim<2>(rset.get_num_elems(), cset.get_num_elems()), std::move(gko::Array(this->ref, num_nnz)), diff --git a/reference/base/index_set_kernels.cpp b/reference/base/index_set_kernels.cpp index 6476b5619b8..37191b6e320 100644 --- a/reference/base/index_set_kernels.cpp +++ b/reference/base/index_set_kernels.cpp @@ -206,7 +206,6 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void local_to_global(std::shared_ptr exec, const IndexType num_subsets, const IndexType* subset_begin, - const IndexType* subset_end, const IndexType* superset_indices, const IndexType num_indices, const IndexType* local_indices, IndexType* global_indices, diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 056f126550b..4334f9bd623 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -608,9 +608,9 @@ void calculate_nonzeros_per_row_in_span( const span& col_span, Array* row_nnz) { size_type res_row = 0; - for (size_type row = row_span.begin; row < row_span.end; ++row) { + for (auto row = row_span.begin; row < row_span.end; ++row) { row_nnz->get_data()[res_row] = zero(); - for (IndexType nnz = source->get_const_row_ptrs()[row]; + for (auto nnz = source->get_const_row_ptrs()[row]; nnz < source->get_const_row_ptrs()[row + 1]; ++nnz) { if (source->get_const_col_idxs()[nnz] < col_span.end && source->get_const_col_idxs()[nnz] >= col_span.begin) { @@ -644,7 +644,7 @@ void calculate_nonzeros_per_row_in_index_set( } Array l_idxs(exec, max_row_nnz); for (size_type set = 0; set < num_row_subsets; ++set) { - for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; + for (auto row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { 
row_nnz->get_data()[res_row] = zero(); gko::kernels::reference::index_set::global_to_local( @@ -733,7 +733,7 @@ void compute_submatrix_from_index_set( } Array l_idxs(exec, max_row_nnz); for (size_type set = 0; set < num_row_subsets; ++set) { - for (size_type row = row_subset_begin[set]; row < row_subset_end[set]; + for (auto row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { gko::kernels::reference::index_set::global_to_local( exec, col_index_set.get_size(), col_index_set.get_num_subsets(), @@ -743,7 +743,7 @@ void compute_submatrix_from_index_set( static_cast(l_idxs.get_num_elems()), source->get_const_col_idxs() + src_row_ptrs[row], l_idxs.get_data(), false); - for (IndexType nnz = 0; + for (auto nnz = 0; nnz < (src_row_ptrs[row + 1] - src_row_ptrs[row]); ++nnz) { auto l_idx = l_idxs.get_const_data()[nnz]; if (l_idx != invalid_index()) { From 841c82761edc70ddc54e8436e02e2a97c016dce6 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 7 Mar 2022 23:07:29 +0100 Subject: [PATCH 14/20] Some kernel perf updates. Co-authored-by: Yuhsiang Tsai --- omp/base/index_set_kernels.cpp | 3 +- omp/matrix/csr_kernels.cpp | 89 ++++++++++++++------------- omp/test/matrix/csr_kernels.cpp | 2 - reference/base/index_set_kernels.cpp | 3 +- reference/matrix/csr_kernels.cpp | 84 +++++++++++++------------ reference/test/matrix/csr_kernels.cpp | 12 ++++ 6 files changed, 107 insertions(+), 86 deletions(-) diff --git a/omp/base/index_set_kernels.cpp b/omp/base/index_set_kernels.cpp index 444939e8540..de81f30a991 100644 --- a/omp/base/index_set_kernels.cpp +++ b/omp/base/index_set_kernels.cpp @@ -165,7 +165,8 @@ void global_to_local(std::shared_ptr exec, subset_begin, std::upper_bound(subset_begin, subset_begin + num_subsets, index)); auto shifted_bucket = bucket == 0 ? 
0 : (bucket - 1); - if (subset_end[shifted_bucket] <= index) { + if (subset_end[shifted_bucket] <= index || + index < subset_begin[shifted_bucket]) { local_indices[i] = invalid_index(); } else { local_indices[i] = index - subset_begin[shifted_bucket] + diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 7dda83eb75b..43b7dbaadd2 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -752,32 +752,35 @@ void calculate_nonzeros_per_row_in_index_set( const IndexSet& col_index_set, Array* row_nnz) { auto num_row_subsets = row_index_set.get_num_subsets(); + auto num_col_subsets = col_index_set.get_num_subsets(); + auto row_superset_indices = row_index_set.get_superset_indices(); auto row_subset_begin = row_index_set.get_subsets_begin(); auto row_subset_end = row_index_set.get_subsets_end(); + auto col_subset_begin = col_index_set.get_subsets_begin(); + auto col_subset_end = col_index_set.get_subsets_end(); auto src_ptrs = source->get_const_row_ptrs(); - size_type res_row = 0; - size_type max_row_nnz = 0; - for (size_type i = 1; i < source->get_size()[0] + 1; i++) { - max_row_nnz = - std::max(max_row_nnz, src_ptrs[i] - src_ptrs[i - 1]); - } - Array l_idxs(exec, max_row_nnz); + +#pragma omp parallel for for (size_type set = 0; set < num_row_subsets; ++set) { + size_type res_row = row_superset_indices[set]; for (auto row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { row_nnz->get_data()[res_row] = zero(); - gko::kernels::omp::index_set::global_to_local( - exec, col_index_set.get_size(), col_index_set.get_num_subsets(), - col_index_set.get_subsets_begin(), - col_index_set.get_subsets_end(), - col_index_set.get_superset_indices(), - static_cast(l_idxs.get_num_elems()), - source->get_const_col_idxs() + src_ptrs[row], l_idxs.get_data(), - false); - for (size_type nnz = 0; nnz < (src_ptrs[row + 1] - src_ptrs[row]); - ++nnz) { - auto l_idx = l_idxs.get_const_data()[nnz]; - if (l_idx != invalid_index()) { + for (size_type i 
= src_ptrs[row]; i < src_ptrs[row + 1]; ++i) { + auto index = source->get_const_col_idxs()[i]; + if (index >= col_index_set.get_size()) { + continue; + } + const auto bucket = std::distance( + col_subset_begin, + std::upper_bound(col_subset_begin, + col_subset_begin + num_col_subsets, + index)); + auto shifted_bucket = bucket == 0 ? 0 : (bucket - 1); + if (col_subset_end[shifted_bucket] <= index || + (index < col_subset_begin[shifted_bucket])) { + continue; + } else { row_nnz->get_data()[res_row]++; } } @@ -839,35 +842,37 @@ void compute_submatrix_from_index_set( auto res_row_ptrs = result->get_row_ptrs(); auto res_col_idxs = result->get_col_idxs(); auto res_values = result->get_values(); - const auto src_row_ptrs = source->get_const_row_ptrs(); + auto num_col_subsets = col_index_set.get_num_subsets(); + auto col_subset_begin = col_index_set.get_subsets_begin(); + auto col_subset_end = col_index_set.get_subsets_end(); + auto col_superset_indices = col_index_set.get_superset_indices(); + const auto src_ptrs = source->get_const_row_ptrs(); const auto src_col_idxs = source->get_const_col_idxs(); const auto src_values = source->get_const_values(); - size_type max_row_nnz = 0; - for (size_type i = 1; i < source->get_size()[0] + 1; i++) { - max_row_nnz = std::max( - max_row_nnz, src_row_ptrs[i] - src_row_ptrs[i - 1]); - } - Array l_idxs(exec, max_row_nnz); -#pragma omp parallel for + size_type res_nnz = 0; for (size_type set = 0; set < num_row_subsets; ++set) { for (auto row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { - size_type res_nnz = res_row_ptrs[row - row_subset_begin[set]]; - gko::kernels::omp::index_set::global_to_local( - exec, col_index_set.get_size(), col_index_set.get_num_subsets(), - col_index_set.get_subsets_begin(), - col_index_set.get_subsets_end(), - col_index_set.get_superset_indices(), - static_cast(l_idxs.get_num_elems()), - source->get_const_col_idxs() + src_row_ptrs[row], - l_idxs.get_data(), false); - for (size_type nnz = 0; - nnz 
< (src_row_ptrs[row + 1] - src_row_ptrs[row]); ++nnz) { - auto l_idx = l_idxs.get_const_data()[nnz]; - if (l_idx != invalid_index()) { - res_col_idxs[res_nnz] = l_idx; - res_values[res_nnz] = src_values[nnz + src_row_ptrs[row]]; + for (size_type i = src_ptrs[row]; i < src_ptrs[row + 1]; ++i) { + auto index = src_col_idxs[i]; + if (index >= col_index_set.get_size()) { + continue; + } + const auto bucket = std::distance( + col_subset_begin, + std::upper_bound(col_subset_begin, + col_subset_begin + num_col_subsets, + index)); + auto shifted_bucket = bucket == 0 ? 0 : (bucket - 1); + if (col_subset_end[shifted_bucket] <= index || + (index < col_subset_begin[shifted_bucket])) { + continue; + } else { + res_col_idxs[res_nnz] = + index - col_subset_begin[shifted_bucket] + + col_superset_indices[shifted_bucket]; + res_values[res_nnz] = src_values[i]; res_nnz++; } } diff --git a/omp/test/matrix/csr_kernels.cpp b/omp/test/matrix/csr_kernels.cpp index 0fdff31ed6a..dc2b32edd68 100644 --- a/omp/test/matrix/csr_kernels.cpp +++ b/omp/test/matrix/csr_kernels.cpp @@ -776,7 +776,6 @@ TEST_F(Csr, ComputeSubmatrixFromIndexSetIsEquivalentToRef) this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); auto num_nnz = row_nnz.get_data()[rset.get_num_elems()]; auto drow_nnz = gko::Array(this->omp, row_nnz); - drow_nnz.fill(gko::one()); auto smat1 = Mtx::create( this->ref, gko::dim<2>(rset.get_num_elems(), cset.get_num_elems()), std::move(gko::Array(this->ref, num_nnz)), @@ -788,7 +787,6 @@ TEST_F(Csr, ComputeSubmatrixFromIndexSetIsEquivalentToRef) std::move(gko::Array(this->omp, num_nnz)), std::move(drow_nnz)); - gko::kernels::reference::csr::compute_submatrix_from_index_set( this->ref, this->mtx2.get(), rset, cset, smat1.get()); gko::kernels::omp::csr::compute_submatrix_from_index_set( diff --git a/reference/base/index_set_kernels.cpp b/reference/base/index_set_kernels.cpp index 37191b6e320..25602e55322 100644 --- a/reference/base/index_set_kernels.cpp +++ 
b/reference/base/index_set_kernels.cpp @@ -190,7 +190,8 @@ void global_to_local(std::shared_ptr exec, subset_begin, std::upper_bound(shifted_subset, subset_begin + num_subsets, index)); shifted_bucket = bucket == 0 ? 0 : (bucket - 1); - if (subset_end[shifted_bucket] <= index) { + if (subset_end[shifted_bucket] <= index || + index < subset_begin[shifted_bucket]) { local_indices[i] = invalid_index(); } else { local_indices[i] = index - subset_begin[shifted_bucket] + diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 4334f9bd623..365847ef592 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -632,33 +632,34 @@ void calculate_nonzeros_per_row_in_index_set( const IndexSet& row_index_set, const IndexSet& col_index_set, Array* row_nnz) { - size_type res_row = 0; auto num_row_subsets = row_index_set.get_num_subsets(); auto row_subset_begin = row_index_set.get_subsets_begin(); auto row_subset_end = row_index_set.get_subsets_end(); + auto row_superset_indices = row_index_set.get_superset_indices(); + auto num_col_subsets = col_index_set.get_num_subsets(); + auto col_subset_begin = col_index_set.get_subsets_begin(); + auto col_subset_end = col_index_set.get_subsets_end(); auto src_ptrs = source->get_const_row_ptrs(); - size_type max_row_nnz = 0; - for (size_type i = 1; i < source->get_size()[0] + 1; i++) { - max_row_nnz = - std::max(max_row_nnz, src_ptrs[i] - src_ptrs[i - 1]); - } - Array l_idxs(exec, max_row_nnz); for (size_type set = 0; set < num_row_subsets; ++set) { + size_type res_row = row_superset_indices[set]; for (auto row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { row_nnz->get_data()[res_row] = zero(); - gko::kernels::reference::index_set::global_to_local( - exec, col_index_set.get_size(), col_index_set.get_num_subsets(), - col_index_set.get_subsets_begin(), - col_index_set.get_subsets_end(), - col_index_set.get_superset_indices(), - static_cast(l_idxs.get_num_elems()), - 
source->get_const_col_idxs() + src_ptrs[row], l_idxs.get_data(), - false); - for (IndexType nnz = 0; nnz < (src_ptrs[row + 1] - src_ptrs[row]); - ++nnz) { - auto l_idx = l_idxs.get_const_data()[nnz]; - if (l_idx != invalid_index()) { + for (size_type i = src_ptrs[row]; i < src_ptrs[row + 1]; ++i) { + auto index = source->get_const_col_idxs()[i]; + if (index >= col_index_set.get_size()) { + continue; + } + const auto bucket = std::distance( + col_subset_begin, + std::upper_bound(col_subset_begin, + col_subset_begin + num_col_subsets, + index)); + auto shifted_bucket = bucket == 0 ? 0 : (bucket - 1); + if (col_subset_end[shifted_bucket] <= index || + (index < col_subset_begin[shifted_bucket])) { + continue; + } else { row_nnz->get_data()[res_row]++; } } @@ -721,34 +722,37 @@ void compute_submatrix_from_index_set( auto res_row_ptrs = result->get_row_ptrs(); auto res_col_idxs = result->get_col_idxs(); auto res_values = result->get_values(); - const auto src_row_ptrs = source->get_const_row_ptrs(); + auto num_col_subsets = col_index_set.get_num_subsets(); + auto col_subset_begin = col_index_set.get_subsets_begin(); + auto col_subset_end = col_index_set.get_subsets_end(); + auto col_superset_indices = col_index_set.get_superset_indices(); + const auto src_ptrs = source->get_const_row_ptrs(); const auto src_col_idxs = source->get_const_col_idxs(); const auto src_values = source->get_const_values(); size_type res_nnz = 0; - size_type max_row_nnz = 0; - for (size_type i = 1; i < source->get_size()[0] + 1; i++) { - max_row_nnz = std::max( - max_row_nnz, src_row_ptrs[i] - src_row_ptrs[i - 1]); - } - Array l_idxs(exec, max_row_nnz); for (size_type set = 0; set < num_row_subsets; ++set) { for (auto row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { - gko::kernels::reference::index_set::global_to_local( - exec, col_index_set.get_size(), col_index_set.get_num_subsets(), - col_index_set.get_subsets_begin(), - col_index_set.get_subsets_end(), - 
col_index_set.get_superset_indices(), - static_cast(l_idxs.get_num_elems()), - source->get_const_col_idxs() + src_row_ptrs[row], - l_idxs.get_data(), false); - for (auto nnz = 0; - nnz < (src_row_ptrs[row + 1] - src_row_ptrs[row]); ++nnz) { - auto l_idx = l_idxs.get_const_data()[nnz]; - if (l_idx != invalid_index()) { - res_col_idxs[res_nnz] = l_idx; - res_values[res_nnz] = src_values[nnz + src_row_ptrs[row]]; + for (size_type i = src_ptrs[row]; i < src_ptrs[row + 1]; ++i) { + auto index = source->get_const_col_idxs()[i]; + if (index >= col_index_set.get_size()) { + continue; + } + const auto bucket = std::distance( + col_subset_begin, + std::upper_bound(col_subset_begin, + col_subset_begin + num_col_subsets, + index)); + auto shifted_bucket = bucket == 0 ? 0 : (bucket - 1); + if (col_subset_end[shifted_bucket] <= index || + (index < col_subset_begin[shifted_bucket])) { + continue; + } else { + res_col_idxs[res_nnz] = + index - col_subset_begin[shifted_bucket] + + col_superset_indices[shifted_bucket]; + res_values[res_nnz] = src_values[i]; res_nnz++; } } diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index da967aac96e..8745d08010e 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -1830,6 +1830,18 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0); } + + { + SCOPED_TRACE("Non Square 2x4"); + auto row_set = gko::IndexSet(this->exec, {5, 6}); + auto col_set = gko::IndexSet(this->exec, {4, 3, 0, 1}); + auto sub_mat1 = mat->create_submatrix(row_set, col_set); + auto ref1 = gko::initialize({I{0.0, 1.0, 0.0, 2.0}, // 5 + I{0.0, 3.0, 7.5, 1.0}}, // 6 + this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0); + } } From 690b15bebc55a79532425f486b48501e257435ef Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 17 Mar 2022 10:00:04 +0100 Subject: [PATCH 15/20] Review updates. 
Co-authored-by: Yu-Hsiang Tsai --- omp/matrix/csr_kernels.cpp | 6 +++++- omp/test/matrix/csr_kernels.cpp | 26 ++++++++++++++++++++++++++ reference/test/matrix/csr_kernels.cpp | 2 ++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 43b7dbaadd2..30e1604805a 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -839,6 +839,7 @@ void compute_submatrix_from_index_set( auto num_row_subsets = row_index_set.get_num_subsets(); auto row_subset_begin = row_index_set.get_subsets_begin(); auto row_subset_end = row_index_set.get_subsets_end(); + auto row_superset_indices = row_index_set.get_superset_indices(); auto res_row_ptrs = result->get_row_ptrs(); auto res_col_idxs = result->get_col_idxs(); auto res_values = result->get_values(); @@ -850,10 +851,13 @@ void compute_submatrix_from_index_set( const auto src_col_idxs = source->get_const_col_idxs(); const auto src_values = source->get_const_values(); - size_type res_nnz = 0; +#pragma unroll for (size_type set = 0; set < num_row_subsets; ++set) { for (auto row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { + auto local_row = + row - row_subset_begin[set] + row_superset_indices[set]; + auto res_nnz = res_row_ptrs[local_row]; for (size_type i = src_ptrs[row]; i < src_ptrs[row + 1]; ++i) { auto index = src_col_idxs[i]; if (index >= col_index_set.get_size()) { diff --git a/omp/test/matrix/csr_kernels.cpp b/omp/test/matrix/csr_kernels.cpp index dc2b32edd68..86a2992e015 100644 --- a/omp/test/matrix/csr_kernels.cpp +++ b/omp/test/matrix/csr_kernels.cpp @@ -755,6 +755,32 @@ TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) } +TEST_F(Csr, CalculateNnzPerRowInIndexSetIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + using IndexType = int; + using ValueType = double; + set_up_mat_data(); + gko::IndexSet rset{ + this->ref, {42, 7, 8, 9, 10, 22, 25, 26, 34, 35, 36, 51}}; + gko::IndexSet cset{this->ref, + {42, 22, 24, 26, 28, 
30, 81, 82, 83, 88}}; + gko::IndexSet drset(this->omp, rset); + gko::IndexSet dcset(this->omp, cset); + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rset.get_num_elems() + 1); + row_nnz.fill(gko::zero()); + auto drow_nnz = gko::Array(this->omp, row_nnz); + + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_index_set( + this->ref, this->mtx2.get(), rset, cset, &row_nnz); + gko::kernels::omp::csr::calculate_nonzeros_per_row_in_index_set( + this->omp, this->dmtx2.get(), drset, dcset, &drow_nnz); + + GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); +} + + TEST_F(Csr, ComputeSubmatrixFromIndexSetIsEquivalentToRef) { using Mtx = gko::matrix::Csr<>; diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index 8745d08010e..d8d5fefd2f5 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -1781,7 +1781,9 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) I{0.0, 3.0, 0.0, 7.5, 1.0} // 6 }, this->exec); + ASSERT_EQ(mat->get_num_stored_elements(), 23); + { SCOPED_TRACE("Small square 2x2"); auto row_set = gko::IndexSet(this->exec, {0, 1}); From 5fe2f23d902278f946bc7f20026bf2e05b97113d Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 23 Mar 2022 18:03:46 +0100 Subject: [PATCH 16/20] Review update. 
Co-authored-by: Tobias Ribizel Co-authored-by: Marcel Koch --- core/base/index_set.cpp | 17 -------- core/matrix/csr.cpp | 5 ++- core/matrix/csr_kernels.hpp | 2 +- cuda/matrix/csr_kernels.cu | 2 +- dpcpp/matrix/csr_kernels.dp.cpp | 2 +- hip/matrix/csr_kernels.hip.cpp | 2 +- include/ginkgo/core/base/index_set.hpp | 58 +++++++++++--------------- include/ginkgo/core/matrix/csr.hpp | 41 ++++++++++++++---- omp/base/index_set_kernels.cpp | 35 ++++++++-------- omp/matrix/csr_kernels.cpp | 18 ++++---- omp/test/matrix/csr_kernels.cpp | 6 +-- reference/base/index_set_kernels.cpp | 4 +- reference/matrix/csr_kernels.cpp | 6 +-- reference/test/base/index_set.cpp | 14 ------- reference/test/matrix/csr_kernels.cpp | 43 +++++++++++++++++++ 15 files changed, 140 insertions(+), 115 deletions(-) diff --git a/core/base/index_set.cpp b/core/base/index_set.cpp index 0c9d7416e57..60bbbb6cda0 100644 --- a/core/base/index_set.cpp +++ b/core/base/index_set.cpp @@ -105,23 +105,6 @@ IndexType IndexSet::get_local_index(const IndexType index) const } -template -IndexType IndexSet::get_subset_id(const IndexType index) const -{ - auto ss_end_host = make_temporary_clone>( - this->get_executor()->get_master(), &this->subsets_end_); - auto ss_begin_host = make_temporary_clone>( - this->get_executor()->get_master(), &this->subsets_begin_); - for (size_type id = 0; id < this->get_num_subsets(); ++id) { - if (index < ss_end_host->get_const_data()[id] && - index >= ss_begin_host->get_const_data()[id]) { - return id; - } - } - return -1; -} - - template Array IndexSet::to_global_indices() const { diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp index a6c3a2a0431..1faf5c68ddf 100644 --- a/core/matrix/csr.cpp +++ b/core/matrix/csr.cpp @@ -625,6 +625,9 @@ Csr::create_submatrix( { using Mat = Csr; auto exec = this->get_executor(); + if (!row_index_set.get_num_elems() || !col_index_set.get_num_elems()) { + return Mat::create(exec); + } if (row_index_set.is_contiguous() && col_index_set.is_contiguous()) 
{ auto row_st = row_index_set.get_executor()->copy_val_to_host( row_index_set.get_subsets_begin()); @@ -643,7 +646,7 @@ Csr::create_submatrix( auto sub_mat_size = gko::dim<2>(submat_num_rows, submat_num_cols); Array row_ptrs(exec, submat_num_rows + 1); exec->run(csr::make_calculate_nonzeros_per_row_in_index_set( - this, row_index_set, col_index_set, &row_ptrs)); + this, row_index_set, col_index_set, row_ptrs.get_data())); exec->run( csr::make_prefix_sum(row_ptrs.get_data(), submat_num_rows + 1)); auto num_nnz = diff --git a/core/matrix/csr_kernels.hpp b/core/matrix/csr_kernels.hpp index 14f7f491f24..5f3fd4d9f3c 100644 --- a/core/matrix/csr_kernels.hpp +++ b/core/matrix/csr_kernels.hpp @@ -172,7 +172,7 @@ namespace kernels { std::shared_ptr exec, \ const matrix::Csr* source, \ const IndexSet& row_index_set, \ - const IndexSet& col_index_set, Array* row_nnz) + const IndexSet& col_index_set, IndexType* row_nnz) #define GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL(ValueType, IndexType) \ void compute_submatrix(std::shared_ptr exec, \ diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu index 7ab812b50fc..35f2f8341e4 100644 --- a/cuda/matrix/csr_kernels.cu +++ b/cuda/matrix/csr_kernels.cu @@ -1164,7 +1164,7 @@ void calculate_nonzeros_per_row_in_index_set( const matrix::Csr* source, const IndexSet& row_index_set, const IndexSet& col_index_set, - Array* row_nnz) GKO_NOT_IMPLEMENTED; + IndexType* row_nnz) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index 835ba943a26..d55cb29e395 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -1391,7 +1391,7 @@ void calculate_nonzeros_per_row_in_index_set( const matrix::Csr* source, const IndexSet& row_index_set, const IndexSet& col_index_set, - Array* row_nnz) GKO_NOT_IMPLEMENTED; + IndexType* row_nnz) GKO_NOT_IMPLEMENTED; 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); diff --git a/hip/matrix/csr_kernels.hip.cpp b/hip/matrix/csr_kernels.hip.cpp index f1d09c1a163..3bedaaa3e15 100644 --- a/hip/matrix/csr_kernels.hip.cpp +++ b/hip/matrix/csr_kernels.hip.cpp @@ -951,7 +951,7 @@ void calculate_nonzeros_per_row_in_index_set( const matrix::Csr* source, const IndexSet& row_index_set, const IndexSet& col_index_set, - Array* row_nnz) GKO_NOT_IMPLEMENTED; + IndexType* row_nnz) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); diff --git a/include/ginkgo/core/base/index_set.hpp b/include/ginkgo/core/base/index_set.hpp index 98d19fda78c..11e0344e9da 100644 --- a/include/ginkgo/core/base/index_set.hpp +++ b/include/ginkgo/core/base/index_set.hpp @@ -82,10 +82,17 @@ namespace gko { * @ingroup IndexSet */ template -class IndexSet : public EnablePolymorphicObject> { +class IndexSet : public EnablePolymorphicObject>, + public EnablePolymorphicAssignment>, + public EnableCreateMethod> { friend class EnablePolymorphicObject; + friend class EnableCreateMethod; public: + using EnableCreateMethod::create; + using EnablePolymorphicAssignment::convert_to; + using EnablePolymorphicAssignment::move_to; + /** * The type of elements stored in the index set. */ @@ -97,7 +104,9 @@ class IndexSet : public EnablePolymorphicObject> { * @param exec the Executor where the IndexSet data is allocated */ IndexSet(std::shared_ptr exec) - : EnablePolymorphicObject(std::move(exec)) + : EnablePolymorphicObject(std::move(exec)), + index_space_size_{0}, + num_stored_indices_{0} {} /** @@ -109,14 +118,18 @@ class IndexSet : public EnablePolymorphicObject> { * @param is_sorted a parameter that specifies if the indices array is * sorted or not. `true` if sorted. 
*/ - IndexSet(std::shared_ptr executor, - std::initializer_list init_list, - const bool is_sorted = false) + explicit IndexSet(std::shared_ptr executor, + std::initializer_list init_list, + const bool is_sorted = false) : EnablePolymorphicObject(std::move(executor)), - index_space_size_( - *(std::max_element(std::begin(init_list), std::end(init_list))) + - 1) + index_space_size_(init_list.size() > 0 + ? *(std::max_element(std::begin(init_list), + std::end(init_list))) + + 1 + : 0), + num_stored_indices_{static_cast(init_list.size())} { + GKO_ASSERT(index_space_size_ > 0); this->populate_subsets( Array(this->get_executor(), init_list), is_sorted); } @@ -131,9 +144,10 @@ class IndexSet : public EnablePolymorphicObject> { * @param is_sorted a parameter that specifies if the indices array is * sorted or not. `true` if sorted. */ - IndexSet(std::shared_ptr executor, - const index_type size, const gko::Array& indices, - const bool is_sorted = false) + explicit IndexSet(std::shared_ptr executor, + const index_type size, + const gko::Array& indices, + const bool is_sorted = false) : EnablePolymorphicObject(std::move(executor)), index_space_size_(size) { @@ -217,28 +231,6 @@ class IndexSet : public EnablePolymorphicObject> { */ index_type get_local_index(index_type global_index) const; - /** - * Return which set the global index belongs to. - * - * Consider the set idx_set = (0, 1, 2, 4, 6, 7, 8, 9). This function - * returns the subset id in the index set of the input global index. For - * example, `idx_set.get_subset_id(0) == 0` `idx_set.get_subset_id(4) - * == 1` and `idx_set.get_subset_id(6) == 2`. - * - * @note This function returns a scalar value and needs a scalar value. - * For repeated queries, it is more efficient to use the Array - * functions that take and return arrays which allow for more - * throughput. - * - * @param global_index the global index. - * - * @return the local index of the element in the index set. 
- * - * @warning This single entry query can have significant kernel launch - * overheads and should be avoided if possible. - */ - index_type get_subset_id(index_type global_index) const; - /** * This is an array version of the scalar function above. * diff --git a/include/ginkgo/core/matrix/csr.hpp b/include/ginkgo/core/matrix/csr.hpp index 9bfa9335945..7065deccba0 100644 --- a/include/ginkgo/core/matrix/csr.hpp +++ b/include/ginkgo/core/matrix/csr.hpp @@ -793,13 +793,6 @@ class Csr : public EnableLinOp>, std::unique_ptr> extract_diagonal() const override; - std::unique_ptr> create_submatrix( - const gko::IndexSet& row_index_set, - const gko::IndexSet& column_index_set) const; - - std::unique_ptr> create_submatrix( - const gko::span& row_span, const gko::span& column_span) const; - std::unique_ptr compute_absolute() const override; void compute_absolute_inplace() override; @@ -959,7 +952,7 @@ class Csr : public EnableLinOp>, this->inv_scale_impl(make_temporary_clone(exec, alpha).get()); } - /* + /** * Creates a constant (immutable) Csr matrix from a set of constant arrays. * * @param exec the executor to create the matrix on @@ -987,7 +980,7 @@ class Csr : public EnableLinOp>, gko::detail::array_const_cast(std::move(row_ptrs)), strategy}); } - /* + /** * This is version of create_const with a default strategy. */ static std::unique_ptr create_const( @@ -1001,6 +994,36 @@ class Csr : public EnableLinOp>, Csr::make_default_strategy(exec)); } + /** + * Creates a submatrix from this Csr matrix given row and column IndexSet + * objects. + * + * @param row_index_set the row index set containing the set of rows to be + * in the submatrix. + * @param column_index_set the col index set containing the set of columns + * to be in the submatrix. + * @return A new CSR matrix with the elements that belong to the row and + * columns of this matrix as specified by the index sets. + * @note This is not a view but creates a new, separate CSR matrix. 
+ */ + std::unique_ptr> create_submatrix( + const gko::IndexSet& row_index_set, + const gko::IndexSet& column_index_set) const; + + /** + * Creates a submatrix from this Csr matrix given row and column spans. + * + * @param row_span the row span containing the contiguous set of rows to be + * in the submatrix. + * @param column_span the column span containing the contiguous set of + * columns to be in the submatrix. + * @return A new CSR matrix with the elements that belong to the row and + * columns of this matrix as specified by the spans. + * @note This is not a view but creates a new, separate CSR matrix. + */ + std::unique_ptr> create_submatrix( + const gko::span& row_span, const gko::span& column_span) const; + protected: /** * Creates an uninitialized CSR matrix of the specified size. diff --git a/omp/base/index_set_kernels.cpp b/omp/base/index_set_kernels.cpp index de81f30a991..223373053fe 100644 --- a/omp/base/index_set_kernels.cpp +++ b/omp/base/index_set_kernels.cpp @@ -73,10 +73,11 @@ void to_global_indices(std::shared_ptr exec, { #pragma omp parallel for for (size_type subset = 0; subset < num_subsets; ++subset) { - for (size_type i = 0; - i < superset_indices[subset + 1] - superset_indices[subset]; ++i) { - decomp_indices[superset_indices[subset] + i] = - subset_begin[subset] + i; + IndexType local_i{}; + for (auto i = superset_indices[subset]; + i < superset_indices[subset + 1]; ++i) { + decomp_indices[i] = local_i + subset_begin[subset]; + local_i++; } } } @@ -157,20 +158,19 @@ void global_to_local(std::shared_ptr exec, #pragma omp parallel for for (size_type i = 0; i < num_indices; ++i) { auto index = global_indices[i]; - if (index >= index_space_size) { + if (index < 0 || index >= index_space_size) { local_indices[i] = invalid_index(); continue; } const auto bucket = std::distance( - subset_begin, - std::upper_bound(subset_begin, subset_begin + num_subsets, index)); - auto shifted_bucket = bucket == 0 ?
0 : (bucket - 1); - if (subset_end[shifted_bucket] <= index || - index < subset_begin[shifted_bucket]) { + subset_begin + 1, + std::upper_bound(subset_begin + 1, subset_begin + num_subsets + 1, + index)); + if (index >= subset_end[bucket] || index < subset_begin[bucket]) { local_indices[i] = invalid_index(); } else { - local_indices[i] = index - subset_begin[shifted_bucket] + - superset_indices[shifted_bucket]; + local_indices[i] = + index - subset_begin[bucket] + superset_indices[bucket]; } } } @@ -190,17 +190,16 @@ void local_to_global(std::shared_ptr exec, #pragma omp parallel for for (size_type i = 0; i < num_indices; ++i) { auto index = local_indices[i]; - if (index >= superset_indices[num_subsets]) { + if (index < 0 || index >= superset_indices[num_subsets]) { global_indices[i] = invalid_index(); continue; } const auto bucket = std::distance( - superset_indices, - std::upper_bound(superset_indices, + superset_indices + 1, + std::upper_bound(superset_indices + 1, superset_indices + num_subsets + 1, index)); - auto shifted_bucket = bucket == 0 ? 
0 : (bucket - 1); - global_indices[i] = subset_begin[shifted_bucket] + index - - superset_indices[shifted_bucket]; + global_indices[i] = + subset_begin[bucket] + index - superset_indices[bucket]; } } diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 30e1604805a..8fae745f757 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -749,7 +749,7 @@ void calculate_nonzeros_per_row_in_index_set( std::shared_ptr exec, const matrix::Csr* source, const IndexSet& row_index_set, - const IndexSet& col_index_set, Array* row_nnz) + const IndexSet& col_index_set, IndexType* row_nnz) { auto num_row_subsets = row_index_set.get_num_subsets(); auto num_col_subsets = col_index_set.get_num_subsets(); @@ -765,7 +765,7 @@ void calculate_nonzeros_per_row_in_index_set( size_type res_row = row_superset_indices[set]; for (auto row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { - row_nnz->get_data()[res_row] = zero(); + row_nnz[res_row] = zero(); for (size_type i = src_ptrs[row]; i < src_ptrs[row + 1]; ++i) { auto index = source->get_const_col_idxs()[i]; if (index >= col_index_set.get_size()) { @@ -777,11 +777,9 @@ void calculate_nonzeros_per_row_in_index_set( col_subset_begin + num_col_subsets, index)); auto shifted_bucket = bucket == 0 ? 0 : (bucket - 1); - if (col_subset_end[shifted_bucket] <= index || - (index < col_subset_begin[shifted_bucket])) { - continue; - } else { - row_nnz->get_data()[res_row]++; + if (index < col_subset_end[shifted_bucket] && + index >= col_subset_begin[shifted_bucket]) { + row_nnz[res_row]++; } } res_row++; @@ -869,10 +867,8 @@ void compute_submatrix_from_index_set( col_subset_begin + num_col_subsets, index)); auto shifted_bucket = bucket == 0 ? 
0 : (bucket - 1); - if (col_subset_end[shifted_bucket] <= index || - (index < col_subset_begin[shifted_bucket])) { - continue; - } else { + if (index < col_subset_end[shifted_bucket] && + (index >= col_subset_begin[shifted_bucket])) { res_col_idxs[res_nnz] = index - col_subset_begin[shifted_bucket] + col_superset_indices[shifted_bucket]; diff --git a/omp/test/matrix/csr_kernels.cpp b/omp/test/matrix/csr_kernels.cpp index 86a2992e015..7a61c72ac7b 100644 --- a/omp/test/matrix/csr_kernels.cpp +++ b/omp/test/matrix/csr_kernels.cpp @@ -773,9 +773,9 @@ TEST_F(Csr, CalculateNnzPerRowInIndexSetIsEquivalentToRef) auto drow_nnz = gko::Array(this->omp, row_nnz); gko::kernels::reference::csr::calculate_nonzeros_per_row_in_index_set( - this->ref, this->mtx2.get(), rset, cset, &row_nnz); + this->ref, this->mtx2.get(), rset, cset, row_nnz.get_data()); gko::kernels::omp::csr::calculate_nonzeros_per_row_in_index_set( - this->omp, this->dmtx2.get(), drset, dcset, &drow_nnz); + this->omp, this->dmtx2.get(), drset, dcset, drow_nnz.get_data()); GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); } @@ -797,7 +797,7 @@ TEST_F(Csr, ComputeSubmatrixFromIndexSetIsEquivalentToRef) auto row_nnz = gko::Array(this->ref, rset.get_num_elems() + 1); row_nnz.fill(gko::zero()); gko::kernels::reference::csr::calculate_nonzeros_per_row_in_index_set( - this->ref, this->mtx2.get(), rset, cset, &row_nnz); + this->ref, this->mtx2.get(), rset, cset, row_nnz.get_data()); gko::kernels::reference::components::prefix_sum( this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); auto num_nnz = row_nnz.get_data()[rset.get_num_elems()]; diff --git a/reference/base/index_set_kernels.cpp b/reference/base/index_set_kernels.cpp index 25602e55322..acdffa3fb0e 100644 --- a/reference/base/index_set_kernels.cpp +++ b/reference/base/index_set_kernels.cpp @@ -181,7 +181,7 @@ void global_to_local(std::shared_ptr exec, shifted_bucket = 0; } auto index = global_indices[i]; - if (index >= index_space_size) { + if (index < 0 || index >= 
index_space_size) { local_indices[i] = invalid_index(); continue; } @@ -221,7 +221,7 @@ void local_to_global(std::shared_ptr exec, shifted_bucket = 0; } auto index = local_indices[i]; - if (index >= superset_indices[num_subsets]) { + if (index < 0 || index >= superset_indices[num_subsets]) { global_indices[i] = invalid_index(); continue; } diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 365847ef592..f7021c5224a 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -630,7 +630,7 @@ void calculate_nonzeros_per_row_in_index_set( std::shared_ptr exec, const matrix::Csr* source, const IndexSet& row_index_set, - const IndexSet& col_index_set, Array* row_nnz) + const IndexSet& col_index_set, IndexType* row_nnz) { auto num_row_subsets = row_index_set.get_num_subsets(); auto row_subset_begin = row_index_set.get_subsets_begin(); @@ -644,7 +644,7 @@ void calculate_nonzeros_per_row_in_index_set( size_type res_row = row_superset_indices[set]; for (auto row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { - row_nnz->get_data()[res_row] = zero(); + row_nnz[res_row] = zero(); for (size_type i = src_ptrs[row]; i < src_ptrs[row + 1]; ++i) { auto index = source->get_const_col_idxs()[i]; if (index >= col_index_set.get_size()) { @@ -660,7 +660,7 @@ void calculate_nonzeros_per_row_in_index_set( (index < col_subset_begin[shifted_bucket])) { continue; } else { - row_nnz->get_data()[res_row]++; + row_nnz[res_row]++; } } res_row++; diff --git a/reference/test/base/index_set.cpp b/reference/test/base/index_set.cpp index 1c954c8ce1d..026201106c1 100644 --- a/reference/test/base/index_set.cpp +++ b/reference/test/base/index_set.cpp @@ -321,20 +321,6 @@ TYPED_TEST(IndexSet, CanGetLocalIndex) } -TYPED_TEST(IndexSet, CanGetSubsetId) -{ - auto idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; - - ASSERT_EQ(idx_set.get_num_elems(), 8); - 
EXPECT_EQ(idx_set.get_subset_id(6), 2); - EXPECT_EQ(idx_set.get_subset_id(7), 2); - EXPECT_EQ(idx_set.get_subset_id(0), 0); - EXPECT_EQ(idx_set.get_subset_id(8), 2); - EXPECT_EQ(idx_set.get_subset_id(4), 1); -} - - TYPED_TEST(IndexSet, CanDetectNonExistentIndices) { auto idx_arr = gko::Array{ diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index d8d5fefd2f5..62422489a7f 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -1784,6 +1784,47 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) ASSERT_EQ(mat->get_num_stored_elements(), 23); + { + SCOPED_TRACE("Both empty index sets"); + auto row_set = gko::IndexSet(this->exec); + auto col_set = gko::IndexSet(this->exec); + auto sub_mat1 = mat->create_submatrix(row_set, col_set); + auto ref1 = Mtx::create(this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0); + } + + { + SCOPED_TRACE("One empty index set"); + auto row_set = gko::IndexSet(this->exec); + auto col_set = gko::IndexSet(this->exec, {0}); + auto sub_mat1 = mat->create_submatrix(row_set, col_set); + auto ref1 = Mtx::create(this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0); + } + + { + SCOPED_TRACE("Full index set"); + auto row_set = + gko::IndexSet(this->exec, {0, 1, 2, 3, 4, 5, 6}); + auto col_set = gko::IndexSet(this->exec, {0, 1, 2, 3, 4}); + auto sub_mat1 = mat->create_submatrix(row_set, col_set); + auto ref1 = gko::initialize( + { + I{1.0, 3.0, 4.5, 0.0, 2.0}, // 0 + I{1.0, 0.0, 4.5, 7.5, 3.0}, // 1 + I{0.0, 3.0, 4.5, 0.0, 2.0}, // 2 + I{0.0, -1.0, 2.5, 0.0, 2.0}, // 3 + I{1.0, 0.0, -1.0, 3.5, 1.0}, // 4 + I{0.0, 1.0, 0.0, 0.0, 2.0}, // 5 + I{0.0, 3.0, 0.0, 7.5, 1.0} // 6 + }, + this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0); + } + { SCOPED_TRACE("Small square 2x2"); auto row_set = gko::IndexSet(this->exec, {0, 1}); @@ -1822,6 +1863,8 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) { SCOPED_TRACE("Square 4x4"); auto 
row_set = gko::IndexSet(this->exec, {1, 4, 5, 6}); + // This is unsorted to make sure that the output is correct (sorted) + // even when the input is unsorted. auto col_set = gko::IndexSet(this->exec, {4, 3, 0, 1}); auto sub_mat1 = mat->create_submatrix(row_set, col_set); auto ref1 = gko::initialize({I{1.0, 0.0, 7.5, 3.0}, // 1 From f4ffc369673548fddd410aafba456670c932e096 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 25 Mar 2022 15:30:02 +0100 Subject: [PATCH 17/20] Make IndexSet a non-polymorphic object. --- cuda/test/base/CMakeLists.txt | 1 + cuda/test/base/index_set.cu | 114 +++++++++++++++++++++++++ hip/test/base/CMakeLists.txt | 1 + hip/test/base/index_set.hip.cpp | 114 +++++++++++++++++++++++++ include/ginkgo/core/base/index_set.hpp | 39 ++++----- reference/test/base/index_set.cpp | 14 +++ 6 files changed, 264 insertions(+), 19 deletions(-) create mode 100644 cuda/test/base/index_set.cu create mode 100644 hip/test/base/index_set.hip.cpp diff --git a/cuda/test/base/CMakeLists.txt b/cuda/test/base/CMakeLists.txt index c23efefa8ed..b53c348b0ba 100644 --- a/cuda/test/base/CMakeLists.txt +++ b/cuda/test/base/CMakeLists.txt @@ -1,5 +1,6 @@ ginkgo_create_cuda_test(array) ginkgo_create_cuda_test(cuda_executor) +ginkgo_create_cuda_test(index_set) ginkgo_create_thread_test(cuda_executor_reset) if(GINKGO_HAVE_HWLOC) find_package(NUMA REQUIRED) diff --git a/cuda/test/base/index_set.cu b/cuda/test/base/index_set.cu new file mode 100644 index 00000000000..2ebc2809345 --- /dev/null +++ b/cuda/test/base/index_set.cu @@ -0,0 +1,114 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2.
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +class IndexSet : public ::testing::Test { +protected: + using T = int; + IndexSet() + : exec(gko::ReferenceExecutor::create()), + cuda(gko::CudaExecutor::create(0, gko::ReferenceExecutor::create())) + {} + + void TearDown() + { + if (exec != nullptr) { + // ensure that previous calls finished and didn't throw an error + ASSERT_NO_THROW(exec->synchronize()); + } + } + + static void assert_equal_index_sets(gko::IndexSet& a, + gko::IndexSet& b) + { + ASSERT_EQ(a.get_size(), b.get_size()); + ASSERT_EQ(a.get_num_subsets(), b.get_num_subsets()); + if (a.get_num_subsets() > 0) { + for (auto i = 0; i < a.get_num_subsets(); ++i) { + EXPECT_EQ(a.get_subsets_begin()[i], b.get_subsets_begin()[i]); + EXPECT_EQ(a.get_subsets_end()[i], b.get_subsets_end()[i]); + EXPECT_EQ(a.get_superset_indices()[i], + b.get_superset_indices()[i]); + } + } + } + + static void assert_equal_arrays(const T num_elems, const T* a, const T* b) + { + if (num_elems > 0) { + for (auto i = 0; i < num_elems; ++i) { + EXPECT_EQ(a[i], b[i]); + } + } + } + + std::shared_ptr exec; + std::shared_ptr cuda; +}; + + +TEST_F(IndexSet, CanBeCopiedBetweenExecutors) +{ + auto idx_arr = gko::Array{exec, {0, 1, 2, 4, 6, 7, 8, 9}}; + auto begin_comp = gko::Array{exec, {0, 4, 6}}; + auto end_comp = gko::Array{exec, {3, 5, 10}}; + auto superset_comp = gko::Array{exec, {0, 3, 4, 8}}; + + auto idx_set = gko::IndexSet{exec, 10, idx_arr}; + auto cuda_idx_set = gko::IndexSet(cuda, idx_set); + auto host_idx_set = gko::IndexSet(exec, cuda_idx_set); + + ASSERT_EQ(cuda_idx_set.get_executor(), cuda); + this->assert_equal_index_sets(host_idx_set, idx_set); +} + + +} // namespace diff --git a/hip/test/base/CMakeLists.txt b/hip/test/base/CMakeLists.txt index 4529a40c862..a34aa042722 100644 --- a/hip/test/base/CMakeLists.txt +++ 
b/hip/test/base/CMakeLists.txt @@ -1,4 +1,5 @@ ginkgo_create_hip_test(hip_executor) +ginkgo_create_hip_test(index_set) ginkgo_create_thread_test(hip_executor_reset) if(GINKGO_HAVE_HWLOC) find_package(NUMA REQUIRED) diff --git a/hip/test/base/index_set.hip.cpp b/hip/test/base/index_set.hip.cpp new file mode 100644 index 00000000000..04492452e37 --- /dev/null +++ b/hip/test/base/index_set.hip.cpp @@ -0,0 +1,114 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +class IndexSet : public ::testing::Test { +protected: + using T = int; + IndexSet() + : exec(gko::ReferenceExecutor::create()), + hip(gko::HipExecutor::create(0, gko::ReferenceExecutor::create())) + {} + + void TearDown() + { + if (exec != nullptr) { + // ensure that previous calls finished and didn't throw an error + ASSERT_NO_THROW(exec->synchronize()); + } + } + + static void assert_equal_index_sets(gko::IndexSet& a, + gko::IndexSet& b) + { + ASSERT_EQ(a.get_size(), b.get_size()); + ASSERT_EQ(a.get_num_subsets(), b.get_num_subsets()); + if (a.get_num_subsets() > 0) { + for (auto i = 0; i < a.get_num_subsets(); ++i) { + EXPECT_EQ(a.get_subsets_begin()[i], b.get_subsets_begin()[i]); + EXPECT_EQ(a.get_subsets_end()[i], b.get_subsets_end()[i]); + EXPECT_EQ(a.get_superset_indices()[i], + b.get_superset_indices()[i]); + } + } + } + + static void assert_equal_arrays(const T num_elems, const T* a, const T* b) + { + if (num_elems > 0) { + for (auto i = 0; i < num_elems; ++i) { + EXPECT_EQ(a[i], b[i]); + } + } + } + + std::shared_ptr exec; + std::shared_ptr hip; +}; + + +TEST_F(IndexSet, CanBeCopiedBetweenExecutors) +{ + auto idx_arr = gko::Array{exec, {0, 1, 2, 4, 6, 7, 8, 9}}; + auto begin_comp = gko::Array{exec, {0, 4, 6}}; + auto end_comp = gko::Array{exec, {3, 5, 
10}}; + auto superset_comp = gko::Array{exec, {0, 3, 4, 8}}; + + auto idx_set = gko::IndexSet{exec, 10, idx_arr}; + auto hip_idx_set = gko::IndexSet(hip, idx_set); + auto host_idx_set = gko::IndexSet(exec, hip_idx_set); + + ASSERT_EQ(hip_idx_set.get_executor(), hip); + this->assert_equal_index_sets(host_idx_set, idx_set); +} + + +} // namespace diff --git a/include/ginkgo/core/base/index_set.hpp b/include/ginkgo/core/base/index_set.hpp index 11e0344e9da..1a42562f4e6 100644 --- a/include/ginkgo/core/base/index_set.hpp +++ b/include/ginkgo/core/base/index_set.hpp @@ -82,17 +82,8 @@ namespace gko { * @ingroup IndexSet */ template -class IndexSet : public EnablePolymorphicObject>, - public EnablePolymorphicAssignment>, - public EnableCreateMethod> { - friend class EnablePolymorphicObject; - friend class EnableCreateMethod; - +class IndexSet { public: - using EnableCreateMethod::create; - using EnablePolymorphicAssignment::convert_to; - using EnablePolymorphicAssignment::move_to; - /** * The type of elements stored in the index set. */ @@ -104,9 +95,7 @@ class IndexSet : public EnablePolymorphicObject>, * @param exec the Executor where the IndexSet data is allocated */ IndexSet(std::shared_ptr exec) - : EnablePolymorphicObject(std::move(exec)), - index_space_size_{0}, - num_stored_indices_{0} + : exec_(std::move(exec)), index_space_size_{0}, num_stored_indices_{0} {} /** @@ -118,10 +107,10 @@ class IndexSet : public EnablePolymorphicObject>, * @param is_sorted a parameter that specifies if the indices array is * sorted or not. `true` if sorted. */ - explicit IndexSet(std::shared_ptr executor, + explicit IndexSet(std::shared_ptr exec, std::initializer_list init_list, const bool is_sorted = false) - : EnablePolymorphicObject(std::move(executor)), + : exec_(std::move(exec)), index_space_size_(init_list.size() > 0 ? 
*(std::max_element(std::begin(init_list), std::end(init_list))) + @@ -144,12 +133,11 @@ class IndexSet : public EnablePolymorphicObject>, * @param is_sorted a parameter that specifies if the indices array is * sorted or not. `true` if sorted. */ - explicit IndexSet(std::shared_ptr executor, + explicit IndexSet(std::shared_ptr exec, const index_type size, const gko::Array& indices, const bool is_sorted = false) - : EnablePolymorphicObject(std::move(executor)), - index_space_size_(size) + : exec_(std::move(exec)), index_space_size_(size) { GKO_ASSERT(index_space_size_ >= indices.get_num_elems()); this->populate_subsets(indices, is_sorted); @@ -164,9 +152,21 @@ class IndexSet : public EnablePolymorphicObject>, IndexSet(std::shared_ptr exec, const IndexSet& other) : IndexSet(exec) { - *this = other; + this->index_space_size_ = other.index_space_size_; + this->num_stored_indices_ = other.num_stored_indices_; + subsets_begin_ = gko::Array(exec, other.subsets_begin_); + subsets_end_ = gko::Array(exec, other.subsets_end_); + superset_cumulative_indices_ = + gko::Array(exec, other.superset_cumulative_indices_); } + /** + * Returns the executor of the IndexSet + * + * @return the executor. + */ + std::shared_ptr get_executor() const { return this->exec_; } + /** * Returns the size of the index set space. 
* @@ -344,6 +344,7 @@ class IndexSet : public EnablePolymorphicObject>, void populate_subsets(const gko::Array& indices, const bool is_sorted); + std::shared_ptr exec_; index_type index_space_size_; index_type num_stored_indices_; gko::Array subsets_begin_; diff --git a/reference/test/base/index_set.cpp b/reference/test/base/index_set.cpp index 026201106c1..993a132215b 100644 --- a/reference/test/base/index_set.cpp +++ b/reference/test/base/index_set.cpp @@ -94,6 +94,14 @@ class IndexSet : public ::testing::Test { TYPED_TEST_SUITE(IndexSet, gko::test::IndexTypes, TypenameNameGenerator); +TYPED_TEST(IndexSet, KnowsItsExecutor) +{ + auto idx_set = gko::IndexSet{this->exec}; + + ASSERT_EQ(this->exec, idx_set.get_executor()); +} + + TYPED_TEST(IndexSet, CanBeCopyConstructed) { auto idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; @@ -105,6 +113,7 @@ TYPED_TEST(IndexSet, CanBeCopyConstructed) gko::IndexSet idx_set2(idx_set); + ASSERT_EQ(idx_set2.get_executor(), idx_set.get_executor()); this->assert_equal_index_sets(idx_set2, idx_set); } @@ -120,6 +129,8 @@ TYPED_TEST(IndexSet, CanBeMoveConstructed) gko::IndexSet idx_set2(std::move(idx_set)); + ASSERT_EQ(idx_set2.get_executor(), this->exec); + ASSERT_EQ(idx_set.get_executor(), nullptr); ASSERT_EQ(idx_set2.get_size(), 10); } @@ -135,6 +146,7 @@ TYPED_TEST(IndexSet, CanBeCopyAssigned) gko::IndexSet idx_set2 = idx_set; + ASSERT_EQ(idx_set2.get_executor(), idx_set.get_executor()); this->assert_equal_index_sets(idx_set2, idx_set); } @@ -150,6 +162,8 @@ TYPED_TEST(IndexSet, CanBeMoveAssigned) gko::IndexSet idx_set2 = std::move(idx_set); + ASSERT_EQ(idx_set2.get_executor(), this->exec); + ASSERT_EQ(idx_set.get_executor(), nullptr); ASSERT_EQ(idx_set2.get_size(), 10); } From 3982fe1b0c4f6743feb8acd9305930a4ce612466 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 25 Mar 2022 16:47:14 +0100 Subject: [PATCH 18/20] Rename IndexSet to index_set --- common/unified/base/index_set_kernels.cpp | 6 +- 
core/base/index_set.cpp | 38 ++++----- core/base/index_set_kernels.hpp | 3 +- core/device_hooks/common_kernels.inc.cpp | 4 +- core/matrix/csr.cpp | 4 +- core/matrix/csr_kernels.hpp | 8 +- cuda/base/index_set_kernels.cpp | 4 +- cuda/matrix/csr_kernels.cu | 8 +- cuda/test/base/index_set.cu | 16 ++-- dpcpp/base/index_set_kernels.dp.cpp | 4 +- dpcpp/matrix/csr_kernels.dp.cpp | 8 +- hip/base/index_set_kernels.hip.cpp | 4 +- hip/matrix/csr_kernels.hip.cpp | 8 +- hip/test/base/index_set.hip.cpp | 16 ++-- include/ginkgo/core/base/index_set.hpp | 38 ++++----- include/ginkgo/core/matrix/csr.hpp | 8 +- omp/base/index_set_kernels.cpp | 4 +- omp/matrix/csr_kernels.cpp | 8 +- omp/test/base/index_set.cpp | 54 ++++++------- omp/test/matrix/csr_kernels.cpp | 36 ++++----- reference/base/index_set_kernels.cpp | 4 +- reference/matrix/csr_kernels.cpp | 8 +- reference/test/base/index_set.cpp | 94 +++++++++++------------ reference/test/matrix/csr_kernels.cpp | 34 ++++---- 24 files changed, 209 insertions(+), 210 deletions(-) diff --git a/common/unified/base/index_set_kernels.cpp b/common/unified/base/index_set_kernels.cpp index d2a41f7b05e..c292617d317 100644 --- a/common/unified/base/index_set_kernels.cpp +++ b/common/unified/base/index_set_kernels.cpp @@ -43,11 +43,11 @@ namespace gko { namespace kernels { namespace GKO_DEVICE_NAMESPACE { /** - * @brief The IndexSet namespace. + * @brief The index_set namespace. * * @ingroup index_set */ -namespace index_set { +namespace idx_set { template @@ -68,7 +68,7 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_INDEX_SET_COMPUTE_VALIDITY_KERNEL); -} // namespace index_set +} // namespace idx_set } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels } // namespace gko diff --git a/core/base/index_set.cpp b/core/base/index_set.cpp index 60bbbb6cda0..51be9931c25 100644 --- a/core/base/index_set.cpp +++ b/core/base/index_set.cpp @@ -47,32 +47,32 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace gko { -namespace index_set { +namespace idx_set { -GKO_REGISTER_OPERATION(to_global_indices, index_set::to_global_indices); -GKO_REGISTER_OPERATION(populate_subsets, index_set::populate_subsets); -GKO_REGISTER_OPERATION(global_to_local, index_set::global_to_local); -GKO_REGISTER_OPERATION(local_to_global, index_set::local_to_global); +GKO_REGISTER_OPERATION(to_global_indices, idx_set::to_global_indices); +GKO_REGISTER_OPERATION(populate_subsets, idx_set::populate_subsets); +GKO_REGISTER_OPERATION(global_to_local, idx_set::global_to_local); +GKO_REGISTER_OPERATION(local_to_global, idx_set::local_to_global); -} // namespace index_set +} // namespace idx_set template -void IndexSet::populate_subsets(const gko::Array& indices, - const bool is_sorted) +void index_set::populate_subsets( + const gko::Array& indices, const bool is_sorted) { auto exec = this->get_executor(); this->num_stored_indices_ = indices.get_num_elems(); - exec->run(index_set::make_populate_subsets( + exec->run(idx_set::make_populate_subsets( this->index_space_size_, &indices, &this->subsets_begin_, &this->subsets_end_, &this->superset_cumulative_indices_, is_sorted)); } template -bool IndexSet::contains(const IndexType input_index) const +bool index_set::contains(const IndexType input_index) const { auto local_index = this->get_local_index(input_index); return local_index != invalid_index(); @@ -80,7 +80,7 @@ bool IndexSet::contains(const IndexType input_index) const template -IndexType IndexSet::get_global_index(const IndexType index) const +IndexType index_set::get_global_index(const IndexType index) const { auto exec = this->get_executor(); const auto local_idx = @@ -93,7 +93,7 @@ IndexType IndexSet::get_global_index(const IndexType index) const template -IndexType IndexSet::get_local_index(const IndexType index) const +IndexType index_set::get_local_index(const IndexType index) const { auto exec = this->get_executor(); const auto global_idx = @@ -106,14 +106,14 @@ IndexType 
IndexSet::get_local_index(const IndexType index) const template -Array IndexSet::to_global_indices() const +Array index_set::to_global_indices() const { auto exec = this->get_executor(); auto num_elems = exec->copy_val_to_host( this->superset_cumulative_indices_.get_const_data() + this->superset_cumulative_indices_.get_num_elems() - 1); auto decomp_indices = gko::Array(exec, num_elems); - exec->run(index_set::make_to_global_indices( + exec->run(idx_set::make_to_global_indices( this->get_num_subsets(), this->get_subsets_begin(), this->get_subsets_end(), this->get_superset_indices(), decomp_indices.get_data())); @@ -123,7 +123,7 @@ Array IndexSet::to_global_indices() const template -Array IndexSet::map_local_to_global( +Array index_set::map_local_to_global( const Array& local_indices, const bool is_sorted) const { auto exec = this->get_executor(); @@ -131,7 +131,7 @@ Array IndexSet::map_local_to_global( gko::Array(exec, local_indices.get_num_elems()); GKO_ASSERT(this->get_num_subsets() >= 1); - exec->run(index_set::make_local_to_global( + exec->run(idx_set::make_local_to_global( this->get_num_subsets(), this->get_subsets_begin(), this->get_superset_indices(), static_cast(local_indices.get_num_elems()), @@ -141,7 +141,7 @@ Array IndexSet::map_local_to_global( template -Array IndexSet::map_global_to_local( +Array index_set::map_global_to_local( const Array& global_indices, const bool is_sorted) const { auto exec = this->get_executor(); @@ -149,7 +149,7 @@ Array IndexSet::map_global_to_local( gko::Array(exec, global_indices.get_num_elems()); GKO_ASSERT(this->get_num_subsets() >= 1); - exec->run(index_set::make_global_to_local( + exec->run(idx_set::make_global_to_local( this->index_space_size_, this->get_num_subsets(), this->get_subsets_begin(), this->get_subsets_end(), this->get_superset_indices(), @@ -159,7 +159,7 @@ Array IndexSet::map_global_to_local( } -#define GKO_DECLARE_INDEX_SET(_type) class IndexSet<_type> +#define GKO_DECLARE_INDEX_SET(_type) class 
index_set<_type> GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INDEX_SET); diff --git a/core/base/index_set_kernels.hpp b/core/base/index_set_kernels.hpp index c2347386af7..21774cf7a62 100644 --- a/core/base/index_set_kernels.hpp +++ b/core/base/index_set_kernels.hpp @@ -97,8 +97,7 @@ namespace kernels { GKO_DECLARE_INDEX_SET_LOCAL_TO_GLOBAL_KERNEL(IndexType) -GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(index_set, - GKO_DECLARE_ALL_AS_TEMPLATES); +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(idx_set, GKO_DECLARE_ALL_AS_TEMPLATES); #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index bcc89a0a948..a89d1f3c140 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -214,7 +214,7 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_CONVERT_PTRS_TO_SIZES); } // namespace components -namespace index_set { +namespace idx_set { GKO_STUB_INDEX_TYPE(GKO_DECLARE_INDEX_SET_COMPUTE_VALIDITY_KERNEL); @@ -224,7 +224,7 @@ GKO_STUB_INDEX_TYPE(GKO_DECLARE_INDEX_SET_GLOBAL_TO_LOCAL_KERNEL); GKO_STUB_INDEX_TYPE(GKO_DECLARE_INDEX_SET_LOCAL_TO_GLOBAL_KERNEL); -} // namespace index_set +} // namespace idx_set namespace partition { diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp index 1faf5c68ddf..00ddc4bca24 100644 --- a/core/matrix/csr.cpp +++ b/core/matrix/csr.cpp @@ -620,8 +620,8 @@ Csr::create_submatrix(const gko::span& row_span, template std::unique_ptr> Csr::create_submatrix( - const IndexSet& row_index_set, - const IndexSet& col_index_set) const + const index_set& row_index_set, + const index_set& col_index_set) const { using Mat = Csr; auto exec = this->get_executor(); diff --git a/core/matrix/csr_kernels.hpp b/core/matrix/csr_kernels.hpp index 5f3fd4d9f3c..be82a73b40d 100644 --- a/core/matrix/csr_kernels.hpp +++ b/core/matrix/csr_kernels.hpp @@ -171,8 +171,8 @@ namespace kernels { void calculate_nonzeros_per_row_in_index_set( \ std::shared_ptr exec, 
\ const matrix::Csr* source, \ - const IndexSet& row_index_set, \ - const IndexSet& col_index_set, IndexType* row_nnz) + const gko::index_set& row_index_set, \ + const gko::index_set& col_index_set, IndexType* row_nnz) #define GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL(ValueType, IndexType) \ void compute_submatrix(std::shared_ptr exec, \ @@ -185,8 +185,8 @@ namespace kernels { void compute_submatrix_from_index_set( \ std::shared_ptr exec, \ const matrix::Csr* source, \ - const IndexSet& row_index_set, \ - const IndexSet& col_index_set, \ + const gko::index_set& row_index_set, \ + const gko::index_set& col_index_set, \ matrix::Csr* result) #define GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) \ diff --git a/cuda/base/index_set_kernels.cpp b/cuda/base/index_set_kernels.cpp index 5efd374a3cf..6f23562b8ee 100644 --- a/cuda/base/index_set_kernels.cpp +++ b/cuda/base/index_set_kernels.cpp @@ -54,7 +54,7 @@ namespace cuda { * * @ingroup index_set */ -namespace index_set { +namespace idx_set { template @@ -107,7 +107,7 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_INDEX_SET_LOCAL_TO_GLOBAL_KERNEL); -} // namespace index_set +} // namespace idx_set } // namespace cuda } // namespace kernels } // namespace gko diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu index 35f2f8341e4..a0b7b236361 100644 --- a/cuda/matrix/csr_kernels.cu +++ b/cuda/matrix/csr_kernels.cu @@ -1162,8 +1162,8 @@ template void calculate_nonzeros_per_row_in_index_set( std::shared_ptr exec, const matrix::Csr* source, - const IndexSet& row_index_set, - const IndexSet& col_index_set, + const gko::index_set& row_index_set, + const gko::index_set& col_index_set, IndexType* row_nnz) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -1174,8 +1174,8 @@ template void compute_submatrix_from_index_set( std::shared_ptr exec, const matrix::Csr* source, - const IndexSet& row_index_set, - const IndexSet& col_index_set, + const gko::index_set& row_index_set, + 
const gko::index_set& col_index_set, matrix::Csr* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/cuda/test/base/index_set.cu b/cuda/test/base/index_set.cu index 2ebc2809345..5a5dd7a49e2 100644 --- a/cuda/test/base/index_set.cu +++ b/cuda/test/base/index_set.cu @@ -50,10 +50,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { -class IndexSet : public ::testing::Test { +class index_set : public ::testing::Test { protected: using T = int; - IndexSet() + index_set() : exec(gko::ReferenceExecutor::create()), cuda(gko::CudaExecutor::create(0, gko::ReferenceExecutor::create())) {} @@ -66,8 +66,8 @@ protected: } } - static void assert_equal_index_sets(gko::IndexSet& a, - gko::IndexSet& b) + static void assert_equal_index_sets(gko::index_set& a, + gko::index_set& b) { ASSERT_EQ(a.get_size(), b.get_size()); ASSERT_EQ(a.get_num_subsets(), b.get_num_subsets()); @@ -95,16 +95,16 @@ protected: }; -TEST_F(IndexSet, CanBeCopiedBetweenExecutors) +TEST_F(index_set, CanBeCopiedBetweenExecutors) { auto idx_arr = gko::Array{exec, {0, 1, 2, 4, 6, 7, 8, 9}}; auto begin_comp = gko::Array{exec, {0, 4, 6}}; auto end_comp = gko::Array{exec, {3, 5, 10}}; auto superset_comp = gko::Array{exec, {0, 3, 4, 8}}; - auto idx_set = gko::IndexSet{exec, 10, idx_arr}; - auto cuda_idx_set = gko::IndexSet(cuda, idx_set); - auto host_idx_set = gko::IndexSet(exec, cuda_idx_set); + auto idx_set = gko::index_set{exec, 10, idx_arr}; + auto cuda_idx_set = gko::index_set(cuda, idx_set); + auto host_idx_set = gko::index_set(exec, cuda_idx_set); ASSERT_EQ(cuda_idx_set.get_executor(), cuda); this->assert_equal_index_sets(host_idx_set, idx_set); diff --git a/dpcpp/base/index_set_kernels.dp.cpp b/dpcpp/base/index_set_kernels.dp.cpp index a21f329bad7..3d577268542 100644 --- a/dpcpp/base/index_set_kernels.dp.cpp +++ b/dpcpp/base/index_set_kernels.dp.cpp @@ -54,7 +54,7 @@ namespace dpcpp { * * @ingroup index_set */ -namespace index_set { 
+namespace idx_set { template @@ -107,7 +107,7 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_INDEX_SET_LOCAL_TO_GLOBAL_KERNEL); -} // namespace index_set +} // namespace idx_set } // namespace dpcpp } // namespace kernels } // namespace gko diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index d55cb29e395..2fe82a4d050 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -1389,8 +1389,8 @@ template void calculate_nonzeros_per_row_in_index_set( std::shared_ptr exec, const matrix::Csr* source, - const IndexSet& row_index_set, - const IndexSet& col_index_set, + const gko::index_set& row_index_set, + const gko::index_set& col_index_set, IndexType* row_nnz) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -1428,8 +1428,8 @@ template void compute_submatrix_from_index_set( std::shared_ptr exec, const matrix::Csr* source, - const IndexSet& row_index_set, - const IndexSet& col_index_set, + const gko::index_set& row_index_set, + const gko::index_set& col_index_set, matrix::Csr* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/hip/base/index_set_kernels.hip.cpp b/hip/base/index_set_kernels.hip.cpp index 777ece35d63..f08588fe29d 100644 --- a/hip/base/index_set_kernels.hip.cpp +++ b/hip/base/index_set_kernels.hip.cpp @@ -54,7 +54,7 @@ namespace hip { * * @ingroup index_set */ -namespace index_set { +namespace idx_set { template @@ -107,7 +107,7 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_INDEX_SET_LOCAL_TO_GLOBAL_KERNEL); -} // namespace index_set +} // namespace idx_set } // namespace hip } // namespace kernels } // namespace gko diff --git a/hip/matrix/csr_kernels.hip.cpp b/hip/matrix/csr_kernels.hip.cpp index 3bedaaa3e15..7627b9ff527 100644 --- a/hip/matrix/csr_kernels.hip.cpp +++ b/hip/matrix/csr_kernels.hip.cpp @@ -949,8 +949,8 @@ template void calculate_nonzeros_per_row_in_index_set( std::shared_ptr exec, const matrix::Csr* source, - 
const IndexSet& row_index_set, - const IndexSet& col_index_set, + const gko::index_set& row_index_set, + const gko::index_set& col_index_set, IndexType* row_nnz) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -961,8 +961,8 @@ template void compute_submatrix_from_index_set( std::shared_ptr exec, const matrix::Csr* source, - const IndexSet& row_index_set, - const IndexSet& col_index_set, + const gko::index_set& row_index_set, + const gko::index_set& col_index_set, matrix::Csr* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/hip/test/base/index_set.hip.cpp b/hip/test/base/index_set.hip.cpp index 04492452e37..5386b06710f 100644 --- a/hip/test/base/index_set.hip.cpp +++ b/hip/test/base/index_set.hip.cpp @@ -50,10 +50,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { -class IndexSet : public ::testing::Test { +class index_set : public ::testing::Test { protected: using T = int; - IndexSet() + index_set() : exec(gko::ReferenceExecutor::create()), hip(gko::HipExecutor::create(0, gko::ReferenceExecutor::create())) {} @@ -66,8 +66,8 @@ class IndexSet : public ::testing::Test { } } - static void assert_equal_index_sets(gko::IndexSet& a, - gko::IndexSet& b) + static void assert_equal_index_sets(gko::index_set& a, + gko::index_set& b) { ASSERT_EQ(a.get_size(), b.get_size()); ASSERT_EQ(a.get_num_subsets(), b.get_num_subsets()); @@ -95,16 +95,16 @@ class IndexSet : public ::testing::Test { }; -TEST_F(IndexSet, CanBeCopiedBetweenExecutors) +TEST_F(index_set, CanBeCopiedBetweenExecutors) { auto idx_arr = gko::Array{exec, {0, 1, 2, 4, 6, 7, 8, 9}}; auto begin_comp = gko::Array{exec, {0, 4, 6}}; auto end_comp = gko::Array{exec, {3, 5, 10}}; auto superset_comp = gko::Array{exec, {0, 3, 4, 8}}; - auto idx_set = gko::IndexSet{exec, 10, idx_arr}; - auto hip_idx_set = gko::IndexSet(hip, idx_set); - auto host_idx_set = gko::IndexSet(exec, hip_idx_set); + auto idx_set = gko::index_set{exec, 
10, idx_arr}; + auto hip_idx_set = gko::index_set(hip, idx_set); + auto host_idx_set = gko::index_set(exec, hip_idx_set); ASSERT_EQ(hip_idx_set.get_executor(), hip); this->assert_equal_index_sets(host_idx_set, idx_set); diff --git a/include/ginkgo/core/base/index_set.hpp b/include/ginkgo/core/base/index_set.hpp index 1a42562f4e6..d8d0fe442cf 100644 --- a/include/ginkgo/core/base/index_set.hpp +++ b/include/ginkgo/core/base/index_set.hpp @@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include @@ -42,7 +43,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include #include @@ -79,10 +79,10 @@ namespace gko { * * @tparam index_type type of the indices being stored in the index set. * - * @ingroup IndexSet + * @ingroup index_set */ template -class IndexSet { +class index_set { public: /** * The type of elements stored in the index set. @@ -90,11 +90,11 @@ class IndexSet { using index_type = IndexType; /** - * Creates an empty IndexSet tied to the specified Executor. + * Creates an empty index_set tied to the specified Executor. * - * @param exec the Executor where the IndexSet data is allocated + * @param exec the Executor where the index_set data is allocated */ - IndexSet(std::shared_ptr exec) + index_set(std::shared_ptr exec) : exec_(std::move(exec)), index_space_size_{0}, num_stored_indices_{0} {} @@ -107,9 +107,9 @@ class IndexSet { * @param is_sorted a parameter that specifies if the indices array is * sorted or not. `true` if sorted. */ - explicit IndexSet(std::shared_ptr exec, - std::initializer_list init_list, - const bool is_sorted = false) + explicit index_set(std::shared_ptr exec, + std::initializer_list init_list, + const bool is_sorted = false) : exec_(std::move(exec)), index_space_size_(init_list.size() > 0 ? 
*(std::max_element(std::begin(init_list), @@ -133,10 +133,10 @@ class IndexSet { * @param is_sorted a parameter that specifies if the indices array is * sorted or not. `true` if sorted. */ - explicit IndexSet(std::shared_ptr exec, - const index_type size, - const gko::Array& indices, - const bool is_sorted = false) + explicit index_set(std::shared_ptr exec, + const index_type size, + const gko::Array& indices, + const bool is_sorted = false) : exec_(std::move(exec)), index_space_size_(size) { GKO_ASSERT(index_space_size_ >= indices.get_num_elems()); @@ -144,13 +144,13 @@ class IndexSet { } /** - * Creates a copy of another IndexSet on a different executor. + * Creates a copy of another index_set on a different executor. * - * @param exec the executor where the new IndexSet will be created - * @param other the IndexSet to copy from + * @param exec the executor where the new index_set will be created + * @param other the index_set to copy from */ - IndexSet(std::shared_ptr exec, const IndexSet& other) - : IndexSet(exec) + index_set(std::shared_ptr exec, const index_set& other) + : index_set(exec) { this->index_space_size_ = other.index_space_size_; this->num_stored_indices_ = other.num_stored_indices_; @@ -161,7 +161,7 @@ class IndexSet { } /** - * Returns the executor of the IndexSet + * Returns the executor of the index_set * * @return the executor. */ diff --git a/include/ginkgo/core/matrix/csr.hpp b/include/ginkgo/core/matrix/csr.hpp index 7065deccba0..cd6cc700f88 100644 --- a/include/ginkgo/core/matrix/csr.hpp +++ b/include/ginkgo/core/matrix/csr.hpp @@ -995,7 +995,7 @@ class Csr : public EnableLinOp>, } /** - * Creates a submatrix from this Csr matrix given row and column IndexSet + * Creates a submatrix from this Csr matrix given row and column index_set * objects. * * @param row_index_set the row index set containing the set of rows to be @@ -1007,8 +1007,8 @@ class Csr : public EnableLinOp>, * @note This is not a view but creates a new, separate CSR matrix. 
*/ std::unique_ptr> create_submatrix( - const gko::IndexSet& row_index_set, - const gko::IndexSet& column_index_set) const; + const index_set& row_index_set, + const index_set& column_index_set) const; /** * Creates a submatrix from this Csr matrix given row and column spans @@ -1022,7 +1022,7 @@ class Csr : public EnableLinOp>, * @note This is not a view but creates a new, separate CSR matrix. */ std::unique_ptr> create_submatrix( - const gko::span& row_span, const gko::span& column_span) const; + const span& row_span, const span& column_span) const; protected: /** diff --git a/omp/base/index_set_kernels.cpp b/omp/base/index_set_kernels.cpp index 223373053fe..228f58db2cd 100644 --- a/omp/base/index_set_kernels.cpp +++ b/omp/base/index_set_kernels.cpp @@ -60,7 +60,7 @@ namespace omp { * * @ingroup index_set */ -namespace index_set { +namespace idx_set { template @@ -207,7 +207,7 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_INDEX_SET_LOCAL_TO_GLOBAL_KERNEL); -} // namespace index_set +} // namespace idx_set } // namespace omp } // namespace kernels } // namespace gko diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 8fae745f757..5f60578149b 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -748,8 +748,8 @@ template void calculate_nonzeros_per_row_in_index_set( std::shared_ptr exec, const matrix::Csr* source, - const IndexSet& row_index_set, - const IndexSet& col_index_set, IndexType* row_nnz) + const gko::index_set& row_index_set, + const gko::index_set& col_index_set, IndexType* row_nnz) { auto num_row_subsets = row_index_set.get_num_subsets(); auto num_col_subsets = col_index_set.get_num_subsets(); @@ -828,8 +828,8 @@ template void compute_submatrix_from_index_set( std::shared_ptr exec, const matrix::Csr* source, - const IndexSet& row_index_set, - const IndexSet& col_index_set, + const gko::index_set& row_index_set, + const gko::index_set& col_index_set, matrix::Csr* result) { auto num_rows = 
result->get_size()[0]; diff --git a/omp/test/base/index_set.cpp b/omp/test/base/index_set.cpp index 1e0c99e9ef6..b3e82d58ffc 100644 --- a/omp/test/base/index_set.cpp +++ b/omp/test/base/index_set.cpp @@ -50,10 +50,10 @@ namespace { template -class IndexSet : public ::testing::Test { +class index_set : public ::testing::Test { protected: using index_type = T; - IndexSet() + index_set() : omp(gko::OmpExecutor::create()), ref(gko::ReferenceExecutor::create()) {} @@ -71,8 +71,8 @@ class IndexSet : public ::testing::Test { return std::move(rand_index_arr); } - static void assert_equal_index_sets(gko::IndexSet& a, - gko::IndexSet& b) + static void assert_equal_index_sets(gko::index_set& a, + gko::index_set& b) { ASSERT_EQ(a.get_size(), b.get_size()); ASSERT_EQ(a.get_num_subsets(), b.get_num_subsets()); @@ -90,10 +90,10 @@ class IndexSet : public ::testing::Test { std::shared_ptr ref; }; -TYPED_TEST_SUITE(IndexSet, gko::test::IndexTypes, TypenameNameGenerator); +TYPED_TEST_SUITE(index_set, gko::test::IndexTypes, TypenameNameGenerator); -TYPED_TEST(IndexSet, PopulateSubsetsIsEquivalentToReferenceForUnsortedInput) +TYPED_TEST(index_set, PopulateSubsetsIsEquivalentToReferenceForUnsortedInput) { auto rand_arr = this->setup_random_indices(512); auto ref_begin_comp = gko::Array{this->ref}; @@ -103,10 +103,10 @@ TYPED_TEST(IndexSet, PopulateSubsetsIsEquivalentToReferenceForUnsortedInput) auto omp_end_comp = gko::Array{this->omp}; auto omp_superset_comp = gko::Array{this->omp}; - gko::kernels::reference::index_set::populate_subsets( + gko::kernels::reference::idx_set::populate_subsets( this->ref, TypeParam(520), &rand_arr, &ref_begin_comp, &ref_end_comp, &ref_superset_comp, false); - gko::kernels::omp::index_set::populate_subsets( + gko::kernels::omp::idx_set::populate_subsets( this->omp, TypeParam(520), &rand_arr, &omp_begin_comp, &omp_end_comp, &omp_superset_comp, false); @@ -116,7 +116,7 @@ TYPED_TEST(IndexSet, PopulateSubsetsIsEquivalentToReferenceForUnsortedInput) } 
-TYPED_TEST(IndexSet, PopulateSubsetsIsEquivalentToReferenceForSortedInput) +TYPED_TEST(index_set, PopulateSubsetsIsEquivalentToReferenceForSortedInput) { auto rand_arr = this->setup_random_indices(512); std::sort(rand_arr.get_data(), @@ -128,10 +128,10 @@ TYPED_TEST(IndexSet, PopulateSubsetsIsEquivalentToReferenceForSortedInput) auto omp_end_comp = gko::Array{this->omp}; auto omp_superset_comp = gko::Array{this->omp}; - gko::kernels::reference::index_set::populate_subsets( + gko::kernels::reference::idx_set::populate_subsets( this->ref, TypeParam(520), &rand_arr, &ref_begin_comp, &ref_end_comp, &ref_superset_comp, false); - gko::kernels::omp::index_set::populate_subsets( + gko::kernels::omp::idx_set::populate_subsets( this->omp, TypeParam(520), &rand_arr, &omp_begin_comp, &omp_end_comp, &omp_superset_comp, false); @@ -141,30 +141,30 @@ TYPED_TEST(IndexSet, PopulateSubsetsIsEquivalentToReferenceForSortedInput) } -TYPED_TEST(IndexSet, IndicesContainsIsEquivalentToReference) +TYPED_TEST(index_set, IndicesContainsIsEquivalentToReference) { auto rand_arr = this->setup_random_indices(512); - auto ref_idx_set = gko::IndexSet(this->ref, 520, rand_arr); - auto omp_idx_set = gko::IndexSet(this->omp, 520, rand_arr); + auto ref_idx_set = gko::index_set(this->ref, 520, rand_arr); + auto omp_idx_set = gko::index_set(this->omp, 520, rand_arr); auto ref_indices_arr = this->setup_random_indices(73); auto ref_validity_arr = gko::Array(this->omp, 73); - gko::kernels::reference::index_set::compute_validity( + gko::kernels::reference::idx_set::compute_validity( this->ref, &ref_indices_arr, &ref_validity_arr); auto omp_indices_arr = gko::Array(this->omp, ref_indices_arr); auto omp_validity_arr = gko::Array(this->omp, 73); - gko::kernels::omp::index_set::compute_validity(this->omp, &omp_indices_arr, - &omp_validity_arr); + gko::kernels::omp::idx_set::compute_validity(this->omp, &omp_indices_arr, + &omp_validity_arr); GKO_ASSERT_ARRAY_EQ(ref_validity_arr, omp_validity_arr); } 
-TYPED_TEST(IndexSet, GetGlobalIndicesIsEquivalentToReference) +TYPED_TEST(index_set, GetGlobalIndicesIsEquivalentToReference) { auto rand_arr = this->setup_random_indices(512); auto rand_global_arr = this->setup_random_indices(256); - auto ref_idx_set = gko::IndexSet(this->ref, 520, rand_arr); + auto ref_idx_set = gko::index_set(this->ref, 520, rand_arr); auto ref_begin_comp = gko::Array{ this->ref, ref_idx_set.get_subsets_begin(), ref_idx_set.get_subsets_begin() + ref_idx_set.get_num_subsets()}; @@ -174,7 +174,7 @@ TYPED_TEST(IndexSet, GetGlobalIndicesIsEquivalentToReference) auto ref_superset_comp = gko::Array{ this->ref, ref_idx_set.get_superset_indices(), ref_idx_set.get_superset_indices() + ref_idx_set.get_num_subsets()}; - auto omp_idx_set = gko::IndexSet(this->omp, 520, rand_arr); + auto omp_idx_set = gko::index_set(this->omp, 520, rand_arr); auto omp_begin_comp = gko::Array{ this->omp, omp_idx_set.get_subsets_begin(), omp_idx_set.get_subsets_begin() + omp_idx_set.get_num_subsets()}; @@ -187,7 +187,7 @@ TYPED_TEST(IndexSet, GetGlobalIndicesIsEquivalentToReference) auto ref_local_arr = gko::Array{this->ref, rand_global_arr.get_num_elems()}; - gko::kernels::reference::index_set::global_to_local( + gko::kernels::reference::idx_set::global_to_local( this->ref, TypeParam(520), ref_idx_set.get_num_subsets(), ref_idx_set.get_subsets_begin(), ref_idx_set.get_subsets_end(), ref_idx_set.get_superset_indices(), @@ -195,7 +195,7 @@ TYPED_TEST(IndexSet, GetGlobalIndicesIsEquivalentToReference) rand_global_arr.get_const_data(), ref_local_arr.get_data(), false); auto omp_local_arr = gko::Array{this->omp, rand_global_arr.get_num_elems()}; - gko::kernels::omp::index_set::global_to_local( + gko::kernels::omp::idx_set::global_to_local( this->omp, TypeParam(520), omp_idx_set.get_num_subsets(), omp_idx_set.get_subsets_begin(), omp_idx_set.get_subsets_end(), omp_idx_set.get_superset_indices(), @@ -207,11 +207,11 @@ TYPED_TEST(IndexSet, GetGlobalIndicesIsEquivalentToReference) } 
-TYPED_TEST(IndexSet, GetLocalIndicesIsEquivalentToReference) +TYPED_TEST(index_set, GetLocalIndicesIsEquivalentToReference) { auto rand_arr = this->setup_random_indices(512); auto rand_local_arr = this->setup_random_indices(256); - auto ref_idx_set = gko::IndexSet(this->ref, 520, rand_arr); + auto ref_idx_set = gko::index_set(this->ref, 520, rand_arr); auto ref_begin_comp = gko::Array{ this->ref, ref_idx_set.get_subsets_begin(), ref_idx_set.get_subsets_begin() + ref_idx_set.get_num_subsets()}; @@ -221,7 +221,7 @@ TYPED_TEST(IndexSet, GetLocalIndicesIsEquivalentToReference) auto ref_superset_comp = gko::Array{ this->ref, ref_idx_set.get_superset_indices(), ref_idx_set.get_superset_indices() + ref_idx_set.get_num_subsets()}; - auto omp_idx_set = gko::IndexSet(this->omp, 520, rand_arr); + auto omp_idx_set = gko::index_set(this->omp, 520, rand_arr); auto omp_begin_comp = gko::Array{ this->omp, omp_idx_set.get_subsets_begin(), omp_idx_set.get_subsets_begin() + omp_idx_set.get_num_subsets()}; @@ -234,14 +234,14 @@ TYPED_TEST(IndexSet, GetLocalIndicesIsEquivalentToReference) auto ref_global_arr = gko::Array{this->ref, rand_local_arr.get_num_elems()}; - gko::kernels::reference::index_set::local_to_global( + gko::kernels::reference::idx_set::local_to_global( this->ref, ref_idx_set.get_num_subsets(), ref_idx_set.get_subsets_begin(), ref_idx_set.get_superset_indices(), static_cast(rand_local_arr.get_num_elems()), rand_local_arr.get_const_data(), ref_global_arr.get_data(), false); auto omp_global_arr = gko::Array{this->omp, rand_local_arr.get_num_elems()}; - gko::kernels::omp::index_set::local_to_global( + gko::kernels::omp::idx_set::local_to_global( this->omp, omp_idx_set.get_num_subsets(), omp_idx_set.get_subsets_begin(), omp_idx_set.get_superset_indices(), static_cast(rand_local_arr.get_num_elems()), diff --git a/omp/test/matrix/csr_kernels.cpp b/omp/test/matrix/csr_kernels.cpp index 7a61c72ac7b..848f5c28a1f 100644 --- a/omp/test/matrix/csr_kernels.cpp +++ 
b/omp/test/matrix/csr_kernels.cpp @@ -755,18 +755,18 @@ TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) } -TEST_F(Csr, CalculateNnzPerRowInIndexSetIsEquivalentToRef) +TEST_F(Csr, CalculateNnzPerRowInindex_setIsEquivalentToRef) { using Mtx = gko::matrix::Csr<>; using IndexType = int; using ValueType = double; set_up_mat_data(); - gko::IndexSet rset{ + gko::index_set rset{ this->ref, {42, 7, 8, 9, 10, 22, 25, 26, 34, 35, 36, 51}}; - gko::IndexSet cset{this->ref, - {42, 22, 24, 26, 28, 30, 81, 82, 83, 88}}; - gko::IndexSet drset(this->omp, rset); - gko::IndexSet dcset(this->omp, cset); + gko::index_set cset{this->ref, + {42, 22, 24, 26, 28, 30, 81, 82, 83, 88}}; + gko::index_set drset(this->omp, rset); + gko::index_set dcset(this->omp, cset); auto size = this->mtx2->get_size(); auto row_nnz = gko::Array(this->ref, rset.get_num_elems() + 1); row_nnz.fill(gko::zero()); @@ -781,18 +781,18 @@ TEST_F(Csr, CalculateNnzPerRowInIndexSetIsEquivalentToRef) } -TEST_F(Csr, ComputeSubmatrixFromIndexSetIsEquivalentToRef) +TEST_F(Csr, ComputeSubmatrixFromindex_setIsEquivalentToRef) { using Mtx = gko::matrix::Csr<>; using IndexType = int; using ValueType = double; set_up_mat_data(); - gko::IndexSet rset{ + gko::index_set rset{ this->ref, {42, 7, 8, 9, 10, 22, 25, 26, 34, 35, 36, 51}}; - gko::IndexSet cset{this->ref, - {42, 22, 24, 26, 28, 30, 81, 82, 83, 88}}; - gko::IndexSet drset(this->omp, rset); - gko::IndexSet dcset(this->omp, cset); + gko::index_set cset{this->ref, + {42, 22, 24, 26, 28, 30, 81, 82, 83, 88}}; + gko::index_set drset(this->omp, rset); + gko::index_set dcset(this->omp, cset); auto size = this->mtx2->get_size(); auto row_nnz = gko::Array(this->ref, rset.get_num_elems() + 1); row_nnz.fill(gko::zero()); @@ -883,18 +883,18 @@ TEST_F(Csr, AddScaledIdentityToNonSquare) } -TEST_F(Csr, CreateSubMatrixFromIndexSetIsEquivalentToRef) +TEST_F(Csr, CreateSubMatrixFromindex_setIsEquivalentToRef) { using IndexType = int; using ValueType = double; set_up_mat_data(); - 
gko::IndexSet rset{ + gko::index_set rset{ this->ref, {42, 7, 8, 9, 10, 22, 25, 26, 34, 35, 36, 51}}; - gko::IndexSet cset{this->ref, - {42, 22, 24, 26, 28, 30, 81, 82, 83, 88}}; - gko::IndexSet drset(this->omp, rset); - gko::IndexSet dcset(this->omp, cset); + gko::index_set cset{this->ref, + {42, 22, 24, 26, 28, 30, 81, 82, 83, 88}}; + gko::index_set drset(this->omp, rset); + gko::index_set dcset(this->omp, cset); auto smat1 = this->mtx2->create_submatrix(rset, cset); auto sdmat1 = this->dmtx2->create_submatrix(drset, dcset); diff --git a/reference/base/index_set_kernels.cpp b/reference/base/index_set_kernels.cpp index acdffa3fb0e..5fa25add373 100644 --- a/reference/base/index_set_kernels.cpp +++ b/reference/base/index_set_kernels.cpp @@ -62,7 +62,7 @@ namespace reference { * * @ingroup index_set */ -namespace index_set { +namespace idx_set { template @@ -240,7 +240,7 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_INDEX_SET_LOCAL_TO_GLOBAL_KERNEL); -} // namespace index_set +} // namespace idx_set } // namespace reference } // namespace kernels } // namespace gko diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index f7021c5224a..3601e702c97 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -629,8 +629,8 @@ template void calculate_nonzeros_per_row_in_index_set( std::shared_ptr exec, const matrix::Csr* source, - const IndexSet& row_index_set, - const IndexSet& col_index_set, IndexType* row_nnz) + const gko::index_set& row_index_set, + const gko::index_set& col_index_set, IndexType* row_nnz) { auto num_row_subsets = row_index_set.get_num_subsets(); auto row_subset_begin = row_index_set.get_subsets_begin(); @@ -710,8 +710,8 @@ template void compute_submatrix_from_index_set( std::shared_ptr exec, const matrix::Csr* source, - const IndexSet& row_index_set, - const IndexSet& col_index_set, + const gko::index_set& row_index_set, + const gko::index_set& col_index_set, matrix::Csr* result) { auto 
num_rows = result->get_size()[0]; diff --git a/reference/test/base/index_set.cpp b/reference/test/base/index_set.cpp index 993a132215b..73ceff60e03 100644 --- a/reference/test/base/index_set.cpp +++ b/reference/test/base/index_set.cpp @@ -51,10 +51,10 @@ namespace { template -class IndexSet : public ::testing::Test { +class index_set : public ::testing::Test { protected: using value_type = T; - IndexSet() : exec(gko::ReferenceExecutor::create()) {} + index_set() : exec(gko::ReferenceExecutor::create()) {} void TearDown() { @@ -64,8 +64,8 @@ class IndexSet : public ::testing::Test { } } - static void assert_equal_index_sets(gko::IndexSet& a, - gko::IndexSet& b) + static void assert_equal_index_sets(gko::index_set& a, + gko::index_set& b) { ASSERT_EQ(a.get_size(), b.get_size()); ASSERT_EQ(a.get_num_subsets(), b.get_num_subsets()); @@ -91,43 +91,43 @@ class IndexSet : public ::testing::Test { std::shared_ptr exec; }; -TYPED_TEST_SUITE(IndexSet, gko::test::IndexTypes, TypenameNameGenerator); +TYPED_TEST_SUITE(index_set, gko::test::IndexTypes, TypenameNameGenerator); -TYPED_TEST(IndexSet, KnowsItsExecutor) +TYPED_TEST(index_set, KnowsItsExecutor) { - auto idx_set = gko::IndexSet{this->exec}; + auto idx_set = gko::index_set{this->exec}; ASSERT_EQ(this->exec, idx_set.get_executor()); } -TYPED_TEST(IndexSet, CanBeCopyConstructed) +TYPED_TEST(index_set, CanBeCopyConstructed) { auto idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; auto begin_comp = gko::Array{this->exec, {0, 4, 6}}; auto end_comp = gko::Array{this->exec, {3, 5, 10}}; auto superset_comp = gko::Array{this->exec, {0, 3, 4, 8}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + auto idx_set = gko::index_set{this->exec, 10, idx_arr}; - gko::IndexSet idx_set2(idx_set); + gko::index_set idx_set2(idx_set); ASSERT_EQ(idx_set2.get_executor(), idx_set.get_executor()); this->assert_equal_index_sets(idx_set2, idx_set); } -TYPED_TEST(IndexSet, CanBeMoveConstructed) +TYPED_TEST(index_set, 
CanBeMoveConstructed) { auto idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; auto begin_comp = gko::Array{this->exec, {0, 4, 6}}; auto end_comp = gko::Array{this->exec, {3, 5, 10}}; auto superset_comp = gko::Array{this->exec, {0, 3, 4, 8}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + auto idx_set = gko::index_set{this->exec, 10, idx_arr}; - gko::IndexSet idx_set2(std::move(idx_set)); + gko::index_set idx_set2(std::move(idx_set)); ASSERT_EQ(idx_set2.get_executor(), this->exec); ASSERT_EQ(idx_set.get_executor(), nullptr); @@ -135,32 +135,32 @@ TYPED_TEST(IndexSet, CanBeMoveConstructed) } -TYPED_TEST(IndexSet, CanBeCopyAssigned) +TYPED_TEST(index_set, CanBeCopyAssigned) { auto idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; auto begin_comp = gko::Array{this->exec, {0, 4, 6}}; auto end_comp = gko::Array{this->exec, {3, 5, 10}}; auto superset_comp = gko::Array{this->exec, {0, 3, 4, 8}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + auto idx_set = gko::index_set{this->exec, 10, idx_arr}; - gko::IndexSet idx_set2 = idx_set; + gko::index_set idx_set2 = idx_set; ASSERT_EQ(idx_set2.get_executor(), idx_set.get_executor()); this->assert_equal_index_sets(idx_set2, idx_set); } -TYPED_TEST(IndexSet, CanBeMoveAssigned) +TYPED_TEST(index_set, CanBeMoveAssigned) { auto idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; auto begin_comp = gko::Array{this->exec, {0, 4, 6}}; auto end_comp = gko::Array{this->exec, {3, 5, 10}}; auto superset_comp = gko::Array{this->exec, {0, 3, 4, 8}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + auto idx_set = gko::index_set{this->exec, 10, idx_arr}; - gko::IndexSet idx_set2 = std::move(idx_set); + gko::index_set idx_set2 = std::move(idx_set); ASSERT_EQ(idx_set2.get_executor(), this->exec); ASSERT_EQ(idx_set.get_executor(), nullptr); @@ -168,27 +168,27 @@ TYPED_TEST(IndexSet, CanBeMoveAssigned) } -TYPED_TEST(IndexSet, KnowsItsSize) +TYPED_TEST(index_set, KnowsItsSize) { auto 
idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; auto begin_comp = gko::Array{this->exec, {0, 4, 6}}; auto end_comp = gko::Array{this->exec, {3, 5, 10}}; auto superset_comp = gko::Array{this->exec, {0, 3, 4, 8}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + auto idx_set = gko::index_set{this->exec, 10, idx_arr}; ASSERT_EQ(idx_set.get_size(), 10); } -TYPED_TEST(IndexSet, CanBeConstructedFromIndices) +TYPED_TEST(index_set, CanBeConstructedFromIndices) { auto idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; auto begin_comp = gko::Array{this->exec, {0, 4, 6}}; auto end_comp = gko::Array{this->exec, {3, 5, 10}}; auto superset_comp = gko::Array{this->exec, {0, 3, 4, 8}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + auto idx_set = gko::index_set{this->exec, 10, idx_arr}; ASSERT_EQ(idx_set.get_size(), 10); ASSERT_EQ(idx_set.get_num_subsets(), 3); @@ -203,13 +203,13 @@ TYPED_TEST(IndexSet, CanBeConstructedFromIndices) } -TYPED_TEST(IndexSet, CanBeConvertedToGlobalIndices) +TYPED_TEST(index_set, CanBeConvertedToGlobalIndices) { auto idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; auto begin_comp = gko::Array{this->exec, {0, 4, 6}}; auto end_comp = gko::Array{this->exec, {3, 5, 10}}; auto superset_comp = gko::Array{this->exec, {0, 3, 4, 8}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + auto idx_set = gko::index_set{this->exec, 10, idx_arr}; auto out_arr = idx_set.to_global_indices(); @@ -217,14 +217,14 @@ TYPED_TEST(IndexSet, CanBeConvertedToGlobalIndices) } -TYPED_TEST(IndexSet, CanBeConstructedFromNonSortedIndices) +TYPED_TEST(index_set, CanBeConstructedFromNonSortedIndices) { auto idx_arr = gko::Array{this->exec, {9, 1, 4, 2, 6, 8, 0, 7}}; auto begin_comp = gko::Array{this->exec, {0, 4, 6}}; auto end_comp = gko::Array{this->exec, {3, 5, 10}}; auto superset_comp = gko::Array{this->exec, {0, 3, 4, 8}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + auto idx_set = 
gko::index_set{this->exec, 10, idx_arr}; ASSERT_EQ(idx_set.get_size(), 10); ASSERT_EQ(idx_set.get_num_subsets(), 3); @@ -239,43 +239,43 @@ TYPED_TEST(IndexSet, CanBeConstructedFromNonSortedIndices) } -TYPED_TEST(IndexSet, CanDetectContiguousIndexSets) +TYPED_TEST(index_set, CanDetectContiguousindex_sets) { auto idx_arr = gko::Array{this->exec, {0, 1, 2, 3, 4, 5, 6}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + auto idx_set = gko::index_set{this->exec, 10, idx_arr}; ASSERT_EQ(idx_set.get_num_subsets(), 1); ASSERT_TRUE(idx_set.is_contiguous()); } -TYPED_TEST(IndexSet, CanDetectNonContiguousIndexSets) +TYPED_TEST(index_set, CanDetectNonContiguousindex_sets) { auto idx_arr = gko::Array{this->exec, {0, 1, 3, 4, 5, 6}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + auto idx_set = gko::index_set{this->exec, 10, idx_arr}; ASSERT_EQ(idx_set.get_num_subsets(), 2); ASSERT_FALSE(idx_set.is_contiguous()); } -TYPED_TEST(IndexSet, CanDetectElementInIndexSet) +TYPED_TEST(index_set, CanDetectElementInindex_set) { auto idx_arr = gko::Array{this->exec, {0, 1, 3, 4, 5, 6}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + auto idx_set = gko::index_set{this->exec, 10, idx_arr}; ASSERT_EQ(idx_set.get_num_subsets(), 2); ASSERT_TRUE(idx_set.contains(4)); ASSERT_FALSE(idx_set.contains(2)); } -TYPED_TEST(IndexSet, CanGetGlobalIndex) +TYPED_TEST(index_set, CanGetGlobalIndex) { auto idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + auto idx_set = gko::index_set{this->exec, 10, idx_arr}; ASSERT_EQ(idx_set.get_num_elems(), 8); EXPECT_EQ(idx_set.get_global_index(0), 0); @@ -289,12 +289,12 @@ TYPED_TEST(IndexSet, CanGetGlobalIndex) } -TYPED_TEST(IndexSet, CanGetGlobalIndexFromSortedArrays) +TYPED_TEST(index_set, CanGetGlobalIndexFromSortedArrays) { auto idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; auto lidx_arr = gko::Array{this->exec, {0, 1, 4, 6, 7}}; auto gidx_arr = 
gko::Array{this->exec, {0, 1, 6, 8, 9}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + auto idx_set = gko::index_set{this->exec, 10, idx_arr}; ASSERT_EQ(idx_set.get_num_elems(), 8); auto idx_set_gidx = idx_set.map_local_to_global(lidx_arr, true); @@ -305,12 +305,12 @@ TYPED_TEST(IndexSet, CanGetGlobalIndexFromSortedArrays) } -TYPED_TEST(IndexSet, CanGetGlobalIndexFromUnsortedArrays) +TYPED_TEST(index_set, CanGetGlobalIndexFromUnsortedArrays) { auto idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; auto lidx_arr = gko::Array{this->exec, {4, 7, 0, 6, 1}}; auto gidx_arr = gko::Array{this->exec, {6, 9, 0, 8, 1}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + auto idx_set = gko::index_set{this->exec, 10, idx_arr}; ASSERT_EQ(idx_set.get_num_elems(), 8); auto idx_set_gidx = idx_set.map_local_to_global(lidx_arr); @@ -321,10 +321,10 @@ TYPED_TEST(IndexSet, CanGetGlobalIndexFromUnsortedArrays) } -TYPED_TEST(IndexSet, CanGetLocalIndex) +TYPED_TEST(index_set, CanGetLocalIndex) { auto idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + auto idx_set = gko::index_set{this->exec, 10, idx_arr}; ASSERT_EQ(idx_set.get_num_elems(), 8); EXPECT_EQ(idx_set.get_local_index(6), 4); @@ -335,11 +335,11 @@ TYPED_TEST(IndexSet, CanGetLocalIndex) } -TYPED_TEST(IndexSet, CanDetectNonExistentIndices) +TYPED_TEST(index_set, CanDetectNonExistentIndices) { auto idx_arr = gko::Array{ this->exec, {0, 8, 1, 2, 3, 4, 6, 11, 9, 5, 7, 28, 39}}; - auto idx_set = gko::IndexSet{this->exec, 45, idx_arr}; + auto idx_set = gko::index_set{this->exec, 45, idx_arr}; ASSERT_EQ(idx_set.get_num_elems(), 13); EXPECT_EQ(idx_set.get_local_index(11), 10); @@ -347,12 +347,12 @@ TYPED_TEST(IndexSet, CanDetectNonExistentIndices) } -TYPED_TEST(IndexSet, CanGetLocalIndexFromSortedArrays) +TYPED_TEST(index_set, CanGetLocalIndexFromSortedArrays) { auto idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; auto gidx_arr = 
gko::Array{this->exec, {0, 4, 6, 8, 9}}; auto lidx_arr = gko::Array{this->exec, {0, 3, 4, 6, 7}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + auto idx_set = gko::index_set{this->exec, 10, idx_arr}; ASSERT_EQ(idx_set.get_num_elems(), 8); auto idx_set_lidx = idx_set.map_global_to_local(gidx_arr, true); @@ -363,12 +363,12 @@ TYPED_TEST(IndexSet, CanGetLocalIndexFromSortedArrays) } -TYPED_TEST(IndexSet, CanGetLocalIndexFromUnsortedArrays) +TYPED_TEST(index_set, CanGetLocalIndexFromUnsortedArrays) { auto idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; auto gidx_arr = gko::Array{this->exec, {6, 0, 4, 8, 9}}; auto lidx_arr = gko::Array{this->exec, {4, 0, 3, 6, 7}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; + auto idx_set = gko::index_set{this->exec, 10, idx_arr}; ASSERT_EQ(idx_set.get_num_elems(), 8); auto idx_set_lidx = idx_set.map_global_to_local(gidx_arr); diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index 62422489a7f..83bfb83340f 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -1764,7 +1764,7 @@ TYPED_TEST(Csr, CanGetSubmatrix2) } -TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) +TYPED_TEST(Csr, CanGetSubmatrixWithindex_set) { using Vec = typename TestFixture::Vec; using Mtx = typename TestFixture::Mtx; @@ -1786,8 +1786,8 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) { SCOPED_TRACE("Both empty index sets"); - auto row_set = gko::IndexSet(this->exec); - auto col_set = gko::IndexSet(this->exec); + auto row_set = gko::index_set(this->exec); + auto col_set = gko::index_set(this->exec); auto sub_mat1 = mat->create_submatrix(row_set, col_set); auto ref1 = Mtx::create(this->exec); @@ -1796,8 +1796,8 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) { SCOPED_TRACE("One empty index set"); - auto row_set = gko::IndexSet(this->exec); - auto col_set = gko::IndexSet(this->exec, {0}); + auto row_set = gko::index_set(this->exec); + auto col_set = 
gko::index_set(this->exec, {0}); auto sub_mat1 = mat->create_submatrix(row_set, col_set); auto ref1 = Mtx::create(this->exec); @@ -1807,8 +1807,8 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) { SCOPED_TRACE("Full index set"); auto row_set = - gko::IndexSet(this->exec, {0, 1, 2, 3, 4, 5, 6}); - auto col_set = gko::IndexSet(this->exec, {0, 1, 2, 3, 4}); + gko::index_set(this->exec, {0, 1, 2, 3, 4, 5, 6}); + auto col_set = gko::index_set(this->exec, {0, 1, 2, 3, 4}); auto sub_mat1 = mat->create_submatrix(row_set, col_set); auto ref1 = gko::initialize( { @@ -1827,8 +1827,8 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) { SCOPED_TRACE("Small square 2x2"); - auto row_set = gko::IndexSet(this->exec, {0, 1}); - auto col_set = gko::IndexSet(this->exec, {0, 1}); + auto row_set = gko::index_set(this->exec, {0, 1}); + auto col_set = gko::index_set(this->exec, {0, 1}); auto sub_mat1 = mat->create_submatrix(row_set, col_set); auto ref1 = gko::initialize({I{1.0, 3.0}, I{1.0, 0.0}}, this->exec); @@ -1838,8 +1838,8 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) { SCOPED_TRACE("Non-square 4x2"); - auto row_set = gko::IndexSet(this->exec, {1, 2, 3, 4}); - auto col_set = gko::IndexSet(this->exec, {1, 3}); + auto row_set = gko::index_set(this->exec, {1, 2, 3, 4}); + auto col_set = gko::index_set(this->exec, {1, 3}); auto sub_mat1 = mat->create_submatrix(row_set, col_set); auto ref1 = gko::initialize( {I{0.0, 7.5}, I{3.0, 0.0}, I{-1.0, 0.0}, I{0.0, 3.5}}, @@ -1850,8 +1850,8 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) { SCOPED_TRACE("Square 3x3"); - auto row_set = gko::IndexSet(this->exec, {1, 3, 4}); - auto col_set = gko::IndexSet(this->exec, {1, 3, 0}); + auto row_set = gko::index_set(this->exec, {1, 3, 4}); + auto col_set = gko::index_set(this->exec, {1, 3, 0}); auto sub_mat1 = mat->create_submatrix(row_set, col_set); auto ref1 = gko::initialize( {I{1.0, 0.0, 7.5}, I{0.0, -1.0, 0.0}, I{1.0, 0.0, 3.5}}, @@ -1862,10 +1862,10 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) { 
SCOPED_TRACE("Square 4x4"); - auto row_set = gko::IndexSet(this->exec, {1, 4, 5, 6}); + auto row_set = gko::index_set(this->exec, {1, 4, 5, 6}); // This is unsorted to make sure that the output is correct (sorted) // even when the input is sorted. - auto col_set = gko::IndexSet(this->exec, {4, 3, 0, 1}); + auto col_set = gko::index_set(this->exec, {4, 3, 0, 1}); auto sub_mat1 = mat->create_submatrix(row_set, col_set); auto ref1 = gko::initialize({I{1.0, 0.0, 7.5, 3.0}, // 1 I{1.0, 0.0, 3.5, 1.0}, // 4 @@ -1878,8 +1878,8 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) { SCOPED_TRACE("Non Square 2x4"); - auto row_set = gko::IndexSet(this->exec, {5, 6}); - auto col_set = gko::IndexSet(this->exec, {4, 3, 0, 1}); + auto row_set = gko::index_set(this->exec, {5, 6}); + auto col_set = gko::index_set(this->exec, {4, 3, 0, 1}); auto sub_mat1 = mat->create_submatrix(row_set, col_set); auto ref1 = gko::initialize({I{0.0, 1.0, 0.0, 2.0}, // 5 I{0.0, 3.0, 7.5, 1.0}}, // 6 From 7a8c7e6e467a32959e942528d6519a20e045bce5 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 28 Mar 2022 16:41:15 +0200 Subject: [PATCH 19/20] Add move and copy constr/assign ops. --- include/ginkgo/core/base/index_set.hpp | 141 ++++++++++++++++++++++--- reference/test/base/index_set.cpp | 4 +- 2 files changed, 131 insertions(+), 14 deletions(-) diff --git a/include/ginkgo/core/base/index_set.hpp b/include/ginkgo/core/base/index_set.hpp index d8d0fe442cf..10c9367ea7f 100644 --- a/include/ginkgo/core/base/index_set.hpp +++ b/include/ginkgo/core/base/index_set.hpp @@ -89,13 +89,25 @@ class index_set { */ using index_type = IndexType; + /** + * Creates an empty Array not tied to any executor. + */ + index_set() noexcept + : exec_(nullptr), index_space_size_{0}, num_stored_indices_{0} + {} + /** * Creates an empty index_set tied to the specified Executor. 
* * @param exec the Executor where the index_set data is allocated */ - index_set(std::shared_ptr exec) - : exec_(std::move(exec)), index_space_size_{0}, num_stored_indices_{0} + explicit index_set(std::shared_ptr exec) noexcept + : exec_(std::move(exec)), + index_space_size_{0}, + num_stored_indices_{0}, + subsets_begin_{Array(exec_)}, + subsets_end_{Array(exec_)}, + superset_cumulative_indices_{Array(exec_)} {} /** @@ -144,7 +156,7 @@ class index_set { } /** - * Creates a copy of another index_set on a different executor. + * Creates a copy of the input index_set on a different executor. * * @param exec the executor where the new index_set will be created * @param other the index_set to copy from @@ -152,12 +164,117 @@ class index_set { index_set(std::shared_ptr exec, const index_set& other) : index_set(exec) { + *this = other; + } + + /** + * Creates a copy of the input index_set. + * + * @param other the index_set to copy from + */ + index_set(const index_set& other) : index_set(other.get_executor(), other) + {} + + /** + * Moves the input index_set to a different executor. + * + * @param exec the executor where the new index_set will be moved to + * @param other the index_set to move from + */ + index_set(std::shared_ptr exec, index_set&& other) + : index_set(exec) + { + *this = std::move(other); + } + + /** + * Moves the input index_set. + * + * @param other the index_set to move from + */ + index_set(index_set&& other) + : index_set(other.get_executor(), std::move(other)) + {} + + /** + * Copies data from another index_set + * + * The executor of this is preserved. In case this does not have an assigned + * executor, it will inherit the executor of other. 
+ * + * @param other the index_set to copy from + * + * @return this + */ + index_set& operator=(const index_set& other) + { + if (&other == this) { + return *this; + } + if (other.get_executor() == nullptr) { + this->clear(); + return *this; + } + if (exec_ == nullptr) { + this->exec_ = other.get_executor(); + } + this->index_space_size_ = other.index_space_size_; + this->num_stored_indices_ = other.num_stored_indices_; + this->subsets_begin_ = other.subsets_begin_; + this->subsets_end_ = other.subsets_end_; + this->superset_cumulative_indices_ = other.superset_cumulative_indices_; + + return *this; + } + + /** + * Moves data from another index_set + * + * The executor of this is preserved. In case this does not have an assigned + * executor, it will inherit the executor of other. + * + * @param other the index_set to move from + * + * @return this + */ + index_set& operator=(index_set&& other) + { + if (&other == this) { + return *this; + } + if (other.get_executor() == nullptr) { + this->clear(); + return *this; + } + if (exec_ == nullptr) { + this->exec_ = other.get_executor(); + } this->index_space_size_ = other.index_space_size_; this->num_stored_indices_ = other.num_stored_indices_; - subsets_begin_ = gko::Array(exec, other.subsets_begin_); - subsets_end_ = gko::Array(exec, other.subsets_end_); - superset_cumulative_indices_ = - gko::Array(exec, other.superset_cumulative_indices_); + other.index_space_size_ = 0; + other.num_stored_indices_ = 0; + this->subsets_begin_ = std::move(other.subsets_begin_); + this->subsets_end_ = std::move(other.subsets_end_); + this->superset_cumulative_indices_ = + std::move(other.superset_cumulative_indices_); + + return *this; + } + + /** + * Deallocates all data used by the index_set. + * + * The index_set is left in a valid, but empty state, so the same index_set + * can be used to allocate new memory. Calls to + * index_set::get_subsets_begin() will return a `nullptr`. 
+ */ + void clear() noexcept + { + this->index_space_size_ = 0; + this->num_stored_indices_ = 0; + this->subsets_begin_.clear(); + this->subsets_end_.clear(); + this->superset_cumulative_indices_.clear(); } /** @@ -345,11 +462,11 @@ class index_set { const bool is_sorted); std::shared_ptr exec_; - index_type index_space_size_; - index_type num_stored_indices_; - gko::Array subsets_begin_; - gko::Array subsets_end_; - gko::Array superset_cumulative_indices_; + index_type index_space_size_{}; + index_type num_stored_indices_{}; + gko::Array subsets_begin_{}; + gko::Array subsets_end_{}; + gko::Array superset_cumulative_indices_{}; }; diff --git a/reference/test/base/index_set.cpp b/reference/test/base/index_set.cpp index 73ceff60e03..b29f2e9719a 100644 --- a/reference/test/base/index_set.cpp +++ b/reference/test/base/index_set.cpp @@ -130,7 +130,7 @@ TYPED_TEST(index_set, CanBeMoveConstructed) gko::index_set idx_set2(std::move(idx_set)); ASSERT_EQ(idx_set2.get_executor(), this->exec); - ASSERT_EQ(idx_set.get_executor(), nullptr); + ASSERT_EQ(idx_set.get_size(), 0); ASSERT_EQ(idx_set2.get_size(), 10); } @@ -163,7 +163,7 @@ TYPED_TEST(index_set, CanBeMoveAssigned) gko::index_set idx_set2 = std::move(idx_set); ASSERT_EQ(idx_set2.get_executor(), this->exec); - ASSERT_EQ(idx_set.get_executor(), nullptr); + ASSERT_EQ(idx_set.get_size(), 0); ASSERT_EQ(idx_set2.get_size(), 10); } From 43545a010b1215f8dc18fadffd7cfec8e9029796 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 29 Mar 2022 08:48:05 +0200 Subject: [PATCH 20/20] Review update. 
Co-authored-by: Tobias Ribizel --- include/ginkgo/core/base/index_set.hpp | 37 +++++--------------------- 1 file changed, 7 insertions(+), 30 deletions(-) diff --git a/include/ginkgo/core/base/index_set.hpp b/include/ginkgo/core/base/index_set.hpp index 10c9367ea7f..5402ff526e6 100644 --- a/include/ginkgo/core/base/index_set.hpp +++ b/include/ginkgo/core/base/index_set.hpp @@ -89,13 +89,6 @@ class index_set { */ using index_type = IndexType; - /** - * Creates an empty Array not tied to any executor. - */ - index_set() noexcept - : exec_(nullptr), index_space_size_{0}, num_stored_indices_{0} - {} - /** * Creates an empty index_set tied to the specified Executor. * @@ -211,13 +204,6 @@ class index_set { if (&other == this) { return *this; } - if (other.get_executor() == nullptr) { - this->clear(); - return *this; - } - if (exec_ == nullptr) { - this->exec_ = other.get_executor(); - } this->index_space_size_ = other.index_space_size_; this->num_stored_indices_ = other.num_stored_indices_; this->subsets_begin_ = other.subsets_begin_; @@ -242,17 +228,8 @@ class index_set { if (&other == this) { return *this; } - if (other.get_executor() == nullptr) { - this->clear(); - return *this; - } - if (exec_ == nullptr) { - this->exec_ = other.get_executor(); - } - this->index_space_size_ = other.index_space_size_; - this->num_stored_indices_ = other.num_stored_indices_; - other.index_space_size_ = 0; - other.num_stored_indices_ = 0; + this->index_space_size_ = std::exchange(other.index_space_size_, 0); + this->num_stored_indices_ = std::exchange(other.num_stored_indices_, 0); this->subsets_begin_ = std::move(other.subsets_begin_); this->subsets_end_ = std::move(other.subsets_end_); this->superset_cumulative_indices_ = @@ -462,11 +439,11 @@ class index_set { const bool is_sorted); std::shared_ptr exec_; - index_type index_space_size_{}; - index_type num_stored_indices_{}; - gko::Array subsets_begin_{}; - gko::Array subsets_end_{}; - gko::Array superset_cumulative_indices_{}; + 
index_type index_space_size_; + index_type num_stored_indices_; + gko::Array subsets_begin_; + gko::Array subsets_end_; + gko::Array superset_cumulative_indices_; };