From db2ffaf7f662b962ce25c08f17ea53d98a66846d Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 19 Dec 2019 10:56:22 +0100 Subject: [PATCH 1/4] extract prefix sum computations * introduce a new `prefix_sum` kernel that is used instead * remove a few unnecessary `Array::clear` invocations * fix reference ParILU initialization for zero diagonal entries. --- core/matrix/common_kernels.hpp | 92 +++++++++++++++++++ cuda/CMakeLists.txt | 1 + cuda/factorization/par_ilu_kernels.cu | 17 +--- cuda/matrix/common_kernels.cu | 74 +++++++++++++++ cuda/matrix/csr_kernels.cu | 36 +------- cuda/matrix/dense_kernels.cu | 53 ++--------- cuda/matrix/ell_kernels.cu | 16 +--- cuda/matrix/hybrid_kernels.cu | 13 +-- cuda/matrix/sellp_kernels.cu | 13 +-- cuda/matrix/sparsity_csr_kernels.cu | 1 - hip/CMakeLists.txt | 1 + hip/factorization/par_ilu_kernels.hip.cpp | 21 +---- hip/matrix/common_kernels.hip.cpp | 75 +++++++++++++++ hip/matrix/csr_kernels.hip.cpp | 53 +---------- hip/matrix/dense_kernels.hip.cpp | 56 +---------- hip/matrix/ell_kernels.hip.cpp | 15 +-- hip/matrix/hybrid_kernels.hip.cpp | 15 +-- hip/matrix/sellp_kernels.hip.cpp | 18 +--- hip/matrix/sparsity_csr_kernels.hip.cpp | 1 - omp/CMakeLists.txt | 1 + omp/factorization/par_ilu_kernels.cpp | 23 ++--- omp/matrix/common_kernels.cpp | 65 +++++++++++++ omp/matrix/csr_kernels.cpp | 15 ++- reference/CMakeLists.txt | 1 + reference/factorization/par_ilu_kernels.cpp | 9 +- reference/matrix/common_kernels.cpp | 66 +++++++++++++ reference/matrix/csr_kernels.cpp | 15 ++- .../test/factorization/par_ilu_kernels.cpp | 56 +++++++++-- 28 files changed, 492 insertions(+), 330 deletions(-) create mode 100644 core/matrix/common_kernels.hpp create mode 100644 cuda/matrix/common_kernels.cu create mode 100644 hip/matrix/common_kernels.hip.cpp create mode 100644 omp/matrix/common_kernels.cpp create mode 100644 reference/matrix/common_kernels.cpp diff --git a/core/matrix/common_kernels.hpp b/core/matrix/common_kernels.hpp new file mode 100644 index 00000000000..dd44730a4be --- /dev/null +++ b/core/matrix/common_kernels.hpp @@ -0,0 +1,92 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_MATRIX_COMMON_KERNELS_HPP_ +#define GKO_CORE_MATRIX_COMMON_KERNELS_HPP_ + + +#include + + +#include +#include + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_PREFIX_SUM_KERNEL(IndexType) \ + void prefix_sum(std::shared_ptr exec, \ + IndexType *counts, size_type num_entries) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_PREFIX_SUM_KERNEL(IndexType) + + +namespace omp { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace omp + + +namespace cuda { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace cuda + + +namespace reference { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace reference + + +namespace hip { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace hip + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + +#endif // GKO_CORE_MATRIX_COMMON_KERNELS_HPP_ \ No newline at end of file diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index 49c3b6ddc3c..96d7ed44a13 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -64,6 +64,7 @@ target_sources(ginkgo_cuda base/version.cpp components/zero_array.cu factorization/par_ilu_kernels.cu + matrix/common_kernels.cu matrix/coo_kernels.cu matrix/csr_kernels.cu matrix/dense_kernels.cu diff --git a/cuda/factorization/par_ilu_kernels.cu b/cuda/factorization/par_ilu_kernels.cu index a7dbb4cc6b4..1cd41b6b1c0 100644 --- a/cuda/factorization/par_ilu_kernels.cu +++ b/cuda/factorization/par_ilu_kernels.cu @@ -38,9 +38,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/matrix/common_kernels.hpp" #include "cuda/base/math.hpp" #include "cuda/base/types.hpp" -#include "cuda/components/prefix_sum.cuh" namespace gko { @@ -67,7 +67,6 @@ void initialize_row_ptrs_l_u( IndexType *l_row_ptrs, IndexType *u_row_ptrs) { const size_type num_rows{system_matrix->get_size()[0]}; - const size_type num_row_ptrs{num_rows + 1}; const dim3 block_size{default_block_size, 1, 1}; const uint32 number_blocks = @@ -80,18 +79,8 @@ void initialize_row_ptrs_l_u( as_cuda_type(system_matrix->get_const_values()), as_cuda_type(l_row_ptrs), as_cuda_type(u_row_ptrs)); - Array block_sum(exec, grid_dim.x); - auto block_sum_ptr = block_sum.get_data(); - - start_prefix_sum<<>>( - num_row_ptrs, as_cuda_type(l_row_ptrs), as_cuda_type(block_sum_ptr)); - finalize_prefix_sum<<>>( - num_row_ptrs, as_cuda_type(l_row_ptrs), as_cuda_type(block_sum_ptr)); - - start_prefix_sum<<>>( - num_row_ptrs, as_cuda_type(u_row_ptrs), as_cuda_type(block_sum_ptr)); - finalize_prefix_sum<<>>( - num_row_ptrs, as_cuda_type(u_row_ptrs), as_cuda_type(block_sum_ptr)); + prefix_sum(exec, l_row_ptrs, num_rows + 1); + prefix_sum(exec, u_row_ptrs, num_rows + 1); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/cuda/matrix/common_kernels.cu b/cuda/matrix/common_kernels.cu new file mode 100644 index 00000000000..6952e1d4f21 --- /dev/null +++ b/cuda/matrix/common_kernels.cu @@ -0,0 +1,74 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + + +#include "core/matrix/common_kernels.hpp" + + +#include "cuda/components/prefix_sum.cuh" + + +namespace gko { +namespace kernels { +namespace cuda { + + +constexpr int prefix_sum_block_size = 512; + + +template +void prefix_sum(std::shared_ptr exec, IndexType *counts, + size_type num_entries) +{ + auto num_blocks = ceildiv(num_entries, prefix_sum_block_size); + Array block_sum_array(exec, num_blocks); + auto block_sums = block_sum_array.get_data(); + start_prefix_sum + <<>>(num_entries, counts, + block_sums); + finalize_prefix_sum + <<>>(num_entries, counts, + block_sums); +} + + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_KERNEL); + +// explicitly instantiate for size_type as well, as this is used in the SellP +// format +template void prefix_sum(std::shared_ptr exec, + size_type *counts, size_type num_entries); + + +} // namespace cuda +} // namespace kernels +} // namespace gko \ No newline at end of file diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu index a08c96b69c3..6f4280ced10 100644 --- a/cuda/matrix/csr_kernels.cu +++ b/cuda/matrix/csr_kernels.cu @@ -46,6 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/matrix/common_kernels.hpp" #include "core/matrix/csr_builder.hpp" #include "core/matrix/dense_kernels.hpp" #include "core/synthesizer/implementation_selection.hpp" @@ -56,7 +57,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "cuda/base/types.hpp" #include "cuda/components/atomic.cuh" #include "cuda/components/cooperative_groups.cuh" -#include "cuda/components/prefix_sum.cuh" #include "cuda/components/reduction.cuh" #include "cuda/components/segment_scan.cuh" #include "cuda/components/uninitialized_array.hpp" @@ -677,17 +677,7 @@ void convert_to_sellp(std::shared_ptr exec, as_cuda_type(nnz_per_row.get_const_data()), as_cuda_type(slice_lengths), as_cuda_type(slice_sets)); - auto add_values = - Array(exec, ceildiv(slice_num + 1, default_block_size)); - grid_dim = ceildiv(slice_num + 1, default_block_size); - - start_prefix_sum<<>>( - slice_num + 1, as_cuda_type(slice_sets), - as_cuda_type(add_values.get_data())); - - finalize_prefix_sum<<>>( - slice_num + 1, as_cuda_type(slice_sets), - as_cuda_type(add_values.get_const_data())); + prefix_sum(exec, slice_sets, slice_num + 1); grid_dim = ceildiv(num_rows, default_block_size); kernel::fill_in_sellp<<>>( @@ -695,9 +685,6 @@ void convert_to_sellp(std::shared_ptr exec, as_cuda_type(source_row_ptrs), as_cuda_type(source_col_idxs), as_cuda_type(slice_lengths), as_cuda_type(slice_sets), as_cuda_type(result_col_idxs), as_cuda_type(result_values)); - - nnz_per_row.clear(); - add_values.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -779,11 +766,6 @@ void calculate_total_cols(std::shared_ptr exec, exec->get_master()->copy_from(exec.get(), 1, d_result.get_const_data(), result); - - block_results.clear(); - nnz_per_row.clear(); - max_nnz_per_slice.clear(); - d_result.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -917,10 +899,6 @@ void calculate_max_nnz_per_row(std::shared_ptr exec, exec->get_master()->copy_from(exec.get(), 1, d_result.get_const_data(), result); - - nnz_per_row.clear(); - block_results.clear(); - d_result.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -952,15 +930,7 @@ void convert_to_hybrid(std::shared_ptr exec, num_rows, max_nnz_per_row, as_cuda_type(source->get_const_row_ptrs()), as_cuda_type(coo_offset.get_data())); - auto add_values = - Array(exec, ceildiv(num_rows, default_block_size)); - grid_dim = ceildiv(num_rows, default_block_size); - start_prefix_sum<<>>( - num_rows, as_cuda_type(coo_offset.get_data()), - as_cuda_type(add_values.get_data())); - finalize_prefix_sum<<>>( - num_rows, as_cuda_type(coo_offset.get_data()), - as_cuda_type(add_values.get_const_data())); + prefix_sum(exec, coo_offset.get_data(), num_rows); grid_dim = ceildiv(num_rows * config::warp_size, default_block_size); kernel::fill_in_hybrid<<>>( diff --git a/cuda/matrix/dense_kernels.cu b/cuda/matrix/dense_kernels.cu index 4579691c6cd..4b6a6c3cbbc 100644 --- a/cuda/matrix/dense_kernels.cu +++ b/cuda/matrix/dense_kernels.cu @@ -42,11 +42,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/matrix/common_kernels.hpp" #include "cuda/base/config.hpp" #include "cuda/base/cublas_bindings.hpp" #include "cuda/base/pointer_mode_guard.hpp" #include "cuda/components/cooperative_groups.cuh" -#include "cuda/components/prefix_sum.cuh" #include "cuda/components/reduction.cuh" #include "cuda/components/uninitialized_array.hpp" @@ -251,25 +251,15 @@ void convert_to_coo(std::shared_ptr exec, auto nnz_prefix_sum = Array(exec, num_rows); calculate_nonzeros_per_row(exec, source, &nnz_prefix_sum); - const size_type grid_dim = ceildiv(num_rows, default_block_size); - auto add_values = Array(exec, grid_dim); + prefix_sum(exec, nnz_prefix_sum.get_data(), num_rows); - start_prefix_sum<<>>( - num_rows, as_cuda_type(nnz_prefix_sum.get_data()), - as_cuda_type(add_values.get_data())); - - finalize_prefix_sum<<>>( - num_rows, as_cuda_type(nnz_prefix_sum.get_data()), - as_cuda_type(add_values.get_data())); + size_type grid_dim = ceildiv(num_rows, default_block_size); kernel::fill_in_coo<<>>( num_rows, num_cols, stride, as_cuda_type(nnz_prefix_sum.get_const_data()), as_cuda_type(source->get_const_values()), as_cuda_type(row_idxs), as_cuda_type(col_idxs), as_cuda_type(values)); - - nnz_prefix_sum.clear(); - add_values.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -297,22 +287,13 @@ void convert_to_csr(std::shared_ptr exec, num_rows, num_cols, stride, as_cuda_type(source->get_const_values()), as_cuda_type(row_ptrs)); - size_type grid_dim = ceildiv(num_rows + 1, default_block_size); - auto add_values = Array(exec, grid_dim); + prefix_sum(exec, row_ptrs, num_rows + 1); - start_prefix_sum - <<>>(num_rows + 1, as_cuda_type(row_ptrs), - as_cuda_type(add_values.get_data())); - - finalize_prefix_sum<<>>( - num_rows + 1, as_cuda_type(row_ptrs), - as_cuda_type(add_values.get_const_data())); + size_type grid_dim = ceildiv(num_rows, default_block_size); kernel::fill_in_csr<<>>( num_rows, num_cols, stride, as_cuda_type(source->get_const_values()), as_cuda_type(row_ptrs), as_cuda_type(col_idxs), as_cuda_type(values)); - - add_values.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -387,26 +368,13 @@ void convert_to_sellp(std::shared_ptr exec, as_cuda_type(nnz_per_row.get_const_data()), as_cuda_type(slice_lengths), as_cuda_type(slice_sets)); - auto add_values = - Array(exec, ceildiv(slice_num + 1, default_block_size)); - grid_dim = ceildiv(slice_num + 1, default_block_size); - - start_prefix_sum<<>>( - slice_num + 1, as_cuda_type(slice_sets), - as_cuda_type(add_values.get_data())); - - finalize_prefix_sum<<>>( - slice_num + 1, as_cuda_type(slice_sets), - as_cuda_type(add_values.get_const_data())); + prefix_sum(exec, slice_sets, slice_num + 1); grid_dim = ceildiv(num_rows, default_block_size); kernel::fill_in_sellp<<>>( num_rows, num_cols, slice_size, stride, as_cuda_type(source->get_const_values()), as_cuda_type(slice_lengths), as_cuda_type(slice_sets), as_cuda_type(col_idxs), as_cuda_type(vals)); - - add_values.clear(); - nnz_per_row.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -433,7 +401,6 @@ void count_nonzeros(std::shared_ptr exec, calculate_nonzeros_per_row(exec, source, &nnz_per_row); *result = reduce_add_array(exec, num_rows, nnz_per_row.get_const_data()); - nnz_per_row.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COUNT_NONZEROS_KERNEL); @@ -469,9 +436,6 @@ void calculate_max_nnz_per_row(std::shared_ptr exec, exec->get_master()->copy_from(exec.get(), 1, d_result.get_const_data(), result); - d_result.clear(); - block_results.clear(); - nnz_per_row.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( @@ -537,11 +501,6 @@ void calculate_total_cols(std::shared_ptr exec, exec->get_master()->copy_from(exec.get(), 1, d_result.get_const_data(), result); - - block_results.clear(); - nnz_per_row.clear(); - max_nnz_per_slice.clear(); - d_result.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( diff --git a/cuda/matrix/ell_kernels.cu b/cuda/matrix/ell_kernels.cu index 5c58cfc41ef..89ce0f9d27e 100644 --- a/cuda/matrix/ell_kernels.cu +++ b/cuda/matrix/ell_kernels.cu @@ -43,6 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/matrix/common_kernels.hpp" #include "core/matrix/dense_kernels.hpp" #include "core/synthesizer/implementation_selection.hpp" #include "cuda/base/config.hpp" @@ -51,7 +52,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "cuda/components/atomic.cuh" #include "cuda/components/cooperative_groups.cuh" #include "cuda/components/format_conversion.cuh" -#include "cuda/components/prefix_sum.cuh" #include "cuda/components/reduction.cuh" #include "cuda/components/zero_array.hpp" @@ -307,24 +307,15 @@ void convert_to_csr(std::shared_ptr exec, num_rows, max_nnz_per_row, stride, as_cuda_type(source->get_const_values()), as_cuda_type(row_ptrs)); - size_type grid_dim = ceildiv(num_rows + 1, default_block_size); - auto add_values = Array(exec, grid_dim); + prefix_sum(exec, row_ptrs, num_rows + 1); - start_prefix_sum - <<>>(num_rows + 1, as_cuda_type(row_ptrs), - as_cuda_type(add_values.get_data())); - - finalize_prefix_sum<<>>( - num_rows + 1, as_cuda_type(row_ptrs), - as_cuda_type(add_values.get_const_data())); + size_type grid_dim = ceildiv(num_rows, default_block_size); kernel::fill_in_csr<<>>( num_rows, max_nnz_per_row, stride, as_cuda_type(source->get_const_values()), as_cuda_type(source->get_const_col_idxs()), as_cuda_type(row_ptrs), as_cuda_type(col_idxs), as_cuda_type(values)); - - add_values.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -342,7 +333,6 @@ void count_nonzeros(std::shared_ptr exec, calculate_nonzeros_per_row(exec, source, &nnz_per_row); *result = reduce_add_array(exec, num_rows, nnz_per_row.get_const_data()); - nnz_per_row.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/cuda/matrix/hybrid_kernels.cu b/cuda/matrix/hybrid_kernels.cu index abf9525baec..fc9bdc4c1dd 100644 --- a/cuda/matrix/hybrid_kernels.cu +++ b/cuda/matrix/hybrid_kernels.cu @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/matrix/common_kernels.hpp" #include "core/matrix/coo_kernels.hpp" #include "core/matrix/ell_kernels.hpp" #include "cuda/base/config.hpp" @@ -44,7 +45,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "cuda/components/atomic.cuh" #include "cuda/components/cooperative_groups.cuh" #include "cuda/components/format_conversion.cuh" -#include "cuda/components/prefix_sum.cuh" #include "cuda/components/reduction.cuh" #include "cuda/components/segment_scan.cuh" #include "cuda/components/zero_array.hpp" @@ -128,16 +128,7 @@ void convert_to_csr(std::shared_ptr exec, num_rows, as_cuda_type(row_ptrs), as_cuda_type(coo_row_ptrs.get_const_data())); - grid_num = ceildiv(num_rows + 1, default_block_size); - auto add_values = Array(exec, grid_num); - - start_prefix_sum - <<>>(num_rows + 1, as_cuda_type(row_ptrs), - as_cuda_type(add_values.get_data())); - - finalize_prefix_sum<<>>( - num_rows + 1, as_cuda_type(row_ptrs), - as_cuda_type(add_values.get_const_data())); + prefix_sum(exec, row_ptrs, num_rows + 1); // Fill the value grid_num = ceildiv(num_rows, default_block_size); diff --git a/cuda/matrix/sellp_kernels.cu b/cuda/matrix/sellp_kernels.cu index b34b4a68291..024ed44b38e 100644 --- a/cuda/matrix/sellp_kernels.cu +++ b/cuda/matrix/sellp_kernels.cu @@ -40,10 +40,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/matrix/common_kernels.hpp" #include "cuda/base/config.hpp" #include "cuda/base/cusparse_bindings.hpp" #include "cuda/base/types.hpp" -#include "cuda/components/prefix_sum.cuh" #include "cuda/components/reduction.cuh" @@ -175,13 +175,7 @@ void convert_to_csr(std::shared_ptr exec, grid_dim = ceildiv(num_rows + 1, default_block_size); auto add_values = Array(exec, grid_dim); - start_prefix_sum<<>>( - num_rows + 1, as_cuda_type(result_row_ptrs), - as_cuda_type(add_values.get_data())); - - finalize_prefix_sum<<>>( - num_rows + 1, as_cuda_type(result_row_ptrs), - as_cuda_type(add_values.get_const_data())); + prefix_sum(exec, result_row_ptrs, num_rows + 1); grid_dim = ceildiv(num_rows, default_block_size); @@ -190,8 +184,6 @@ void convert_to_csr(std::shared_ptr exec, as_cuda_type(source_col_idxs), as_cuda_type(source_values), as_cuda_type(result_row_ptrs), as_cuda_type(result_col_idxs), as_cuda_type(result_values)); - - add_values.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -217,7 +209,6 @@ void count_nonzeros(std::shared_ptr exec, as_cuda_type(nnz_per_row.get_data())); *result = reduce_add_array(exec, num_rows, nnz_per_row.get_const_data()); - nnz_per_row.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/cuda/matrix/sparsity_csr_kernels.cu b/cuda/matrix/sparsity_csr_kernels.cu index 0a0dc3f0a63..ac060ebe213 100644 --- a/cuda/matrix/sparsity_csr_kernels.cu +++ b/cuda/matrix/sparsity_csr_kernels.cu @@ -48,7 +48,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "cuda/base/types.hpp" #include "cuda/components/atomic.cuh" #include "cuda/components/cooperative_groups.cuh" -#include "cuda/components/prefix_sum.cuh" #include "cuda/components/reduction.cuh" #include "cuda/components/segment_scan.cuh" #include "cuda/components/uninitialized_array.hpp" diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index bd703702ccf..a0b1b938167 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -96,6 +96,7 @@ set(GINKGO_HIP_SOURCES base/version.hip.cpp components/zero_array.hip.cpp factorization/par_ilu_kernels.hip.cpp + matrix/common_kernels.hip.cpp matrix/coo_kernels.hip.cpp matrix/csr_kernels.hip.cpp matrix/dense_kernels.hip.cpp diff --git a/hip/factorization/par_ilu_kernels.hip.cpp b/hip/factorization/par_ilu_kernels.hip.cpp index 8b6637d0622..86170cda799 100644 --- a/hip/factorization/par_ilu_kernels.hip.cpp +++ b/hip/factorization/par_ilu_kernels.hip.cpp @@ -41,9 +41,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/matrix/common_kernels.hpp" #include "hip/base/math.hip.hpp" #include "hip/base/types.hip.hpp" -#include "hip/components/prefix_sum.hip.hpp" namespace gko { @@ -70,7 +70,6 @@ void initialize_row_ptrs_l_u( IndexType *l_row_ptrs, IndexType *u_row_ptrs) { const size_type num_rows{system_matrix->get_size()[0]}; - const size_type num_row_ptrs{num_rows + 1}; const dim3 block_size{default_block_size, 1, 1}; const uint32 number_blocks = @@ -84,22 +83,8 @@ void initialize_row_ptrs_l_u( as_hip_type(system_matrix->get_const_values()), as_hip_type(l_row_ptrs), as_hip_type(u_row_ptrs)); - Array block_sum(exec, grid_dim.x); - auto block_sum_ptr = block_sum.get_data(); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(start_prefix_sum), - dim3(grid_dim), dim3(block_size), 0, 0, num_row_ptrs, - as_hip_type(l_row_ptrs), as_hip_type(block_sum_ptr)); - hipLaunchKernelGGL(HIP_KERNEL_NAME(finalize_prefix_sum), - dim3(grid_dim), dim3(block_size), 0, 0, num_row_ptrs, - as_hip_type(l_row_ptrs), as_hip_type(block_sum_ptr)); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(start_prefix_sum), - dim3(grid_dim), dim3(block_size), 0, 0, num_row_ptrs, - as_hip_type(u_row_ptrs), as_hip_type(block_sum_ptr)); - hipLaunchKernelGGL(HIP_KERNEL_NAME(finalize_prefix_sum), - dim3(grid_dim), dim3(block_size), 0, 0, num_row_ptrs, - as_hip_type(u_row_ptrs), as_hip_type(block_sum_ptr)); + prefix_sum(exec, l_row_ptrs, num_rows + 1); + prefix_sum(exec, u_row_ptrs, num_rows + 1); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/hip/matrix/common_kernels.hip.cpp b/hip/matrix/common_kernels.hip.cpp new file mode 100644 index 00000000000..a3cc319727c --- /dev/null +++ b/hip/matrix/common_kernels.hip.cpp @@ -0,0 +1,75 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + + +#include "core/matrix/common_kernels.hpp" + + +#include "hip/components/prefix_sum.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { + + +constexpr int prefix_sum_block_size = 512; + + +template +void prefix_sum(std::shared_ptr exec, IndexType *counts, + size_type num_entries) +{ + auto num_blocks = ceildiv(num_entries, prefix_sum_block_size); + Array block_sum_array(exec, num_blocks); + auto block_sums = block_sum_array.get_data(); + hipLaunchKernelGGL(HIP_KERNEL_NAME(start_prefix_sum), + dim3(num_blocks), dim3(prefix_sum_block_size), 0, 0, + num_entries, counts, block_sums); + hipLaunchKernelGGL( + HIP_KERNEL_NAME(finalize_prefix_sum), + dim3(num_blocks), dim3(prefix_sum_block_size), 0, 0, num_entries, + counts, block_sums); +} + + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_KERNEL); + +// explicitly instantiate for size_type as well, as this is used in the SellP +// format +template void prefix_sum(std::shared_ptr exec, + size_type *counts, size_type num_entries); + + +} // namespace hip +} // namespace kernels +} // namespace gko \ No newline at end of file diff --git a/hip/matrix/csr_kernels.hip.cpp b/hip/matrix/csr_kernels.hip.cpp index 4099979d182..c1ff237074e 100644 --- a/hip/matrix/csr_kernels.hip.cpp +++ b/hip/matrix/csr_kernels.hip.cpp @@ -49,6 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/matrix/common_kernels.hpp" #include "core/matrix/csr_builder.hpp" #include "core/matrix/dense_kernels.hpp" #include "core/synthesizer/implementation_selection.hpp" @@ -59,7 +60,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "hip/base/types.hip.hpp" #include "hip/components/atomic.hip.hpp" #include "hip/components/cooperative_groups.hip.hpp" -#include "hip/components/prefix_sum.hip.hpp" #include "hip/components/reduction.hip.hpp" #include "hip/components/segment_scan.hip.hpp" #include "hip/components/uninitialized_array.hip.hpp" @@ -611,18 +611,7 @@ void advanced_spgemm(std::shared_ptr exec, d_col_idxs, m, c_row_ptrs); // build row pointers - auto num_row_ptrs = m + 1; - auto num_blocks_prefixsum = ceildiv(num_row_ptrs, default_block_size); - Array block_sums_array(exec, num_blocks_prefixsum); - auto block_sums = block_sums_array.get_data(); - hipLaunchKernelGGL( - HIP_KERNEL_NAME(start_prefix_sum), - dim3(num_blocks_prefixsum), dim3(default_block_size), 0, 0, - num_row_ptrs, c_row_ptrs, block_sums); - hipLaunchKernelGGL( - HIP_KERNEL_NAME(finalize_prefix_sum), - dim3(num_blocks_prefixsum), dim3(default_block_size), 0, 0, - num_row_ptrs, c_row_ptrs, block_sums); + prefix_sum(exec, c_row_ptrs, m + 1); // accumulate non-zeros for alpha * A * B + beta * D ValueType vbeta{}; @@ -749,19 +738,7 @@ void convert_to_sellp(std::shared_ptr exec, stride_factor, as_hip_type(nnz_per_row.get_const_data()), as_hip_type(slice_lengths), as_hip_type(slice_sets)); - auto add_values = - Array(exec, ceildiv(slice_num + 1, default_block_size)); - grid_dim = ceildiv(slice_num + 1, default_block_size); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(start_prefix_sum), - dim3(grid_dim), dim3(default_block_size), 0, 0, - slice_num + 1, as_hip_type(slice_sets), - as_hip_type(add_values.get_data())); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(finalize_prefix_sum), - dim3(grid_dim), dim3(default_block_size), 0, 0, - slice_num + 1, as_hip_type(slice_sets), - as_hip_type(add_values.get_const_data())); + prefix_sum(exec, slice_sets, slice_num + 1); grid_dim = ceildiv(num_rows, default_block_size); hipLaunchKernelGGL(kernel::fill_in_sellp, dim3(grid_dim), @@ -770,9 +747,6 @@ void convert_to_sellp(std::shared_ptr exec, as_hip_type(source_col_idxs), as_hip_type(slice_lengths), as_hip_type(slice_sets), as_hip_type(result_col_idxs), as_hip_type(result_values)); - - nnz_per_row.clear(); - add_values.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -860,11 +834,6 @@ void calculate_total_cols(std::shared_ptr exec, exec->get_master()->copy_from(exec.get(), 1, d_result.get_const_data(), result); - - block_results.clear(); - nnz_per_row.clear(); - max_nnz_per_slice.clear(); - d_result.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -1001,10 +970,6 @@ void calculate_max_nnz_per_row(std::shared_ptr exec, exec->get_master()->copy_from(exec.get(), 1, d_result.get_const_data(), result); - - nnz_per_row.clear(); - block_results.clear(); - d_result.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -1039,17 +1004,7 @@ void convert_to_hybrid(std::shared_ptr exec, as_hip_type(source->get_const_row_ptrs()), as_hip_type(coo_offset.get_data())); - auto add_values = - Array(exec, ceildiv(num_rows, default_block_size)); - grid_dim = ceildiv(num_rows, default_block_size); - hipLaunchKernelGGL(HIP_KERNEL_NAME(start_prefix_sum), - dim3(grid_dim), dim3(default_block_size), 0, 0, num_rows, - as_hip_type(coo_offset.get_data()), - as_hip_type(add_values.get_data())); - hipLaunchKernelGGL(HIP_KERNEL_NAME(finalize_prefix_sum), - dim3(grid_dim), dim3(default_block_size), 0, 0, num_rows, - as_hip_type(coo_offset.get_data()), - as_hip_type(add_values.get_const_data())); + prefix_sum(exec, coo_offset.get_data(), num_rows); grid_dim = ceildiv(num_rows * config::warp_size, default_block_size); hipLaunchKernelGGL(kernel::fill_in_hybrid, dim3(grid_dim), diff --git a/hip/matrix/dense_kernels.hip.cpp b/hip/matrix/dense_kernels.hip.cpp index 5654c2b5c7e..99600409a86 100644 --- a/hip/matrix/dense_kernels.hip.cpp +++ b/hip/matrix/dense_kernels.hip.cpp @@ -45,11 +45,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/matrix/common_kernels.hpp" #include "hip/base/config.hip.hpp" #include "hip/base/hipblas_bindings.hip.hpp" #include "hip/base/pointer_mode_guard.hip.hpp" #include "hip/components/cooperative_groups.hip.hpp" -#include "hip/components/prefix_sum.hip.hpp" #include "hip/components/reduction.hip.hpp" #include "hip/components/uninitialized_array.hip.hpp" @@ -264,15 +264,7 @@ void convert_to_coo(std::shared_ptr exec, const size_type grid_dim = ceildiv(num_rows, default_block_size); auto add_values = Array(exec, grid_dim); - hipLaunchKernelGGL(HIP_KERNEL_NAME(start_prefix_sum), - dim3(grid_dim), dim3(default_block_size), 0, 0, num_rows, - as_hip_type(nnz_prefix_sum.get_data()), - as_hip_type(add_values.get_data())); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(finalize_prefix_sum), - dim3(grid_dim), dim3(default_block_size), 0, 0, num_rows, - as_hip_type(nnz_prefix_sum.get_data()), - as_hip_type(add_values.get_data())); + prefix_sum(exec, nnz_prefix_sum.get_data(), num_rows); hipLaunchKernelGGL(kernel::fill_in_coo, dim3(grid_dim), dim3(default_block_size), 0, 0, num_rows, num_cols, @@ -280,9 +272,6 @@ void convert_to_coo(std::shared_ptr exec, as_hip_type(source->get_const_values()), as_hip_type(row_idxs), as_hip_type(col_idxs), as_hip_type(values)); - - nnz_prefix_sum.clear(); - add_values.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -311,25 +300,14 @@ void convert_to_csr(std::shared_ptr exec, stride, as_hip_type(source->get_const_values()), as_hip_type(row_ptrs)); - size_type grid_dim = ceildiv(num_rows + 1, default_block_size); - auto add_values = Array(exec, grid_dim); + prefix_sum(exec, row_ptrs, num_rows + 1); - hipLaunchKernelGGL(HIP_KERNEL_NAME(start_prefix_sum), - dim3(grid_dim), dim3(default_block_size), 0, 0, - num_rows + 1, as_hip_type(row_ptrs), - as_hip_type(add_values.get_data())); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(finalize_prefix_sum), - dim3(grid_dim), dim3(default_block_size), 0, 0, - num_rows + 1, as_hip_type(row_ptrs), - as_hip_type(add_values.get_const_data())); + size_type grid_dim = ceildiv(num_rows, default_block_size); hipLaunchKernelGGL( kernel::fill_in_csr, dim3(grid_dim), dim3(default_block_size), 0, 0, num_rows, num_cols, stride, as_hip_type(source->get_const_values()), as_hip_type(row_ptrs), as_hip_type(col_idxs), as_hip_type(values)); - - add_values.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -406,19 +384,7 @@ void convert_to_sellp(std::shared_ptr exec, as_hip_type(nnz_per_row.get_const_data()), as_hip_type(slice_lengths), as_hip_type(slice_sets)); - auto add_values = - Array(exec, ceildiv(slice_num + 1, default_block_size)); - grid_dim = ceildiv(slice_num + 1, default_block_size); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(start_prefix_sum), - dim3(grid_dim), dim3(default_block_size), 0, 0, - slice_num + 1, as_hip_type(slice_sets), - as_hip_type(add_values.get_data())); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(finalize_prefix_sum), - dim3(grid_dim), dim3(default_block_size), 0, 0, - slice_num + 1, as_hip_type(slice_sets), - as_hip_type(add_values.get_const_data())); + prefix_sum(exec, slice_sets, slice_num + 1); grid_dim = ceildiv(num_rows, default_block_size); hipLaunchKernelGGL( @@ -426,9 +392,6 @@ void convert_to_sellp(std::shared_ptr exec, num_rows, num_cols, slice_size, stride, as_hip_type(source->get_const_values()), as_hip_type(slice_lengths), as_hip_type(slice_sets), as_hip_type(col_idxs), as_hip_type(vals)); - - add_values.clear(); - nnz_per_row.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -455,7 +418,6 @@ void count_nonzeros(std::shared_ptr exec, calculate_nonzeros_per_row(exec, source, &nnz_per_row); *result = reduce_add_array(exec, num_rows, nnz_per_row.get_const_data()); - nnz_per_row.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COUNT_NONZEROS_KERNEL); @@ -493,9 +455,6 @@ void calculate_max_nnz_per_row(std::shared_ptr exec, exec->get_master()->copy_from(exec.get(), 1, d_result.get_const_data(), result); - d_result.clear(); - block_results.clear(); - nnz_per_row.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( @@ -564,11 +523,6 @@ void calculate_total_cols(std::shared_ptr exec, exec->get_master()->copy_from(exec.get(), 1, d_result.get_const_data(), result); - - block_results.clear(); - nnz_per_row.clear(); - max_nnz_per_slice.clear(); - d_result.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( diff --git a/hip/matrix/ell_kernels.hip.cpp b/hip/matrix/ell_kernels.hip.cpp index d451f9b99f5..d2a17d6e525 100644 --- a/hip/matrix/ell_kernels.hip.cpp +++ b/hip/matrix/ell_kernels.hip.cpp @@ -46,6 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/matrix/common_kernels.hpp" #include "core/matrix/dense_kernels.hpp" #include "core/synthesizer/implementation_selection.hpp" #include "hip/base/config.hip.hpp" @@ -54,7 +55,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "hip/components/atomic.hip.hpp" #include "hip/components/cooperative_groups.hip.hpp" #include "hip/components/format_conversion.hip.hpp" -#include "hip/components/prefix_sum.hip.hpp" #include "hip/components/reduction.hip.hpp" #include "hip/components/zero_array.hip.hpp" @@ -316,15 +316,7 @@ void convert_to_csr(std::shared_ptr exec, size_type grid_dim = ceildiv(num_rows + 1, default_block_size); auto add_values = Array(exec, grid_dim); - hipLaunchKernelGGL(HIP_KERNEL_NAME(start_prefix_sum), - dim3(grid_dim), dim3(default_block_size), 0, 0, - num_rows + 1, as_hip_type(row_ptrs), - as_hip_type(add_values.get_data())); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(finalize_prefix_sum), - dim3(grid_dim), dim3(default_block_size), 0, 0, - num_rows + 1, as_hip_type(row_ptrs), - as_hip_type(add_values.get_const_data())); + prefix_sum(exec, row_ptrs, num_rows + 1); hipLaunchKernelGGL( kernel::fill_in_csr, dim3(grid_dim), dim3(default_block_size), 0, 0, @@ -332,8 +324,6 @@ void convert_to_csr(std::shared_ptr exec, as_hip_type(source->get_const_values()), as_hip_type(source->get_const_col_idxs()), as_hip_type(row_ptrs), as_hip_type(col_idxs), as_hip_type(values)); - - add_values.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -351,7 +341,6 @@ void count_nonzeros(std::shared_ptr exec, calculate_nonzeros_per_row(exec, source, &nnz_per_row); *result = reduce_add_array(exec, num_rows, nnz_per_row.get_const_data()); - nnz_per_row.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/hip/matrix/hybrid_kernels.hip.cpp b/hip/matrix/hybrid_kernels.hip.cpp index 04a6be08785..0e45df63125 100644 --- a/hip/matrix/hybrid_kernels.hip.cpp +++ b/hip/matrix/hybrid_kernels.hip.cpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/matrix/common_kernels.hpp" #include "core/matrix/coo_kernels.hpp" #include "core/matrix/ell_kernels.hpp" #include "hip/base/config.hip.hpp" @@ -47,7 +48,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "hip/components/atomic.hip.hpp" #include "hip/components/cooperative_groups.hip.hpp" #include "hip/components/format_conversion.hip.hpp" -#include "hip/components/prefix_sum.hip.hpp" #include "hip/components/reduction.hip.hpp" #include "hip/components/segment_scan.hip.hpp" #include "hip/components/zero_array.hip.hpp" @@ -134,18 +134,7 @@ void convert_to_csr(std::shared_ptr exec, 0, num_rows, as_hip_type(row_ptrs), as_hip_type(coo_row_ptrs.get_const_data())); - grid_num = ceildiv(num_rows + 1, default_block_size); - auto add_values = Array(exec, grid_num); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(start_prefix_sum), - dim3(grid_num), dim3(default_block_size), 0, 0, - num_rows + 1, as_hip_type(row_ptrs), - as_hip_type(add_values.get_data())); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(finalize_prefix_sum), - dim3(grid_num), dim3(default_block_size), 0, 0, - num_rows + 1, as_hip_type(row_ptrs), - as_hip_type(add_values.get_const_data())); + prefix_sum(exec, row_ptrs, num_rows + 1); // Fill the value grid_num = ceildiv(num_rows, default_block_size); diff --git a/hip/matrix/sellp_kernels.hip.cpp b/hip/matrix/sellp_kernels.hip.cpp index 0df2ef62634..10a513d01a7 100644 --- a/hip/matrix/sellp_kernels.hip.cpp +++ b/hip/matrix/sellp_kernels.hip.cpp @@ -43,10 +43,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/matrix/common_kernels.hpp" #include "hip/base/config.hip.hpp" #include "hip/base/hipsparse_bindings.hip.hpp" #include "hip/base/types.hip.hpp" -#include "hip/components/prefix_sum.hip.hpp" #include "hip/components/reduction.hip.hpp" @@ -179,18 +179,7 @@ void convert_to_csr(std::shared_ptr exec, 0, num_rows, slice_size, as_hip_type(source_slice_sets), as_hip_type(source_values), as_hip_type(result_row_ptrs)); - grid_dim = ceildiv(num_rows + 1, default_block_size); - auto add_values = Array(exec, grid_dim); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(start_prefix_sum), - dim3(grid_dim), dim3(default_block_size), 0, 0, - num_rows + 1, as_hip_type(result_row_ptrs), - as_hip_type(add_values.get_data())); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(finalize_prefix_sum), - dim3(grid_dim), dim3(default_block_size), 0, 0, - num_rows + 1, as_hip_type(result_row_ptrs), - as_hip_type(add_values.get_const_data())); + prefix_sum(exec, result_row_ptrs, num_rows + 1); grid_dim = ceildiv(num_rows, default_block_size); @@ -200,8 +189,6 @@ void convert_to_csr(std::shared_ptr exec, as_hip_type(source_col_idxs), as_hip_type(source_values), as_hip_type(result_row_ptrs), as_hip_type(result_col_idxs), as_hip_type(result_values)); - - add_values.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -228,7 +215,6 @@ void count_nonzeros(std::shared_ptr exec, as_hip_type(nnz_per_row.get_data())); *result = reduce_add_array(exec, num_rows, nnz_per_row.get_const_data()); - nnz_per_row.clear(); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/hip/matrix/sparsity_csr_kernels.hip.cpp b/hip/matrix/sparsity_csr_kernels.hip.cpp index 65915b518e6..54ab4a7c69f 100644 --- a/hip/matrix/sparsity_csr_kernels.hip.cpp +++ b/hip/matrix/sparsity_csr_kernels.hip.cpp @@ -48,7 +48,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "hip/base/types.hip.hpp" #include "hip/components/atomic.hip.hpp" #include "hip/components/cooperative_groups.hip.hpp" -#include "hip/components/prefix_sum.hip.hpp" #include "hip/components/reduction.hip.hpp" #include "hip/components/segment_scan.hip.hpp" #include "hip/components/uninitialized_array.hip.hpp" diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index dbca65286c0..d1c585bd6c7 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -5,6 +5,7 @@ target_sources(ginkgo_omp PRIVATE base/version.cpp factorization/par_ilu_kernels.cpp + matrix/common_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/omp/factorization/par_ilu_kernels.cpp b/omp/factorization/par_ilu_kernels.cpp index 8512d8e5670..1a35445d0e6 100644 --- a/omp/factorization/par_ilu_kernels.cpp +++ b/omp/factorization/par_ilu_kernels.cpp @@ -38,6 +38,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/matrix/common_kernels.hpp" + + namespace gko { namespace kernels { namespace omp { @@ -55,14 +58,13 @@ void initialize_row_ptrs_l_u( const matrix::Csr *system_matrix, IndexType *l_row_ptrs, IndexType *u_row_ptrs) { + auto num_rows = system_matrix->get_size()[0]; auto row_ptrs = system_matrix->get_const_row_ptrs(); auto col_idxs = system_matrix->get_const_col_idxs(); - l_row_ptrs[0] = 0; - u_row_ptrs[0] = 0; // Calculate the NNZ per row first #pragma omp parallel for - for (size_type row = 0; row < system_matrix->get_size()[0]; ++row) { + for (size_type row = 0; row < num_rows; ++row) { size_type l_nnz{}; size_type u_nnz{}; bool has_diagonal{}; @@ -76,20 +78,13 @@ void initialize_row_ptrs_l_u( } has_diagonal |= col == row; } - l_row_ptrs[row + 1] = l_nnz + !has_diagonal; - u_row_ptrs[row + 1] = u_nnz + !has_diagonal; + l_row_ptrs[row] = l_nnz + !has_diagonal; + u_row_ptrs[row] = u_nnz + !has_diagonal; } // Now, compute the prefix-sum, to get proper row_ptrs for L and U - IndexType l_previous_nnz{}; - IndexType u_previous_nnz{}; - for (size_type row = 1; row < system_matrix->get_size()[0] + 1; ++row) { - l_previous_nnz += l_row_ptrs[row]; - u_previous_nnz += u_row_ptrs[row]; - - l_row_ptrs[row] = l_previous_nnz; - u_row_ptrs[row] = u_previous_nnz; - } + prefix_sum(exec, l_row_ptrs, num_rows + 1); + prefix_sum(exec, u_row_ptrs, num_rows + 1); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/omp/matrix/common_kernels.cpp b/omp/matrix/common_kernels.cpp new file mode 100644 index 00000000000..f5dabe7db3e --- /dev/null +++ b/omp/matrix/common_kernels.cpp @@ -0,0 +1,65 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + + +#include "core/matrix/common_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace omp { + + +template +void prefix_sum(std::shared_ptr exec, IndexType *counts, + size_type num_entries) +{ + IndexType partial_sum{}; + for (IndexType i = 0; i < num_entries; ++i) { + auto nnz = counts[i]; + counts[i] = partial_sum; + partial_sum += nnz; + } +} + + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_KERNEL); + +// explicitly instantiate for size_type as well, as this is used in the SellP +// format +template void prefix_sum(std::shared_ptr exec, + size_type *counts, size_type num_entries); + + +} // namespace omp +} // namespace kernels +} // namespace gko \ No newline at end of file diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index eea207e914a..dad2c64ce03 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -53,6 +53,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/iterator_factory.hpp" +#include "core/matrix/common_kernels.hpp" #include "core/matrix/csr_builder.hpp" #include "omp/components/format_conversion.hpp" @@ -221,12 +222,11 @@ void spgemm(std::shared_ptr exec, for (size_type a_row = 0; a_row < num_rows; ++a_row) { local_col_idxs.clear(); spgemm_insert_row2(local_col_idxs, a, b, a_row); - c_row_ptrs[a_row + 1] = local_col_idxs.size(); + c_row_ptrs[a_row] = local_col_idxs.size(); } - // build row pointers: exclusive scan (thus the + 1) - c_row_ptrs[0] = 0; - std::partial_sum(c_row_ptrs + 1, c_row_ptrs + num_rows + 1, c_row_ptrs + 1); + // build row pointers + prefix_sum(exec, c_row_ptrs, num_rows + 1); // second sweep: accumulate non-zeros auto new_nnz = c_row_ptrs[num_rows]; @@ -282,12 +282,11 @@ void advanced_spgemm(std::shared_ptr exec, if (valpha != zero(valpha)) { spgemm_insert_row2(local_col_idxs, a, b, a_row); } - c_row_ptrs[a_row + 1] = local_col_idxs.size(); + c_row_ptrs[a_row] = local_col_idxs.size(); } - // build row pointers: exclusive scan (thus the + 1) - c_row_ptrs[0] = 0; - std::partial_sum(c_row_ptrs + 1, c_row_ptrs + num_rows + 1, c_row_ptrs + 1); + // build row pointers + prefix_sum(exec, c_row_ptrs, num_rows + 1); // second sweep: accumulate non-zeros auto new_nnz = c_row_ptrs[num_rows]; diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index 7516fc8641f..2b94ef0ebad 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -3,6 +3,7 @@ target_sources(ginkgo_reference PRIVATE base/version.cpp factorization/par_ilu_kernels.cpp + matrix/common_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/reference/factorization/par_ilu_kernels.cpp b/reference/factorization/par_ilu_kernels.cpp index c84ca97183f..9f68a550961 100644 --- a/reference/factorization/par_ilu_kernels.cpp +++ b/reference/factorization/par_ilu_kernels.cpp @@ -38,6 +38,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/matrix/common_kernels.hpp" + + namespace gko { namespace kernels { namespace reference { @@ -74,8 +77,10 @@ void initialize_row_ptrs_l_u( } has_diagonal |= col == row; } - l_row_ptrs[row + 1] = l_nnz + !has_diagonal; - u_row_ptrs[row + 1] = u_nnz + !has_diagonal; + l_nnz += !has_diagonal; + u_nnz += !has_diagonal; + l_row_ptrs[row + 1] = l_nnz; + u_row_ptrs[row + 1] = u_nnz; } } diff --git a/reference/matrix/common_kernels.cpp b/reference/matrix/common_kernels.cpp new file mode 100644 index 00000000000..9b721cdb3f2 --- /dev/null +++ b/reference/matrix/common_kernels.cpp @@ -0,0 +1,66 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + + +#include "core/matrix/common_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace reference { + + +template +void prefix_sum(std::shared_ptr exec, + IndexType *counts, size_type num_entries) +{ + IndexType partial_sum{}; + for (IndexType i = 0; i < num_entries; ++i) { + auto nnz = counts[i]; + counts[i] = partial_sum; + partial_sum += nnz; + } +} + + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_KERNEL); + +// explicitly instantiate for size_type as well, as this is used in the SellP +// format +template void prefix_sum( + std::shared_ptr exec, size_type *counts, + size_type num_entries); + + +} // namespace reference +} // namespace kernels +} // namespace gko \ No newline at end of file diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 0e0a587b9ae..2d22ddb58e9 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -54,6 +54,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/iterator_factory.hpp" +#include "core/matrix/common_kernels.hpp" #include "core/matrix/csr_builder.hpp" #include "reference/components/format_conversion.hpp" @@ -219,12 +220,11 @@ void spgemm(std::shared_ptr exec, for (size_type a_row = 0; a_row < num_rows; ++a_row) { local_col_idxs.clear(); spgemm_insert_row2(local_col_idxs, a, b, a_row); - c_row_ptrs[a_row + 1] = local_col_idxs.size(); + c_row_ptrs[a_row] = local_col_idxs.size(); } - // build row pointers: exclusive scan (thus the + 1) - c_row_ptrs[0] = 0; - std::partial_sum(c_row_ptrs + 1, c_row_ptrs + num_rows + 1, c_row_ptrs + 1); + // build row pointers + prefix_sum(exec, c_row_ptrs, num_rows + 1); // second sweep: accumulate non-zeros auto new_nnz = c_row_ptrs[num_rows]; @@ -278,12 +278,11 @@ void advanced_spgemm(std::shared_ptr exec, if (valpha != zero(valpha)) { spgemm_insert_row2(local_col_idxs, a, b, a_row); } - c_row_ptrs[a_row + 1] = local_col_idxs.size(); + c_row_ptrs[a_row] = local_col_idxs.size(); } - // build row pointers: exclusive scan (thus the + 1) - c_row_ptrs[0] = 0; - std::partial_sum(c_row_ptrs + 1, c_row_ptrs + num_rows + 1, c_row_ptrs + 1); + // build row pointers + prefix_sum(exec, c_row_ptrs, num_rows + 1); // second sweep: accumulate non-zeros auto new_nnz = c_row_ptrs[num_rows]; diff --git a/reference/test/factorization/par_ilu_kernels.cpp b/reference/test/factorization/par_ilu_kernels.cpp index 4e697c61e6a..ea7126641f5 100644 --- a/reference/test/factorization/par_ilu_kernels.cpp +++ b/reference/test/factorization/par_ilu_kernels.cpp @@ -83,6 +83,10 @@ class ParIlu : public ::testing::Test { : ref(gko::ReferenceExecutor::create()), exec(std::static_pointer_cast(ref)), // clang-format off + empty_csr(gko::initialize( + {{0., 0., 0.}, + {0., 0., 0.}, + {0., 0., 0.}}, exec)), identity(gko::initialize( {{1., 0., 0.}, {0., 1., 0.}, @@ -139,16 +143,16 @@ class ParIlu : public ::testing::Test { big_nodiag_l_expected(gko::initialize({{1., 0., 0., 0., 0., 0.}, {1., 1., 0., 0., 0., 0.}, {0., 2., 1., 0., 0., 0.}, - {2., 0., 0., 1., 0., 0.}, - {1., 1., 0., 1., 1., 0.}, - {0., 2., 1., 0.25, -0.5, 1.}}, + {1., 0., 2., 1., 0., 0.}, + {1., 1., 0., -2., 1., 0.}, + {0., 2., 1., -0.5, 2.5, 1.}}, exec)), big_nodiag_u_expected(gko::initialize({{1., 1., 1., 0., 1., 3.}, {0., 1., 1., 0., 1., 0.}, - {0., 0., 1., 3., 1., -2.}, - {0., 0., 0., 4., 2., 0.}, - {0., 0., 0., 0., -3., 3.}, - {0., 0., 0., 0., 0., 11.5}}, + {0., 0., 1., 3., 1., 5.}, + {0., 0., 0., -2., 1., -9.}, + {0., 0., 0., 0., 1., -15.}, + {0., 0., 0., 0., 0., 36.}}, exec)), // clang-format on ilu_factory_skip( @@ -163,6 +167,7 @@ class ParIlu : public ::testing::Test { std::shared_ptr ref; std::shared_ptr exec; + std::shared_ptr empty_csr; std::shared_ptr identity; std::shared_ptr lower_triangular; std::shared_ptr upper_triangular; @@ -203,6 +208,28 @@ TEST_F(ParIlu, KernelInitializeRowPtrsLU) } +TEST_F(ParIlu, KernelInitializeRowPtrsLUZeroMatrix) +{ + auto empty_csr_l_expected = Csr::create(ref); + identity->convert_to(gko::lend(empty_csr_l_expected)); + auto empty_csr_u_expected = Csr::create(ref); + identity->convert_to(gko::lend(empty_csr_u_expected)); + auto num_row_ptrs = empty_csr->get_size()[0] + 1; + std::vector l_row_ptrs_vector(num_row_ptrs); + std::vector u_row_ptrs_vector(num_row_ptrs); + auto l_row_ptrs = l_row_ptrs_vector.data(); + auto u_row_ptrs = u_row_ptrs_vector.data(); + + gko::kernels::reference::par_ilu_factorization::initialize_row_ptrs_l_u( + ref, gko::lend(empty_csr), l_row_ptrs, u_row_ptrs); + + ASSERT_TRUE(std::equal(l_row_ptrs, l_row_ptrs + num_row_ptrs, + empty_csr_l_expected->get_const_row_ptrs())); + ASSERT_TRUE(std::equal(u_row_ptrs, u_row_ptrs + num_row_ptrs, + empty_csr_u_expected->get_const_row_ptrs())); +} + + TEST_F(ParIlu, KernelInitializeLU) { // clang-format off @@ -233,6 +260,21 @@ TEST_F(ParIlu, KernelInitializeLU) } +TEST_F(ParIlu, KernelInitializeLUZeroMatrix) +{ + auto actual_l = Csr::create(ref); + auto actual_u = Csr::create(ref); + actual_l->copy_from(identity.get()); + actual_u->copy_from(identity.get()); + + gko::kernels::reference::par_ilu_factorization::initialize_l_u( + ref, gko::lend(empty_csr), gko::lend(actual_l), gko::lend(actual_u)); + + GKO_ASSERT_MTX_NEAR(actual_l, identity, 1e-14); + GKO_ASSERT_MTX_NEAR(actual_u, identity, 1e-14); +} + + TEST_F(ParIlu, KernelComputeLU) { // clang-format off From ffbbf8aead226a60d736054691c39361309f5e90 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 8 Jan 2020 11:15:20 +0100 Subject: [PATCH 2/4] move prefix_sum kernel to components --- .../common_kernels.hpp => components/prefix_sum.hpp} | 6 +++--- cuda/CMakeLists.txt | 2 +- cuda/{matrix/common_kernels.cu => components/prefix_sum.cu} | 2 +- cuda/factorization/par_ilu_kernels.cu | 2 +- cuda/matrix/csr_kernels.cu | 2 +- cuda/matrix/dense_kernels.cu | 2 +- cuda/matrix/ell_kernels.cu | 2 +- cuda/matrix/hybrid_kernels.cu | 2 +- cuda/matrix/sellp_kernels.cu | 2 +- hip/CMakeLists.txt | 2 +- .../prefix_sum.hip.cpp} | 2 +- hip/factorization/par_ilu_kernels.hip.cpp | 2 +- hip/matrix/csr_kernels.hip.cpp | 2 +- hip/matrix/dense_kernels.hip.cpp | 2 +- hip/matrix/ell_kernels.hip.cpp | 2 +- hip/matrix/hybrid_kernels.hip.cpp | 2 +- hip/matrix/sellp_kernels.hip.cpp | 2 +- omp/CMakeLists.txt | 2 +- .../common_kernels.cpp => components/prefix_sum.cpp} | 2 +- omp/factorization/par_ilu_kernels.cpp | 2 +- omp/matrix/csr_kernels.cpp | 2 +- reference/CMakeLists.txt | 2 +- .../common_kernels.cpp => components/prefix_sum.cpp} | 2 +- reference/factorization/par_ilu_kernels.cpp | 2 +- reference/matrix/csr_kernels.cpp | 2 +- 25 files changed, 27 insertions(+), 27 deletions(-) rename core/{matrix/common_kernels.hpp => components/prefix_sum.hpp} (94%) rename cuda/{matrix/common_kernels.cu => components/prefix_sum.cu} (98%) rename hip/{matrix/common_kernels.hip.cpp => components/prefix_sum.hip.cpp} (98%) rename omp/{matrix/common_kernels.cpp => components/prefix_sum.cpp} (98%) rename reference/{matrix/common_kernels.cpp => components/prefix_sum.cpp} (98%) diff --git a/core/matrix/common_kernels.hpp b/core/components/prefix_sum.hpp similarity index 94% rename from core/matrix/common_kernels.hpp rename to core/components/prefix_sum.hpp index dd44730a4be..a2e3bdc808b 100644 --- a/core/matrix/common_kernels.hpp +++ b/core/components/prefix_sum.hpp @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_CORE_MATRIX_COMMON_KERNELS_HPP_ -#define GKO_CORE_MATRIX_COMMON_KERNELS_HPP_ +#ifndef GKO_CORE_COMPONENTS_PREFIX_SUM_HPP_ +#define GKO_CORE_COMPONENTS_PREFIX_SUM_HPP_ #include @@ -89,4 +89,4 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace kernels } // namespace gko -#endif // GKO_CORE_MATRIX_COMMON_KERNELS_HPP_ \ No newline at end of file +#endif // GKO_CORE_COMPONENTS_PREFIX_SUM_HPP_ \ No newline at end of file diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index 96d7ed44a13..9a9bf302586 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -62,9 +62,9 @@ target_sources(ginkgo_cuda base/exception.cpp base/executor.cpp base/version.cpp + components/prefix_sum.cu components/zero_array.cu factorization/par_ilu_kernels.cu - matrix/common_kernels.cu matrix/coo_kernels.cu matrix/csr_kernels.cu matrix/dense_kernels.cu diff --git a/cuda/matrix/common_kernels.cu b/cuda/components/prefix_sum.cu similarity index 98% rename from cuda/matrix/common_kernels.cu rename to cuda/components/prefix_sum.cu index 6952e1d4f21..83a01233d6c 100644 --- a/cuda/matrix/common_kernels.cu +++ b/cuda/components/prefix_sum.cu @@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" #include "cuda/components/prefix_sum.cuh" diff --git a/cuda/factorization/par_ilu_kernels.cu b/cuda/factorization/par_ilu_kernels.cu index 1cd41b6b1c0..14894a9ef1d 100644 --- a/cuda/factorization/par_ilu_kernels.cu +++ b/cuda/factorization/par_ilu_kernels.cu @@ -38,7 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" #include "cuda/base/math.hpp" #include "cuda/base/types.hpp" diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu index 6f4280ced10..fdf0bf25e1c 100644 --- a/cuda/matrix/csr_kernels.cu +++ b/cuda/matrix/csr_kernels.cu @@ -46,7 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" #include "core/matrix/csr_builder.hpp" #include "core/matrix/dense_kernels.hpp" #include "core/synthesizer/implementation_selection.hpp" diff --git a/cuda/matrix/dense_kernels.cu b/cuda/matrix/dense_kernels.cu index 4b6a6c3cbbc..b7e656e858d 100644 --- a/cuda/matrix/dense_kernels.cu +++ b/cuda/matrix/dense_kernels.cu @@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" #include "cuda/base/config.hpp" #include "cuda/base/cublas_bindings.hpp" #include "cuda/base/pointer_mode_guard.hpp" diff --git a/cuda/matrix/ell_kernels.cu b/cuda/matrix/ell_kernels.cu index 89ce0f9d27e..4dba873649c 100644 --- a/cuda/matrix/ell_kernels.cu +++ b/cuda/matrix/ell_kernels.cu @@ -43,7 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" #include "core/matrix/dense_kernels.hpp" #include "core/synthesizer/implementation_selection.hpp" #include "cuda/base/config.hpp" diff --git a/cuda/matrix/hybrid_kernels.cu b/cuda/matrix/hybrid_kernels.cu index fc9bdc4c1dd..5071f51ee2a 100644 --- a/cuda/matrix/hybrid_kernels.cu +++ b/cuda/matrix/hybrid_kernels.cu @@ -37,7 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" #include "core/matrix/coo_kernels.hpp" #include "core/matrix/ell_kernels.hpp" #include "cuda/base/config.hpp" diff --git a/cuda/matrix/sellp_kernels.cu b/cuda/matrix/sellp_kernels.cu index 024ed44b38e..2cc8ad2cc22 100644 --- a/cuda/matrix/sellp_kernels.cu +++ b/cuda/matrix/sellp_kernels.cu @@ -40,7 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" #include "cuda/base/config.hpp" #include "cuda/base/cusparse_bindings.hpp" #include "cuda/base/types.hpp" diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index a0b1b938167..5bedaf1eea8 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -94,9 +94,9 @@ set(GINKGO_HIP_SOURCES base/exception.hip.cpp base/executor.hip.cpp base/version.hip.cpp + components/prefix_sum.hip.cpp components/zero_array.hip.cpp factorization/par_ilu_kernels.hip.cpp - matrix/common_kernels.hip.cpp matrix/coo_kernels.hip.cpp matrix/csr_kernels.hip.cpp matrix/dense_kernels.hip.cpp diff --git a/hip/matrix/common_kernels.hip.cpp b/hip/components/prefix_sum.hip.cpp similarity index 98% rename from hip/matrix/common_kernels.hip.cpp rename to hip/components/prefix_sum.hip.cpp index a3cc319727c..f0283b6c80e 100644 --- a/hip/matrix/common_kernels.hip.cpp +++ b/hip/components/prefix_sum.hip.cpp @@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" #include "hip/components/prefix_sum.hip.hpp" diff --git a/hip/factorization/par_ilu_kernels.hip.cpp b/hip/factorization/par_ilu_kernels.hip.cpp index 86170cda799..58d957c45b6 100644 --- a/hip/factorization/par_ilu_kernels.hip.cpp +++ b/hip/factorization/par_ilu_kernels.hip.cpp @@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" #include "hip/base/math.hip.hpp" #include "hip/base/types.hip.hpp" diff --git a/hip/matrix/csr_kernels.hip.cpp b/hip/matrix/csr_kernels.hip.cpp index c1ff237074e..2aa7eb3c055 100644 --- a/hip/matrix/csr_kernels.hip.cpp +++ b/hip/matrix/csr_kernels.hip.cpp @@ -49,7 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" #include "core/matrix/csr_builder.hpp" #include "core/matrix/dense_kernels.hpp" #include "core/synthesizer/implementation_selection.hpp" diff --git a/hip/matrix/dense_kernels.hip.cpp b/hip/matrix/dense_kernels.hip.cpp index 99600409a86..46540b33a52 100644 --- a/hip/matrix/dense_kernels.hip.cpp +++ b/hip/matrix/dense_kernels.hip.cpp @@ -45,7 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" #include "hip/base/config.hip.hpp" #include "hip/base/hipblas_bindings.hip.hpp" #include "hip/base/pointer_mode_guard.hip.hpp" diff --git a/hip/matrix/ell_kernels.hip.cpp b/hip/matrix/ell_kernels.hip.cpp index d2a17d6e525..835a4a6fc34 100644 --- a/hip/matrix/ell_kernels.hip.cpp +++ b/hip/matrix/ell_kernels.hip.cpp @@ -46,7 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" #include "core/matrix/dense_kernels.hpp" #include "core/synthesizer/implementation_selection.hpp" #include "hip/base/config.hip.hpp" diff --git a/hip/matrix/hybrid_kernels.hip.cpp b/hip/matrix/hybrid_kernels.hip.cpp index 0e45df63125..397e62fe2b7 100644 --- a/hip/matrix/hybrid_kernels.hip.cpp +++ b/hip/matrix/hybrid_kernels.hip.cpp @@ -40,7 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" #include "core/matrix/coo_kernels.hpp" #include "core/matrix/ell_kernels.hpp" #include "hip/base/config.hip.hpp" diff --git a/hip/matrix/sellp_kernels.hip.cpp b/hip/matrix/sellp_kernels.hip.cpp index 10a513d01a7..49f6ace3c15 100644 --- a/hip/matrix/sellp_kernels.hip.cpp +++ b/hip/matrix/sellp_kernels.hip.cpp @@ -43,7 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" #include "hip/base/config.hip.hpp" #include "hip/base/hipsparse_bindings.hip.hpp" #include "hip/base/types.hip.hpp" diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index d1c585bd6c7..2206f74659c 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -4,8 +4,8 @@ add_library(ginkgo_omp $ "") target_sources(ginkgo_omp PRIVATE base/version.cpp + components/prefix_sum.cpp factorization/par_ilu_kernels.cpp - matrix/common_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/omp/matrix/common_kernels.cpp b/omp/components/prefix_sum.cpp similarity index 98% rename from omp/matrix/common_kernels.cpp rename to omp/components/prefix_sum.cpp index f5dabe7db3e..7f487a552fb 100644 --- a/omp/matrix/common_kernels.cpp +++ b/omp/components/prefix_sum.cpp @@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" namespace gko { diff --git a/omp/factorization/par_ilu_kernels.cpp b/omp/factorization/par_ilu_kernels.cpp index 1a35445d0e6..0a6f69d19f6 100644 --- a/omp/factorization/par_ilu_kernels.cpp +++ b/omp/factorization/par_ilu_kernels.cpp @@ -38,7 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" namespace gko { diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index dad2c64ce03..3552046a491 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -53,7 +53,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/iterator_factory.hpp" -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" #include "core/matrix/csr_builder.hpp" #include "omp/components/format_conversion.hpp" diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index 2b94ef0ebad..bb7c9ac906a 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -2,8 +2,8 @@ add_library(ginkgo_reference $ "") target_sources(ginkgo_reference PRIVATE base/version.cpp + components/prefix_sum.cpp factorization/par_ilu_kernels.cpp - matrix/common_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/reference/matrix/common_kernels.cpp b/reference/components/prefix_sum.cpp similarity index 98% rename from reference/matrix/common_kernels.cpp rename to reference/components/prefix_sum.cpp index 9b721cdb3f2..b1dadd229d0 100644 --- a/reference/matrix/common_kernels.cpp +++ b/reference/components/prefix_sum.cpp @@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" namespace gko { diff --git a/reference/factorization/par_ilu_kernels.cpp b/reference/factorization/par_ilu_kernels.cpp index 9f68a550961..01e9a1d5ae7 100644 --- a/reference/factorization/par_ilu_kernels.cpp +++ b/reference/factorization/par_ilu_kernels.cpp @@ -38,7 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" namespace gko { diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 2d22ddb58e9..de0abeef99f 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -54,7 +54,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/iterator_factory.hpp" -#include "core/matrix/common_kernels.hpp" +#include "core/components/prefix_sum.hpp" #include "core/matrix/csr_builder.hpp" #include "reference/components/format_conversion.hpp" From bfa9a807d8f48c5af100396e6f4b0a4186ee3b4f Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 8 Jan 2020 12:00:41 +0100 Subject: [PATCH 3/4] add prefix_sum tests --- cuda/test/components/CMakeLists.txt | 1 + cuda/test/components/prefix_sum.cu | 94 ++++++++++++++++++++++++ hip/test/components/CMakeLists.txt | 1 + hip/test/components/prefix_sum.hip.cpp | 94 ++++++++++++++++++++++++ omp/test/CMakeLists.txt | 1 + omp/test/components/CMakeLists.txt | 1 + omp/test/components/prefix_sum.cpp | 93 +++++++++++++++++++++++ reference/test/CMakeLists.txt | 1 + reference/test/components/CMakeLists.txt | 1 + reference/test/components/prefix_sum.cpp | 70 ++++++++++++++++++ 10 files changed, 357 insertions(+) create mode 100644 cuda/test/components/prefix_sum.cu create mode 100644 hip/test/components/prefix_sum.hip.cpp create mode 100644 omp/test/components/CMakeLists.txt create mode 100644 omp/test/components/prefix_sum.cpp create mode 100644 reference/test/components/CMakeLists.txt create mode 100644 reference/test/components/prefix_sum.cpp diff --git a/cuda/test/components/CMakeLists.txt b/cuda/test/components/CMakeLists.txt index 5d0301b6552..4b501e93b4c 100644 --- a/cuda/test/components/CMakeLists.txt +++ b/cuda/test/components/CMakeLists.txt @@ -1 +1,2 @@ +ginkgo_create_cuda_test(prefix_sum) ginkgo_create_cuda_test(sorting) \ No newline at end of file diff --git a/cuda/test/components/prefix_sum.cu b/cuda/test/components/prefix_sum.cu new file mode 100644 index 00000000000..df24d3f646d --- /dev/null +++ b/cuda/test/components/prefix_sum.cu @@ -0,0 +1,94 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/components/prefix_sum.hpp" + + +#include +#include +#include + + +#include + + +#include + + +namespace { + + +class PrefixSum : public ::testing::Test { +protected: + using index_type = gko::int32; + PrefixSum() + : ref(gko::ReferenceExecutor::create()), + exec(gko::CudaExecutor::create(0, ref)), + rand(293), + total_size(42793), + vals(ref, total_size), + dvals(exec) + { + std::uniform_int_distribution dist(0, 1000); + for (gko::size_type i = 0; i < total_size; ++i) { + vals.get_data()[i] = dist(rand); + } + dvals = vals; + } + + void test(gko::size_type size) + { + gko::kernels::reference::prefix_sum(ref, vals.get_data(), size); + gko::kernels::cuda::prefix_sum(exec, dvals.get_data(), size); + + gko::Array dresult(ref, dvals); + auto dptr = dresult.get_const_data(); + auto ptr = vals.get_const_data(); + ASSERT_TRUE(std::equal(ptr, ptr + size, dptr)); + } + + std::shared_ptr ref; + std::shared_ptr exec; + std::default_random_engine rand; + gko::size_type total_size; + gko::Array vals; + gko::Array dvals; +}; + + +TEST_F(PrefixSum, SmallEqualsReference) { test(100); } + + +TEST_F(PrefixSum, BigEqualsReference) { test(total_size); } + + +} // namespace diff --git a/hip/test/components/CMakeLists.txt b/hip/test/components/CMakeLists.txt index e3c6ca06f01..9824e97743f 100644 --- a/hip/test/components/CMakeLists.txt +++ b/hip/test/components/CMakeLists.txt @@ -1 +1,2 @@ +ginkgo_create_hip_test(prefix_sum) ginkgo_create_hip_test(sorting) \ No newline at end of file diff --git a/hip/test/components/prefix_sum.hip.cpp b/hip/test/components/prefix_sum.hip.cpp new file mode 100644 index 00000000000..690ec1847b0 --- /dev/null +++ b/hip/test/components/prefix_sum.hip.cpp @@ -0,0 +1,94 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/components/prefix_sum.hpp" + + +#include +#include +#include + + +#include + + +#include + + +namespace { + + +class PrefixSum : public ::testing::Test { +protected: + using index_type = gko::int32; + PrefixSum() + : ref(gko::ReferenceExecutor::create()), + exec(gko::HipExecutor::create(0, ref)), + rand(293), + total_size(42793), + vals(ref, total_size), + dvals(exec) + { + std::uniform_int_distribution dist(0, 1000); + for (gko::size_type i = 0; i < total_size; ++i) { + vals.get_data()[i] = dist(rand); + } + dvals = vals; + } + + void test(gko::size_type size) + { + gko::kernels::reference::prefix_sum(ref, vals.get_data(), size); + gko::kernels::hip::prefix_sum(exec, dvals.get_data(), size); + + gko::Array dresult(ref, dvals); + auto dptr = dresult.get_const_data(); + auto ptr = vals.get_const_data(); + ASSERT_TRUE(std::equal(ptr, ptr + size, dptr)); + } + + std::shared_ptr ref; + std::shared_ptr exec; + std::default_random_engine rand; + gko::size_type total_size; + gko::Array vals; + gko::Array dvals; +}; + + +TEST_F(PrefixSum, SmallEqualsReference) { test(100); } + + +TEST_F(PrefixSum, BigEqualsReference) { test(total_size); } + + +} // namespace diff --git a/omp/test/CMakeLists.txt b/omp/test/CMakeLists.txt index f3fb866163a..d746413f53f 100644 --- a/omp/test/CMakeLists.txt +++ b/omp/test/CMakeLists.txt @@ -1,5 +1,6 @@ include(${CMAKE_SOURCE_DIR}/cmake/create_test.cmake) +add_subdirectory(components) add_subdirectory(factorization) add_subdirectory(matrix) add_subdirectory(preconditioner) diff --git a/omp/test/components/CMakeLists.txt b/omp/test/components/CMakeLists.txt new file mode 100644 index 00000000000..faffe4e94e1 --- /dev/null +++ b/omp/test/components/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_create_test(prefix_sum) \ No newline at end of file diff --git a/omp/test/components/prefix_sum.cpp b/omp/test/components/prefix_sum.cpp new file mode 100644 index 00000000000..b0c24ab7a63 --- /dev/null +++ b/omp/test/components/prefix_sum.cpp @@ -0,0 +1,93 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/components/prefix_sum.hpp" + + +#include +#include +#include + + +#include + + +#include + + +namespace { + + +class PrefixSum : public ::testing::Test { +protected: + using index_type = gko::int32; + PrefixSum() + : ref(gko::ReferenceExecutor::create()), + exec(gko::OmpExecutor::create()), + rand(293), + total_size(42793), + vals(ref, total_size), + dvals(exec) + { + std::uniform_int_distribution dist(0, 1000); + for (gko::size_type i = 0; i < total_size; ++i) { + vals.get_data()[i] = dist(rand); + } + dvals = vals; + } + + void test(gko::size_type size) + { + gko::kernels::reference::prefix_sum(ref, vals.get_data(), size); + gko::kernels::omp::prefix_sum(exec, dvals.get_data(), size); + + auto dptr = dvals.get_const_data(); + auto ptr = vals.get_const_data(); + ASSERT_TRUE(std::equal(ptr, ptr + size, dptr)); + } + + std::shared_ptr ref; + std::shared_ptr exec; + std::default_random_engine rand; + gko::size_type total_size; + gko::Array vals; + gko::Array dvals; +}; + + +TEST_F(PrefixSum, SmallEqualsReference) { test(100); } + + +TEST_F(PrefixSum, BigEqualsReference) { test(total_size); } + + +} // namespace diff --git a/reference/test/CMakeLists.txt b/reference/test/CMakeLists.txt index f0e39d5c568..b359d8146ed 100644 --- a/reference/test/CMakeLists.txt +++ b/reference/test/CMakeLists.txt @@ -1,6 +1,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/create_test.cmake) add_subdirectory(base) +add_subdirectory(components) add_subdirectory(factorization) add_subdirectory(log) add_subdirectory(matrix) diff --git a/reference/test/components/CMakeLists.txt b/reference/test/components/CMakeLists.txt new file mode 100644 index 00000000000..faffe4e94e1 --- /dev/null +++ b/reference/test/components/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_create_test(prefix_sum) \ No newline at end of file diff --git a/reference/test/components/prefix_sum.cpp b/reference/test/components/prefix_sum.cpp new file mode 100644 index 00000000000..2dfa31d5330 --- /dev/null +++ b/reference/test/components/prefix_sum.cpp @@ -0,0 +1,70 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/components/prefix_sum.hpp" + + +#include +#include +#include + + +#include + + +namespace { + + +class PrefixSum : public ::testing::Test { +protected: + using index_type = gko::int32; + PrefixSum() + : exec(gko::ReferenceExecutor::create()), + vals{3, 5, 6, 7, 1, 5, 9, 7, 2, 0, 5}, + expected{0, 3, 8, 14, 21, 22, 27, 36, 43, 45, 45} + {} + + std::shared_ptr exec; + std::vector vals; + std::vector expected; +}; + + +TEST_F(PrefixSum, Works) +{ + gko::kernels::reference::prefix_sum(exec, vals.data(), vals.size()); + + ASSERT_EQ(vals, expected); +} + + +} // namespace From 4abd1ebb5996a2538f89059060c9832d7ed32ca6 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 12 Jan 2020 19:22:39 +0100 Subject: [PATCH 4/4] improve formatting Co-Authored-By: Yuhsiang M. Tsai --- cuda/components/prefix_sum.cu | 4 +--- hip/components/prefix_sum.hip.cpp | 4 +--- omp/components/prefix_sum.cpp | 4 +--- reference/components/prefix_sum.cpp | 4 +--- 4 files changed, 4 insertions(+), 12 deletions(-) diff --git a/cuda/components/prefix_sum.cu b/cuda/components/prefix_sum.cu index 83a01233d6c..ca273917faa 100644 --- a/cuda/components/prefix_sum.cu +++ b/cuda/components/prefix_sum.cu @@ -30,7 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ - #include "core/components/prefix_sum.hpp" @@ -60,7 +59,6 @@ void prefix_sum(std::shared_ptr exec, IndexType *counts, block_sums); } - GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_KERNEL); // explicitly instantiate for size_type as well, as this is used in the SellP @@ -71,4 +69,4 @@ template void prefix_sum(std::shared_ptr exec, } // namespace cuda } // namespace kernels -} // namespace gko \ No newline at end of file +} // namespace gko diff --git a/hip/components/prefix_sum.hip.cpp b/hip/components/prefix_sum.hip.cpp index f0283b6c80e..69907ea002f 100644 --- a/hip/components/prefix_sum.hip.cpp +++ b/hip/components/prefix_sum.hip.cpp @@ -30,7 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ - #include "core/components/prefix_sum.hpp" @@ -61,7 +60,6 @@ void prefix_sum(std::shared_ptr exec, IndexType *counts, counts, block_sums); } - GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_KERNEL); // explicitly instantiate for size_type as well, as this is used in the SellP @@ -72,4 +70,4 @@ template void prefix_sum(std::shared_ptr exec, } // namespace hip } // namespace kernels -} // namespace gko \ No newline at end of file +} // namespace gko diff --git a/omp/components/prefix_sum.cpp b/omp/components/prefix_sum.cpp index 7f487a552fb..9c8f7428aa6 100644 --- a/omp/components/prefix_sum.cpp +++ b/omp/components/prefix_sum.cpp @@ -30,7 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ - #include "core/components/prefix_sum.hpp" @@ -51,7 +50,6 @@ void prefix_sum(std::shared_ptr exec, IndexType *counts, } } - GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_KERNEL); // explicitly instantiate for size_type as well, as this is used in the SellP @@ -62,4 +60,4 @@ template void prefix_sum(std::shared_ptr exec, } // namespace omp } // namespace kernels -} // namespace gko \ No newline at end of file +} // namespace gko diff --git a/reference/components/prefix_sum.cpp b/reference/components/prefix_sum.cpp index b1dadd229d0..f70e934539e 100644 --- a/reference/components/prefix_sum.cpp +++ b/reference/components/prefix_sum.cpp @@ -30,7 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ - #include "core/components/prefix_sum.hpp" @@ -51,7 +50,6 @@ void prefix_sum(std::shared_ptr exec, } } - GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_KERNEL); // explicitly instantiate for size_type as well, as this is used in the SellP @@ -63,4 +61,4 @@ template void prefix_sum( } // namespace reference } // namespace kernels -} // namespace gko \ No newline at end of file +} // namespace gko