diff --git a/common/src/KokkosKernels_SimpleUtils.hpp b/common/src/KokkosKernels_SimpleUtils.hpp index bb2a6d43b9..86944e9c4d 100644 --- a/common/src/KokkosKernels_SimpleUtils.hpp +++ b/common/src/KokkosKernels_SimpleUtils.hpp @@ -75,15 +75,16 @@ class SquareRootFunctor { template struct ExclusiveParallelPrefixSum { - typedef typename view_t::value_type idx; + typedef typename view_t::value_type value_type; view_t array_sum; ExclusiveParallelPrefixSum(view_t arr_) : array_sum(arr_) {} KOKKOS_INLINE_FUNCTION - void operator()(const size_t ii, size_t &update, const bool final) const { - idx val = array_sum(ii); + void operator()(const size_t ii, value_type &update, const bool final) const { + value_type val = + (ii == array_sum.extent(0) - 1) ? value_type(0) : array_sum(ii); if (final) { - array_sum(ii) = idx(update); + array_sum(ii) = value_type(update); } update += val; } @@ -118,6 +119,25 @@ inline void kk_exclusive_parallel_prefix_sum( ExclusiveParallelPrefixSum(arr)); } +/*** + * \brief Function performs the exclusive parallel prefix sum. That is, each + * entry holds the sum of all entries before it. This version also returns the final sum, + * equivalent to the sum-reduction of arr before doing the scan (excluding the entry at index arr.extent(0) - 1, which the scan functor treats as zero). + * \param num_elements: size of the array + * \param arr: the array for which the prefix sum will be performed. + * \param finalSum: will be set to arr[num_elements - 1] after computing the + * prefix sum. + */ +template +inline void kk_exclusive_parallel_prefix_sum( + typename view_t::value_type num_elements, view_t arr, + typename view_t::non_const_value_type &finalSum) { + typedef Kokkos::RangePolicy my_exec_space; + Kokkos::parallel_scan("KokkosKernels::Common::PrefixSum", + my_exec_space(0, num_elements), + ExclusiveParallelPrefixSum(arr), finalSum); +} + /*** * \brief Function performs the inclusive parallel prefix sum. That is each * entry holds the sum until itself including itself. 
\param num_elements: size diff --git a/perf_test/sparse/KokkosSparse_spadd.cpp b/perf_test/sparse/KokkosSparse_spadd.cpp index 5448843168..09b26fbc76 100644 --- a/perf_test/sparse/KokkosSparse_spadd.cpp +++ b/perf_test/sparse/KokkosSparse_spadd.cpp @@ -171,9 +171,6 @@ void run_experiment(const Params& params) { lno_view_t; typedef typename crsMat_t::StaticCrsGraphType::entries_type::non_const_type lno_nnz_view_t; - typedef typename crsMat_t::StaticCrsGraphType::row_map_type const_lno_view_t; - typedef - typename crsMat_t::StaticCrsGraphType::entries_type const_lno_nnz_view_t; lno_view_t row_mapC; // entriesC, valuesC and cusparseBuffer are allocated inside @@ -200,10 +197,8 @@ void run_experiment(const Params& params) { double numericTime = 0; // Do an untimed warm up symbolic, and preallocate space for C entries/values - spadd_symbolic(&kh, A.graph.row_map, A.graph.entries, - B.graph.row_map, B.graph.entries, row_mapC); + spadd_symbolic(&kh, A.graph.row_map, A.graph.entries, B.graph.row_map, + B.graph.entries, row_mapC); bool use_kk = !params.use_cusparse && !params.use_mkl; @@ -261,11 +256,8 @@ void run_experiment(const Params& params) { for (int sumRep = 0; sumRep < params.repeat; sumRep++) { timer.reset(); if (use_kk) { - spadd_symbolic(&kh, A.graph.row_map, A.graph.entries, - B.graph.row_map, B.graph.entries, - row_mapC); + spadd_symbolic(&kh, A.graph.row_map, A.graph.entries, B.graph.row_map, + B.graph.entries, row_mapC); c_nnz = addHandle->get_c_nnz(); } else if (params.use_cusparse) { #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE diff --git a/sparse/CMakeLists.txt b/sparse/CMakeLists.txt index 800e93e153..9457d5b368 100644 --- a/sparse/CMakeLists.txt +++ b/sparse/CMakeLists.txt @@ -125,6 +125,20 @@ KOKKOSKERNELS_GENERATE_ETI(Sparse_spiluk_numeric spiluk_numeric TYPE_LISTS FLOATS ORDINALS OFFSETS LAYOUTS DEVICES ) +KOKKOSKERNELS_GENERATE_ETI(Sparse_par_ilut_symbolic par_ilut_symbolic + COMPONENTS sparse + HEADER_LIST ETI_HEADERS + SOURCE_LIST SOURCES + TYPE_LISTS 
FLOATS ORDINALS OFFSETS LAYOUTS DEVICES +) + +KOKKOSKERNELS_GENERATE_ETI(Sparse_par_ilut_numeric par_ilut_numeric + COMPONENTS sparse + HEADER_LIST ETI_HEADERS + SOURCE_LIST SOURCES + TYPE_LISTS FLOATS ORDINALS OFFSETS LAYOUTS DEVICES +) + KOKKOSKERNELS_GENERATE_ETI(Sparse_sptrsv_symbolic sptrsv_symbolic COMPONENTS sparse HEADER_LIST ETI_HEADERS diff --git a/sparse/eti/generated_specializations_cpp/par_ilut_numeric/KokkosSparse_par_ilut_numeric_eti_spec_inst.cpp.in b/sparse/eti/generated_specializations_cpp/par_ilut_numeric/KokkosSparse_par_ilut_numeric_eti_spec_inst.cpp.in new file mode 100644 index 0000000000..de06bdfa4d --- /dev/null +++ b/sparse/eti/generated_specializations_cpp/par_ilut_numeric/KokkosSparse_par_ilut_numeric_eti_spec_inst.cpp.in @@ -0,0 +1,53 @@ +/* +//@HEADER +// ************************************************************************ +// +// KokkosKernels 0.9: Linear Algebra and Graph Kernels +// Copyright 2017 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true +#include "KokkosKernels_config.h" + +#include "KokkosSparse_par_ilut_numeric_spec.hpp" +namespace KokkosSparse { +namespace Impl { +@SPARSE_PAR_ILUT_NUMERIC_ETI_INST_BLOCK@ + } //IMPL +} //KokkosSparse diff --git a/sparse/eti/generated_specializations_cpp/par_ilut_symbolic/KokkosSparse_par_ilut_symbolic_eti_spec_inst.cpp.in b/sparse/eti/generated_specializations_cpp/par_ilut_symbolic/KokkosSparse_par_ilut_symbolic_eti_spec_inst.cpp.in new file mode 100644 index 0000000000..adaa577bb7 --- /dev/null +++ b/sparse/eti/generated_specializations_cpp/par_ilut_symbolic/KokkosSparse_par_ilut_symbolic_eti_spec_inst.cpp.in @@ -0,0 +1,53 @@ +/* +//@HEADER +// ************************************************************************ +// +// KokkosKernels 0.9: Linear Algebra and Graph Kernels +// Copyright 2017 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true +#include "KokkosKernels_config.h" + +#include "KokkosSparse_par_ilut_symbolic_spec.hpp" +namespace KokkosSparse { +namespace Impl { +@SPARSE_PAR_ILUT_SYMBOLIC_ETI_INST_BLOCK@ + } //IMPL +} //KokkosSparse diff --git a/sparse/eti/generated_specializations_hpp/KokkosSparse_par_ilut_numeric_eti_spec_avail.hpp.in b/sparse/eti/generated_specializations_hpp/KokkosSparse_par_ilut_numeric_eti_spec_avail.hpp.in new file mode 100644 index 0000000000..718a78f203 --- /dev/null +++ b/sparse/eti/generated_specializations_hpp/KokkosSparse_par_ilut_numeric_eti_spec_avail.hpp.in @@ -0,0 +1,53 @@ +#ifndef KOKKOSSPARSE_PAR_ILUT_NUMERIC_ETI_SPEC_AVAIL_HPP_ +#define KOKKOSSPARSE_PAR_ILUT_NUMERIC_ETI_SPEC_AVAIL_HPP_ +/* +//@HEADER +// ************************************************************************ +// +// KokkosKernels 0.9: Linear Algebra and Graph Kernels +// Copyright 2017 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +namespace KokkosSparse { +namespace Impl { + +@SPARSE_PAR_ILUT_NUMERIC_ETI_AVAIL_BLOCK@ + +} // Impl +} // KokkosSparse +#endif // KOKKOSSPARSE_PAR_ILUT_NUMERIC_ETI_SPEC_AVAIL_HPP_ diff --git a/sparse/eti/generated_specializations_hpp/KokkosSparse_par_ilut_numeric_eti_spec_decl.hpp.in b/sparse/eti/generated_specializations_hpp/KokkosSparse_par_ilut_numeric_eti_spec_decl.hpp.in new file mode 100644 index 0000000000..23738d1acf --- /dev/null +++ b/sparse/eti/generated_specializations_hpp/KokkosSparse_par_ilut_numeric_eti_spec_decl.hpp.in @@ -0,0 +1,53 @@ +#ifndef KOKKOSSPARSE_PAR_ILUT_NUMERIC_ETI_SPEC_DECL_HPP_ +#define KOKKOSSPARSE_PAR_ILUT_NUMERIC_ETI_SPEC_DECL_HPP_ +/* +//@HEADER +// ************************************************************************ +// +// KokkosKernels 0.9: Linear Algebra and Graph Kernels +// Copyright 2017 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +namespace KokkosSparse { +namespace Impl { + +@SPARSE_PAR_ILUT_NUMERIC_ETI_DECL_BLOCK@ + +} // Impl +} // KokkosSparse +#endif // KOKKOSSPARSE_PAR_ILUT_NUMERIC_ETI_SPEC_DECL_HPP_ diff --git a/sparse/eti/generated_specializations_hpp/KokkosSparse_par_ilut_symbolic_eti_spec_avail.hpp.in b/sparse/eti/generated_specializations_hpp/KokkosSparse_par_ilut_symbolic_eti_spec_avail.hpp.in new file mode 100644 index 0000000000..f26d7cb598 --- /dev/null +++ b/sparse/eti/generated_specializations_hpp/KokkosSparse_par_ilut_symbolic_eti_spec_avail.hpp.in @@ -0,0 +1,53 @@ +#ifndef KOKKOSSPARSE_PAR_ILUT_SYMBOLIC_ETI_SPEC_AVAIL_HPP_ +#define KOKKOSSPARSE_PAR_ILUT_SYMBOLIC_ETI_SPEC_AVAIL_HPP_ +/* +//@HEADER +// ************************************************************************ +// +// KokkosKernels 0.9: Linear Algebra and Graph Kernels +// Copyright 2017 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +namespace KokkosSparse { +namespace Impl { + +@SPARSE_PAR_ILUT_SYMBOLIC_ETI_AVAIL_BLOCK@ + +} // Impl +} // KokkosSparse +#endif // KOKKOSSPARSE_PAR_ILUT_SYMBOLIC_ETI_SPEC_AVAIL_HPP_ diff --git a/sparse/eti/generated_specializations_hpp/KokkosSparse_par_ilut_symbolic_eti_spec_decl.hpp.in b/sparse/eti/generated_specializations_hpp/KokkosSparse_par_ilut_symbolic_eti_spec_decl.hpp.in new file mode 100644 index 0000000000..d7f643850e --- /dev/null +++ b/sparse/eti/generated_specializations_hpp/KokkosSparse_par_ilut_symbolic_eti_spec_decl.hpp.in @@ -0,0 +1,53 @@ +#ifndef KOKKOSSPARSE_PAR_ILUT_SYMBOLIC_ETI_SPEC_DECL_HPP_ +#define KOKKOSSPARSE_PAR_ILUT_SYMBOLIC_ETI_SPEC_DECL_HPP_ +/* +//@HEADER +// ************************************************************************ +// +// KokkosKernels 0.9: Linear Algebra and Graph Kernels +// Copyright 2017 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +namespace KokkosSparse { +namespace Impl { + +@SPARSE_PAR_ILUT_SYMBOLIC_ETI_DECL_BLOCK@ + +} // Impl +} // KokkosSparse +#endif // KOKKOSSPARSE_PAR_ILUT_SYMBOLIC_ETI_SPEC_DECL_HPP_ diff --git a/sparse/impl/KokkosSparse_par_ilut_numeric_impl.hpp b/sparse/impl/KokkosSparse_par_ilut_numeric_impl.hpp new file mode 100644 index 0000000000..f66e4586ac --- /dev/null +++ b/sparse/impl/KokkosSparse_par_ilut_numeric_impl.hpp @@ -0,0 +1,951 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSSPARSE_IMPL_PAR_ILUT_NUMERIC_HPP_ +#define KOKKOSSPARSE_IMPL_PAR_ILUT_NUMERIC_HPP_ + +/// \file KokkosSparse_par_ilut_numeric_impl.hpp +/// \brief Implementation(s) of the numeric phase of sparse parallel ILUT. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +//#define NUMERIC_OUTPUT_INFO + +namespace KokkosSparse { +namespace Impl { +namespace Experimental { + +template +struct IlutWrap { + // + // Useful types + // + using execution_space = typename IlutHandle::execution_space; + using index_t = typename IlutHandle::nnz_lno_t; + using size_type = typename IlutHandle::size_type; + using scalar_t = typename IlutHandle::nnz_scalar_t; + using HandleDeviceEntriesType = typename IlutHandle::nnz_lno_view_t; + using HandleDeviceRowMapType = typename IlutHandle::nnz_row_view_t; + using HandleDeviceValueType = typename IlutHandle::nnz_value_view_t; + using karith = typename Kokkos::ArithTraits; + using policy_type = typename IlutHandle::TeamPolicy; + using member_type = typename policy_type::member_type; + using range_policy = typename IlutHandle::RangePolicy; + + /** + * prefix_sum: Take a row_map of counts and transform it to sums, and + * return the total sum. 
+ */ + template + static size_type prefix_sum(RowMapType& row_map) { + size_type result = 0; + KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum< + RowMapType, typename IlutHandle::HandleExecSpace>(row_map.extent(0), + row_map, result); + return result; + } + + /** + * Just a convenience wrapper around spgemm + */ + template + static void multiply_matrices( + KHandle& kh, IlutHandle& ih, const LRowMapType& L_row_map, + const LEntriesType& L_entries, const LValuesType& L_values, + const URowMapType& U_row_map, const UEntriesType& U_entries, + const UValuesType& U_values, LURowMapType& LU_row_map, + LUEntriesType& LU_entries, LUValuesType& LU_values) { + const size_type nrows = ih.get_nrows(); + + KokkosSparse::Experimental::spgemm_symbolic( + &kh, nrows, nrows, nrows, L_row_map, L_entries, false, U_row_map, + U_entries, false, LU_row_map); + + const size_type lu_nnz_size = kh.get_spgemm_handle()->get_c_nnz(); + Kokkos::resize(LU_entries, lu_nnz_size); + Kokkos::resize(LU_values, lu_nnz_size); + + KokkosSparse::Experimental::spgemm_numeric( + &kh, nrows, nrows, nrows, L_row_map, L_entries, L_values, false, + U_row_map, U_entries, U_values, false, LU_row_map, LU_entries, + LU_values); + + // Need to sort LU CRS if on CUDA! 
+ sort_crs_matrix(LU_row_map, LU_entries, LU_values); + } + + /** + * Just a convenience wrapper around transpose_matrix + */ + template + static void transpose_wrap(IlutHandle& ih, const RowMapType& row_map, + const EntriesType& entries, + const ValuesType& values, TRowMapType& t_row_map, + TEntriesType& t_entries, TValuesType& t_values) { + const size_type nrows = ih.get_nrows(); + + // Need to reset t_row_map + Kokkos::deep_copy(t_row_map, 0); + + Kokkos::resize(t_entries, entries.extent(0)); + Kokkos::resize(t_values, values.extent(0)); + + KokkosSparse::Impl::transpose_matrix< + HandleDeviceRowMapType, HandleDeviceEntriesType, HandleDeviceValueType, + HandleDeviceRowMapType, HandleDeviceEntriesType, HandleDeviceValueType, + HandleDeviceRowMapType, execution_space>( + nrows, nrows, row_map, entries, values, t_row_map, t_entries, t_values); + + // Need to ensure output is sorted + sort_crs_matrix(t_row_map, t_entries, t_values); + } + + /** + * Adds new entries from the sparsity pattern of A - L * U + * to L and U, where new values are chosen based on the residual + * value divided by the corresponding diagonal entry. + */ + template + static void add_candidates( + IlutHandle& ih, const ARowMapType& A_row_map, + const AEntriesType& A_entries, const AValuesType& A_values, + const LRowMapType& L_row_map, const LEntriesType& L_entries, + const LValuesType& L_values, const URowMapType& U_row_map, + const UEntriesType& U_entries, const UValuesType& U_values, + const LURowMapType& LU_row_map, const LUEntriesType& LU_entries, + const LUValuesType& LU_values, LNewRowMapType& L_new_row_map, + LNewEntriesType& L_new_entries, LNewValuesType& L_new_values, + UNewRowMapType& U_new_row_map, UNewEntriesType& U_new_entries, + UNewValuesType& U_new_values) { + const size_type nrows = ih.get_nrows(); + + const policy_type policy = ih.get_default_team_policy(); + + // Sizing run for add_candidates. 
Count nnz's and remove dupes + Kokkos::parallel_for( + "add_candidates sizing", policy, + KOKKOS_LAMBDA(const member_type& team) { + const auto row_idx = team.league_rank(); + + const auto a_row_nnz_begin = A_row_map(row_idx); + const auto a_row_nnz_end = A_row_map(row_idx + 1); + + const auto lu_row_nnz_begin = LU_row_map(row_idx); + const auto lu_row_nnz_end = LU_row_map(row_idx + 1); + + // Really wish kokkos could do a multi-reduce here + size_type a_l_nnz = 0, a_u_nnz = 0, lu_l_nnz = 0, lu_u_nnz = 0, + dup_l_nnz = 0, dup_u_nnz = 0; + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team, a_row_nnz_begin, a_row_nnz_end), + [&](const size_type nnz, size_type& nnzL_inner) { + const auto col_idx = A_entries(nnz); + nnzL_inner += col_idx <= row_idx; + }, + a_l_nnz); + + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team, a_row_nnz_begin, a_row_nnz_end), + [&](const size_type nnz, size_type& nnzU_inner) { + const auto col_idx = A_entries(nnz); + nnzU_inner += col_idx >= row_idx; + }, + a_u_nnz); + + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team, lu_row_nnz_begin, lu_row_nnz_end), + [&](const size_type nnz, size_type& nnzL_inner) { + const auto col_idx = LU_entries(nnz); + nnzL_inner += col_idx <= row_idx; + }, + lu_l_nnz); + + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team, lu_row_nnz_begin, lu_row_nnz_end), + [&](const size_type nnz, size_type& nnzU_inner) { + const auto col_idx = LU_entries(nnz); + nnzU_inner += col_idx >= row_idx; + }, + lu_u_nnz); + + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team, a_row_nnz_begin, a_row_nnz_end), + [&](const size_type nnz, size_type& dupL_inner) { + const auto a_col_idx = A_entries(nnz); + if (a_col_idx <= row_idx) { + for (size_type lu_i = lu_row_nnz_begin; lu_i < lu_row_nnz_end; + ++lu_i) { + const auto lu_col_idx = LU_entries(lu_i); + if (a_col_idx == lu_col_idx) { + ++dupL_inner; + break; + } else if (lu_col_idx > a_col_idx) { + break; + } + } + } + }, + dup_l_nnz); + + 
Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team, a_row_nnz_begin, a_row_nnz_end), + [&](const size_type nnz, size_type& dupU_inner) { + const auto a_col_idx = A_entries(nnz); + if (a_col_idx >= row_idx) { + for (size_type lu_i = lu_row_nnz_begin; lu_i < lu_row_nnz_end; + ++lu_i) { + const auto lu_col_idx = LU_entries(lu_i); + if (a_col_idx == lu_col_idx) { + ++dupU_inner; + break; + } else if (lu_col_idx > a_col_idx) { + break; + } + } + } + }, + dup_u_nnz); + + team.team_barrier(); + + Kokkos::single(Kokkos::PerTeam(team), [&]() { + const auto l_nnz = (a_l_nnz + lu_l_nnz - dup_l_nnz); + const auto u_nnz = (a_u_nnz + lu_u_nnz - dup_u_nnz); + + L_new_row_map(row_idx) = l_nnz; + U_new_row_map(row_idx) = u_nnz; + }); + }); + + // prefix sum + const size_type l_new_nnz_tot = prefix_sum(L_new_row_map); + const size_type u_new_nnz_tot = prefix_sum(U_new_row_map); + + Kokkos::resize(L_new_entries, l_new_nnz_tot); + Kokkos::resize(U_new_entries, u_new_nnz_tot); + Kokkos::resize(L_new_values, l_new_nnz_tot); + Kokkos::resize(U_new_values, u_new_nnz_tot); + + constexpr auto sentinel = std::numeric_limits::max(); + + // Now compute the actual candidate values + Kokkos::parallel_for( + "add_candidates", + range_policy(0, nrows), // No team level parallelism in this alg + KOKKOS_LAMBDA(const size_type row_idx) { + auto a_row_nnz_begin = A_row_map(row_idx); + const auto a_row_nnz_end = A_row_map(row_idx + 1); + const auto a_tot = a_row_nnz_end - a_row_nnz_begin; + + auto lu_row_nnz_begin = LU_row_map(row_idx); + const auto lu_row_nnz_end = LU_row_map(row_idx + 1); + const auto lu_tot = lu_row_nnz_end - lu_row_nnz_begin; + + const auto tot = a_tot + lu_tot; + + size_type l_new_nnz = L_new_row_map(row_idx); + size_type u_new_nnz = U_new_row_map(row_idx); + size_type l_old_begin = L_row_map(row_idx); + size_type l_old_end = L_row_map(row_idx + 1) - 1; // skip diagonal + size_type u_old_begin = U_row_map(row_idx); + size_type u_old_end = U_row_map(row_idx + 1); + bool 
finished_l = l_old_begin == l_old_end; + bool skip = false; + for (size_type i = 0; i < tot; ++i) { + if (skip) { + skip = false; + continue; + } + + const auto a_col = a_row_nnz_begin < a_row_nnz_end + ? A_entries(a_row_nnz_begin) + : sentinel; + auto a_val = a_row_nnz_begin < a_row_nnz_end + ? A_values(a_row_nnz_begin) + : 0.; + const auto lu_col = lu_row_nnz_begin < lu_row_nnz_end + ? LU_entries(lu_row_nnz_begin) + : sentinel; + auto lu_val = lu_row_nnz_begin < lu_row_nnz_end + ? LU_values(lu_row_nnz_begin) + : 0.; + + const size_type col_idx = Kokkos::fmin(a_col, lu_col); + + const bool a_active = col_idx == a_col; + const bool lu_active = col_idx == lu_col; + + a_val = a_active ? a_val : 0.; + lu_val = lu_active ? lu_val : 0.; + + skip = a_active && lu_active; + + a_row_nnz_begin += a_active; + lu_row_nnz_begin += lu_active; + + const auto r_val = a_val - lu_val; + // load matching entry of L + U + const auto lpu_col = + finished_l ? (u_old_begin < u_old_end ? U_entries(u_old_begin) + : sentinel) + : L_entries(l_old_begin); + const auto lpu_val = + finished_l + ? (u_old_begin < u_old_end ? U_values(u_old_begin) : 0.) + : L_values(l_old_begin); + // load diagonal entry of U for lower diagonal entries + const auto diag = + col_idx < row_idx ? U_values(U_row_map(col_idx)) : 1.; + // if there is already an entry present, use that instead. + const auto out_val = lpu_col == col_idx ? lpu_val : r_val / diag; + // store output entries + if (row_idx >= col_idx) { + L_new_entries(l_new_nnz) = col_idx; + L_new_values(l_new_nnz) = row_idx == col_idx ? 1. 
: out_val; + ++l_new_nnz; + } + if (row_idx <= col_idx) { + U_new_entries(u_new_nnz) = col_idx; + U_new_values(u_new_nnz) = out_val; + ++u_new_nnz; + } + // advance entry of L + U if we used it + if (finished_l) { + u_old_begin += (lpu_col == col_idx); + } else { + l_old_begin += (lpu_col == col_idx); + finished_l = (l_old_begin == l_old_end); + } + } + }); + } + + /** + * A device-safe lower_bound impl + */ + template + KOKKOS_FUNCTION static ForwardIterator kok_lower_bound(ForwardIterator first, + ForwardIterator last, + const T& val) { + ForwardIterator it; + size_t count, step; + count = last - first; + while (count > 0) { + it = first; + step = count / 2; + it += step; + if (*it < val) { // or: if (comp(*it,val)), for version (2) + first = ++it; + count -= step + 1; + } else + count = step; + } + return first; + } + + /** + * The compute_sum component of compute_l_u_factors + */ + template + KOKKOS_FUNCTION static Kokkos::pair< + typename AValuesType::non_const_value_type, size_type> + compute_sum(const size_type row_idx, typename IlutHandle::nnz_lno_t col_idx, + const ARowMapType& A_row_map, const AEntriesType& A_entries, + const AValuesType& A_values, const LRowMapType& L_row_map, + const LEntriesType& L_entries, const LValuesType& L_values, + const UtRowMapType& Ut_row_map, const UtEntriesType& Ut_entries, + const UtValuesType& Ut_values) { + const auto a_row_nnz_begin = A_row_map(row_idx); + const auto a_row_nnz_end = A_row_map(row_idx + 1); + auto a_nnz_it = kok_lower_bound(A_entries.data() + a_row_nnz_begin, + A_entries.data() + a_row_nnz_end, col_idx); + const size_type a_nnz = a_nnz_it - A_entries.data(); + const bool has_a = a_nnz < a_row_nnz_end && A_entries(a_nnz) == col_idx; + const auto a_val = has_a ? 
A_values(a_nnz) : 0.0; + scalar_t sum = 0.0; + size_type ut_nnz = 0; + + auto l_row_nnz = L_row_map(row_idx); + const auto l_row_nnz_end = L_row_map(row_idx + 1); + + auto ut_row_nnz = Ut_row_map(col_idx); + const auto ut_row_nnz_end = Ut_row_map(col_idx + 1); + + const auto last_entry = Kokkos::fmin(row_idx, col_idx); + while (l_row_nnz < l_row_nnz_end && ut_row_nnz < ut_row_nnz_end) { + const auto l_col = L_entries(l_row_nnz); + const auto u_row = Ut_entries(ut_row_nnz); + if (l_col == u_row && l_col < last_entry) { + sum += L_values(l_row_nnz) * Ut_values(ut_row_nnz); + } + if (static_cast(u_row) == row_idx) { + ut_nnz = ut_row_nnz; + } + + l_row_nnz += l_col <= u_row ? 1 : 0; + ut_row_nnz += u_row <= l_col ? 1 : 0; + } + + return Kokkos::make_pair(a_val - sum, ut_nnz); + } + + template + KOKKOS_FUNCTION static void compute_l_u_factors_impl( + const ARowMapType& A_row_map, const AEntriesType& A_entries, + const AValuesType& A_values, LRowMapType& L_row_map, + LEntriesType& L_entries, LValuesType& L_values, URowMapType& U_row_map, + UEntriesType& U_entries, UValuesType& U_values, UtRowMapType& Ut_row_map, + UtEntriesType& Ut_entries, UtValuesType& Ut_values, MemberType& team) { + const auto row_idx = team.league_rank(); + + const auto l_row_nnz_begin = L_row_map(row_idx); + const auto l_row_nnz_end = L_row_map(row_idx + 1); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, l_row_nnz_begin, l_row_nnz_end - 1), + [&](const size_type l_nnz) { + const auto col_idx = L_entries(l_nnz); + const auto u_diag = Ut_values(Ut_row_map(col_idx + 1) - 1); + if (u_diag != 0.0) { + const auto new_val = + compute_sum(row_idx, col_idx, A_row_map, A_entries, A_values, + L_row_map, L_entries, L_values, Ut_row_map, + Ut_entries, Ut_values) + .first / + u_diag; + L_values(l_nnz) = new_val; + } + }); + + team.team_barrier(); + + const auto u_row_nnz_begin = U_row_map(row_idx); + const auto u_row_nnz_end = U_row_map(row_idx + 1); + + Kokkos::parallel_for( + 
Kokkos::TeamThreadRange(team, u_row_nnz_begin, u_row_nnz_end), + [&](const size_type u_nnz) { + const auto col_idx = U_entries(u_nnz); + const auto sum = compute_sum(row_idx, col_idx, A_row_map, A_entries, + A_values, L_row_map, L_entries, L_values, + Ut_row_map, Ut_entries, Ut_values); + const auto new_val = sum.first; + const auto ut_nnz = sum.second; + U_values(u_nnz) = new_val; + Ut_values(ut_nnz) = new_val; // ut_nnz is not guaranteed to fall into + // range used exclusively by this team + }); + } + + /** + * Implements a single iteration/sweep of the fixed-point ILU algorithm. + * The results of this function are non-deterministic due to concurrent + * reading and writing of Ut values. deterministic can be set to true to + * make this function deterministic, but it will be run in Serial exe space + * if so. + */ + template + static void compute_l_u_factors( + IlutHandle& ih, const ARowMapType& A_row_map, + const AEntriesType& A_entries, const AValuesType& A_values, + LRowMapType& L_row_map, LEntriesType& L_entries, LValuesType& L_values, + URowMapType& U_row_map, UEntriesType& U_entries, UValuesType& U_values, + UtRowMapType& Ut_row_map, UtEntriesType& Ut_entries, + UtValuesType& Ut_values, bool deterministic) { + if (deterministic) { +#ifdef KOKKOS_ENABLE_SERIAL + using spolicy_type = Kokkos::TeamPolicy; + using smember_type = typename spolicy_type::member_type; + + const size_type nrows = ih.get_nrows(); + spolicy_type policy(nrows, 1); + + auto A_row_map_h = Kokkos::create_mirror_view(A_row_map); + auto A_entries_h = Kokkos::create_mirror_view(A_entries); + auto A_values_h = Kokkos::create_mirror_view(A_values); + auto L_row_map_h = Kokkos::create_mirror_view(L_row_map); + auto L_entries_h = Kokkos::create_mirror_view(L_entries); + auto L_values_h = Kokkos::create_mirror_view(L_values); + auto U_row_map_h = Kokkos::create_mirror_view(U_row_map); + auto U_entries_h = Kokkos::create_mirror_view(U_entries); + auto U_values_h = 
Kokkos::create_mirror_view(U_values); + auto Ut_row_map_h = Kokkos::create_mirror_view(Ut_row_map); + auto Ut_entries_h = Kokkos::create_mirror_view(Ut_entries); + auto Ut_values_h = Kokkos::create_mirror_view(Ut_values); + + Kokkos::deep_copy(A_row_map_h, A_row_map); + Kokkos::deep_copy(A_entries_h, A_entries); + Kokkos::deep_copy(A_values_h, A_values); + Kokkos::deep_copy(L_row_map_h, L_row_map); + Kokkos::deep_copy(L_entries_h, L_entries); + Kokkos::deep_copy(L_values_h, L_values); + Kokkos::deep_copy(U_row_map_h, U_row_map); + Kokkos::deep_copy(U_entries_h, U_entries); + Kokkos::deep_copy(U_values_h, U_values); + Kokkos::deep_copy(Ut_row_map_h, Ut_row_map); + Kokkos::deep_copy(Ut_entries_h, Ut_entries); + Kokkos::deep_copy(Ut_values_h, Ut_values); + + Kokkos::parallel_for( + "compute_l_u_factors", policy, + KOKKOS_LAMBDA(const smember_type& team) { + compute_l_u_factors_impl( + A_row_map_h, A_entries_h, A_values_h, L_row_map_h, L_entries_h, + L_values_h, U_row_map_h, U_entries_h, U_values_h, Ut_row_map_h, + Ut_entries_h, Ut_values_h, team); + }); + + Kokkos::deep_copy(L_values, L_values_h); + Kokkos::deep_copy(U_values, U_values_h); + Kokkos::deep_copy(Ut_values, Ut_values_h); +#else + throw std::runtime_error( + "compute_l_u factors cannot be deterministic without Kokkos::Serial " + "available"); +#endif + } else { + const auto policy = ih.get_default_team_policy(); + + Kokkos::parallel_for( + "compute_l_u_factors", policy, + KOKKOS_LAMBDA(const member_type& team) { + compute_l_u_factors_impl(A_row_map, A_entries, A_values, L_row_map, + L_entries, L_values, U_row_map, U_entries, + U_values, Ut_row_map, Ut_entries, + Ut_values, team); + }); + } + } + + /** + * Select threshold based on filter rank. 
Do all this on host + */ + template + static typename IlutHandle::float_t threshold_select( + ValuesType& values, const typename IlutHandle::nnz_lno_t rank, + ValuesCopyType& values_copy) { + const index_t size = values.extent(0); + + Kokkos::resize(values_copy, size); + Kokkos::deep_copy(values_copy, values); + + auto begin = values_copy.data(); + auto target = begin + rank; + auto end = begin + size; + std::nth_element(begin, target, end, [](scalar_t a, scalar_t b) { + return karith::abs(a) < karith::abs(b); + }); + + return karith::abs(values_copy(rank)); + } + + /** + * Remove non-diagonal elements that are below the threshold. + */ + template + static void threshold_filter(IlutHandle& ih, + const typename IlutHandle::float_t threshold, + const IRowMapType& I_row_map, + const IEntriesType& I_entries, + const IValuesType& I_values, + ORowMapType& O_row_map, OEntriesType& O_entries, + OValuesType& O_values) { + const auto policy = ih.get_default_team_policy(); + const size_type nrows = ih.get_nrows(); + + Kokkos::parallel_for( + "threshold_filter count", policy, + KOKKOS_LAMBDA(const member_type& team) { + const auto row_idx = team.league_rank(); + + const auto row_nnx_begin = I_row_map(row_idx); + const auto row_nnx_end = I_row_map(row_idx + 1); + + size_type count = 0; + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team, row_nnx_begin, row_nnx_end), + [&](const size_type nnz, size_type& count_inner) { + if (karith::abs(I_values(nnz)) >= threshold || + I_entries(nnz) == row_idx) { + count_inner += 1; + } + }, + count); + + Kokkos::single(Kokkos::PerTeam(team), + [=]() { O_row_map(row_idx) = count; }); + }); + + const auto new_nnz = prefix_sum(O_row_map); + + Kokkos::resize(O_entries, new_nnz); + Kokkos::resize(O_values, new_nnz); + + Kokkos::parallel_for( + "threshold_filter assign", range_policy(0, nrows), + KOKKOS_LAMBDA(const size_type row_idx) { + const auto i_row_nnx_begin = I_row_map(row_idx); + const auto i_row_nnx_end = I_row_map(row_idx + 1); + + 
auto onnz = O_row_map(row_idx); + + for (size_type innz = i_row_nnx_begin; innz < i_row_nnx_end; ++innz) { + if (karith::abs(I_values(innz)) >= threshold || + static_cast(I_entries(innz)) == row_idx) { + O_entries(onnz) = I_entries(innz); + O_values(onnz) = I_values(innz); + ++onnz; + } + } + }); + } + + /** + * Compute residual norm for R = A - LU + */ + template + static typename IlutHandle::nnz_scalar_t compute_residual_norm( + KHandle& kh, IlutHandle& ih, const ARowMapType& A_row_map, + const AEntriesType& A_entries, const AValuesType& A_values, + const LRowMapType& L_row_map, const LEntriesType& L_entries, + const LValuesType& L_values, const URowMapType& U_row_map, + const UEntriesType& U_entries, const UValuesType& U_values, + RRowMapType& R_row_map, REntriesType& R_entries, RValuesType& R_values, + LURowMapType& LU_row_map, LUEntriesType& LU_entries, + LUValuesType& LU_values) { + multiply_matrices(kh, ih, L_row_map, L_entries, L_values, U_row_map, + U_entries, U_values, LU_row_map, LU_entries, LU_values); + + auto addHandle = kh.get_spadd_handle(); + KokkosSparse::Experimental::spadd_symbolic( + &kh, A_row_map, A_entries, LU_row_map, LU_entries, R_row_map); + + const size_type r_nnz = addHandle->get_c_nnz(); + Kokkos::resize(R_entries, r_nnz); + Kokkos::resize(R_values, r_nnz); + + KokkosSparse::Experimental::spadd_numeric( + &kh, A_row_map, A_entries, A_values, 1., LU_row_map, LU_entries, + LU_values, -1., R_row_map, R_entries, R_values); + + scalar_t result; + + auto policy = ih.get_default_team_policy(); + + Kokkos::parallel_reduce( + "compute_residual_norm", policy, + KOKKOS_LAMBDA(const member_type& team, scalar_t& total_sum) { + const auto row_idx = team.league_rank(); + + const auto a_row_nnz_begin = A_row_map(row_idx); + const auto a_row_nnz_end = A_row_map(row_idx + 1); + + const auto a_row_entries_begin = A_entries.data() + a_row_nnz_begin; + const auto a_row_entries_end = A_entries.data() + a_row_nnz_end; + + const auto r_row_nnz_begin = 
R_row_map(row_idx); + const auto r_row_nnz_end = R_row_map(row_idx + 1); + + scalar_t team_sum = 0.; + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team, r_row_nnz_begin, r_row_nnz_end), + [&](const size_type nnz, scalar_t& sum_inner) { + const auto r_col_idx = R_entries(nnz); + const index_t* lb = kok_lower_bound( + a_row_entries_begin, a_row_entries_end, r_col_idx); + if (lb != a_row_entries_end && *lb == r_col_idx) { + sum_inner += R_values(nnz) * R_values(nnz); + } + }, + team_sum); + + Kokkos::single(Kokkos::PerTeam(team), + [&]() { total_sum += team_sum; }); + }, + result); + + return karith::sqrt(result); + } + + /** + * Set the initial L/U values for the initial approximation + */ + template + static void initialize_LU( + IlutHandle& ih, const ARowMapType& A_row_map, + const AEntriesType& A_entries, const AValuesType& A_values, + const LRowMapType& L_row_map, const LEntriesType& L_entries, + const LValuesType& L_values, const URowMapType& U_row_map, + const UEntriesType& U_entries, const UValuesType& U_values) { + const size_type nrows = ih.get_nrows(); + + Kokkos::parallel_for( + "approx LU values", + range_policy(0, nrows), // No team level parallelism in this alg + KOKKOS_LAMBDA(const index_t& row_idx) { + const auto row_nnz_begin = A_row_map(row_idx); + const auto row_nnz_end = A_row_map(row_idx + 1); + + size_type current_index_l = L_row_map(row_idx); + size_type current_index_u = + U_row_map(row_idx) + 1; // we treat the diagonal separately + + // if there is no diagonal value, set it to 1 by default + scalar_t diag = 1.; + + for (size_type row_nnz = row_nnz_begin; row_nnz < row_nnz_end; + ++row_nnz) { + const auto val = A_values(row_nnz); + const auto col_idx = A_entries(row_nnz); + + if (col_idx < row_idx) { + L_entries(current_index_l) = col_idx; + L_values(current_index_l) = val; + ++current_index_l; + } else if (col_idx == row_idx) { + // save diagonal + diag = val; + } else { + U_entries(current_index_u) = col_idx; + 
U_values(current_index_u) = val; + ++current_index_u; + } + } + + // store diagonal values separately + const auto l_diag_idx = L_row_map(row_idx + 1) - 1; + const auto u_diag_idx = U_row_map(row_idx); + L_entries(l_diag_idx) = row_idx; + U_entries(u_diag_idx) = row_idx; + L_values(l_diag_idx) = 1.; + U_values(u_diag_idx) = diag; + }); + } + + /** + * The main par_ilut numeric function. + */ + template + static void ilut_numeric(KHandle& kh, IlutHandle& thandle, + const ARowMapType& A_row_map, + const AEntriesType& A_entries, + const AValuesType& A_values, LRowMapType& L_row_map, + LEntriesType& L_entries, LValuesType& L_values, + URowMapType& U_row_map, UEntriesType& U_entries, + UValuesType& U_values, bool deterministic) { + const size_type nrows = thandle.get_nrows(); + const auto fill_in_limit = thandle.get_fill_in_limit(); + const auto l_nnz_limit = + static_cast(fill_in_limit * thandle.get_nnzL()); + const auto u_nnz_limit = + static_cast(fill_in_limit * thandle.get_nnzU()); + + const auto residual_norm_delta_stop = + thandle.get_residual_norm_delta_stop(); + const size_type max_iter = thandle.get_max_iter(); + + std::string myalg("SPGEMM_KK_MEMORY"); + KokkosSparse::SPGEMMAlgorithm spgemm_algorithm = + KokkosSparse::StringToSPGEMMAlgorithm(myalg); + kh.create_spgemm_handle(spgemm_algorithm); + + kh.create_spadd_handle(true /*we expect inputs to be sorted*/); + + // + // temporary workspaces and scalars + // + HandleDeviceRowMapType LU_row_map( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "LU_row_map"), + nrows + 1), + L_new_row_map( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "L_new_row_map"), + nrows + 1), + U_new_row_map( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "U_new_row_map"), + nrows + 1), + R_row_map(Kokkos::view_alloc(Kokkos::WithoutInitializing, "R_row_map"), + nrows + 1), + Ut_new_row_map("Ut_new_row_map", nrows + 1); + + HandleDeviceEntriesType LU_entries, L_new_entries, U_new_entries, + Ut_new_entries, R_entries; + 
HandleDeviceValueType LU_values, L_new_values, U_new_values, Ut_new_values, + V_copy_d, R_values; + auto V_copy = Kokkos::create_mirror_view(V_copy_d); + + size_type itr = 0; + scalar_t prev_residual = std::numeric_limits::max(); + bool converged = false; + + // Set the initial L/U values for the initial approximation + initialize_LU(thandle, A_row_map, A_entries, A_values, L_row_map, L_entries, + L_values, U_row_map, U_entries, U_values); + + // + // main loop + // + while (!converged && itr < max_iter) { + // LU = L*U + if (prev_residual == std::numeric_limits::max()) { + multiply_matrices(kh, thandle, L_row_map, L_entries, L_values, + U_row_map, U_entries, U_values, LU_row_map, + LU_entries, LU_values); + } + + // Identify candidate locations and add them + add_candidates(thandle, A_row_map, A_entries, A_values, L_row_map, + L_entries, L_values, U_row_map, U_entries, U_values, + LU_row_map, LU_entries, LU_values, L_new_row_map, + L_new_entries, L_new_values, U_new_row_map, U_new_entries, + U_new_values); + + // Get transpose of U_new, needed for compute_l_u_factors + transpose_wrap(thandle, U_new_row_map, U_new_entries, U_new_values, + Ut_new_row_map, Ut_new_entries, Ut_new_values); + + // Do one sweep of the fixed-point ILU algorithm + compute_l_u_factors( + thandle, A_row_map, A_entries, A_values, L_new_row_map, L_new_entries, + L_new_values, U_new_row_map, U_new_entries, U_new_values, + Ut_new_row_map, Ut_new_entries, Ut_new_values, deterministic); + + // Filter smallest elements from L_new and U_new. Store result back + // in L and U. 
+ { + const index_t l_nnz = L_new_values.extent(0); + const index_t u_nnz = U_new_values.extent(0); + + const auto l_filter_rank = std::max(0, l_nnz - l_nnz_limit - 1); + const auto u_filter_rank = std::max(0, u_nnz - u_nnz_limit - 1); + + const auto l_threshold = + threshold_select(L_new_values, l_filter_rank, V_copy); + const auto u_threshold = + threshold_select(U_new_values, u_filter_rank, V_copy); + + threshold_filter(thandle, l_threshold, L_new_row_map, L_new_entries, + L_new_values, L_row_map, L_entries, L_values); + + threshold_filter(thandle, u_threshold, U_new_row_map, U_new_entries, + U_new_values, U_row_map, U_entries, U_values); + } + + // Get transpose of U, needed for compute_l_u_factors. Store in Ut_new* + // since we aren't using those temporaries anymore + transpose_wrap(thandle, U_row_map, U_entries, U_values, Ut_new_row_map, + Ut_new_entries, Ut_new_values); + + // Do one sweep of the fixed-point ILU algorithm + compute_l_u_factors(thandle, A_row_map, A_entries, A_values, L_row_map, + L_entries, L_values, U_row_map, U_entries, U_values, + Ut_new_row_map, Ut_new_entries, Ut_new_values, + deterministic); + + // Compute residual and terminate if converged + { + const auto curr_residual = compute_residual_norm( + kh, thandle, A_row_map, A_entries, A_values, L_row_map, L_entries, + L_values, U_row_map, U_entries, U_values, R_row_map, R_entries, + R_values, LU_row_map, LU_entries, LU_values); + + if (karith::abs(prev_residual - curr_residual) <= + karith::abs(residual_norm_delta_stop)) { + converged = true; + } else { + prev_residual = curr_residual; + } + } + + ++itr; + } + + kh.destroy_spgemm_handle(); + kh.destroy_spadd_handle(); + } // end ilut_numeric + +}; // struct IlutWrap + +} // namespace Experimental +} // namespace Impl +} // namespace KokkosSparse + +#endif diff --git a/sparse/impl/KokkosSparse_par_ilut_numeric_spec.hpp b/sparse/impl/KokkosSparse_par_ilut_numeric_spec.hpp new file mode 100644 index 0000000000..56302c9d24 --- /dev/null +++ 
b/sparse/impl/KokkosSparse_par_ilut_numeric_spec.hpp @@ -0,0 +1,274 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef KOKKOSSPARSE_IMPL_PAR_ILUT_NUMERIC_SPEC_HPP_ +#define KOKKOSSPARSE_IMPL_PAR_ILUT_NUMERIC_SPEC_HPP_ + +#include +#include +#include +#include "KokkosSparse_CrsMatrix.hpp" +#include "KokkosKernels_Handle.hpp" + +// Include the actual functors +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#include +#include +#endif + +namespace KokkosSparse { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct par_ilut_numeric_eti_spec_avail { + enum : bool { value = false }; +}; + +} // namespace Impl +} // namespace KokkosSparse + +#define KOKKOSSPARSE_PAR_ILUT_NUMERIC_ETI_SPEC_AVAIL( \ + SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ + MEM_SPACE_TYPE) \ + template <> \ + struct par_ilut_numeric_eti_spec_avail< \ + KokkosKernels::Experimental::KokkosKernelsHandle< \ + const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>, \ + Kokkos::View< \ + const OFFSET_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + const ORDINAL_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + const SCALAR_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + OFFSET_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + OFFSET_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +// Include the actual specialization declarations +#include +#include + +namespace KokkosSparse { +namespace Impl { + +// 
Unification layer +/// \brief Implementation of KokkosSparse::par_ilut_numeric + +template ::value, + bool eti_spec_avail = par_ilut_numeric_eti_spec_avail< + KernelHandle, ARowMapType, AEntriesType, AValuesType, LRowMapType, + LEntriesType, LValuesType, URowMapType, UEntriesType, + UValuesType>::value> +struct PAR_ILUT_NUMERIC { + static void par_ilut_numeric(KernelHandle *handle, + const ARowMapType &A_row_map, + const AEntriesType &A_entries, + const AValuesType &A_values, + LRowMapType &L_row_map, LEntriesType &L_entries, + LValuesType &L_values, URowMapType &U_row_map, + UEntriesType &U_entries, UValuesType &U_values, + bool deterministic = false); +}; + +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +//! Full specialization of par_ilut_numeric +// Unification layer +template +struct PAR_ILUT_NUMERIC { + static void par_ilut_numeric(KernelHandle *handle, + const ARowMapType &A_row_map, + const AEntriesType &A_entries, + const AValuesType &A_values, + LRowMapType &L_row_map, LEntriesType &L_entries, + LValuesType &L_values, URowMapType &U_row_map, + UEntriesType &U_entries, UValuesType &U_values, + bool deterministic = false) { + auto par_ilut_handle = handle->get_par_ilut_handle(); + using Ilut = Experimental::IlutWrap< + typename std::remove_pointer::type>; + + Ilut::ilut_numeric(*handle, *par_ilut_handle, A_row_map, A_entries, + A_values, L_row_map, L_entries, L_values, U_row_map, + U_entries, U_values, deterministic); + } +}; + +#endif +} // namespace Impl +} // namespace KokkosSparse + +// +// Macro for declaration of full specialization of +// This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see _DEF macro below) across one or +// more .cpp files. 
+// +#define KOKKOSSPARSE_PAR_ILUT_NUMERIC_ETI_SPEC_DECL( \ + SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ + MEM_SPACE_TYPE) \ + extern template struct PAR_ILUT_NUMERIC< \ + KokkosKernels::Experimental::KokkosKernelsHandle< \ + const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>, \ + Kokkos::View< \ + const OFFSET_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + const ORDINAL_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + const SCALAR_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + OFFSET_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + OFFSET_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + false, true>; + +#define KOKKOSSPARSE_PAR_ILUT_NUMERIC_ETI_SPEC_INST( \ + SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ + MEM_SPACE_TYPE) \ + template struct PAR_ILUT_NUMERIC< \ + KokkosKernels::Experimental::KokkosKernelsHandle< \ + const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>, \ + Kokkos::View< \ + const OFFSET_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + const ORDINAL_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + const SCALAR_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + OFFSET_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + OFFSET_TYPE *, LAYOUT_TYPE, \ + 
Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + false, true>; + +#include +#include + +#endif diff --git a/sparse/impl/KokkosSparse_par_ilut_symbolic_impl.hpp b/sparse/impl/KokkosSparse_par_ilut_symbolic_impl.hpp new file mode 100644 index 0000000000..db5654a4f1 --- /dev/null +++ b/sparse/impl/KokkosSparse_par_ilut_symbolic_impl.hpp @@ -0,0 +1,123 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSSPARSE_IMPL_PAR_ILUT_SYMBOLIC_HPP_ +#define KOKKOSSPARSE_IMPL_PAR_ILUT_SYMBOLIC_HPP_ + +/// \file KokkosSparse_par_ilut_symbolic_impl.hpp +/// \brief Implementation of the symbolic phase of par_ilut. + +#include +#include +#include +#include +#include +#include + +//#define SYMBOLIC_OUTPUT_INFO + +namespace KokkosSparse { +namespace Impl { +namespace Experimental { + +template +void ilut_symbolic(IlutHandle& thandle, const ARowMapType& A_row_map_d, + const AEntriesType& A_entries_d, LRowMapType& L_row_map_d, + URowMapType& U_row_map_d) { + using execution_space = typename ARowMapType::execution_space; + using policy_type = Kokkos::TeamPolicy; + using member_type = typename policy_type::member_type; + using size_type = typename IlutHandle::size_type; + using Ilut = IlutWrap; + + const auto policy = thandle.get_default_team_policy(); + + // Sizing for the initial L/U approximation + Kokkos::parallel_for( + "symbolic sizing", policy, KOKKOS_LAMBDA(const member_type& team) { + const auto row_idx = team.league_rank(); + + const auto row_nnz_begin = A_row_map_d(row_idx); + const auto row_nnz_end = A_row_map_d(row_idx + 1); + + size_type nnzsL_temp = 0, nnzsU_temp = 0; + // Multi-reductions are not supported at the TeamThread level + Kokkos::parallel_reduce( + 
Kokkos::TeamThreadRange(team, row_nnz_begin, row_nnz_end), + [&](const size_type nnz, size_type& nnzsL_inner) { + const auto col_idx = A_entries_d(nnz); + nnzsL_inner += col_idx < row_idx; + }, + nnzsL_temp); + + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team, row_nnz_begin, row_nnz_end), + [&](const size_type nnz, size_type& nnzsU_inner) { + const auto col_idx = A_entries_d(nnz); + nnzsU_inner += col_idx > row_idx; + }, + nnzsU_temp); + + team.team_barrier(); + + Kokkos::single(Kokkos::PerTeam(team), [&]() { + L_row_map_d(row_idx) = nnzsL_temp + 1; + U_row_map_d(row_idx) = nnzsU_temp + 1; + }); + }); + + const size_type nnzsL = Ilut::prefix_sum(L_row_map_d); + const size_type nnzsU = Ilut::prefix_sum(U_row_map_d); + + thandle.set_nnzL(nnzsL); + thandle.set_nnzU(nnzsU); + +} // end ilut_symbolic + +} // namespace Experimental +} // namespace Impl +} // namespace KokkosSparse + +#endif diff --git a/sparse/impl/KokkosSparse_par_ilut_symbolic_spec.hpp b/sparse/impl/KokkosSparse_par_ilut_symbolic_spec.hpp new file mode 100644 index 0000000000..2ba47683c5 --- /dev/null +++ b/sparse/impl/KokkosSparse_par_ilut_symbolic_spec.hpp @@ -0,0 +1,206 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef KOKKOSSPARSE_IMPL_PAR_ILUT_SYMBOLIC_SPEC_HPP_ +#define KOKKOSSPARSE_IMPL_PAR_ILUT_SYMBOLIC_SPEC_HPP_ + +#include +#include +#include +#include "KokkosSparse_CrsMatrix.hpp" +#include "KokkosKernels_Handle.hpp" + +// Include the actual functors +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#include +#endif + +namespace KokkosSparse { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct par_ilut_symbolic_eti_spec_avail { + enum : bool { value = false }; +}; + +} // namespace Impl +} // namespace KokkosSparse + +#define KOKKOSSPARSE_PAR_ILUT_SYMBOLIC_ETI_SPEC_AVAIL( \ + SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ + MEM_SPACE_TYPE) \ + template <> \ + struct par_ilut_symbolic_eti_spec_avail< \ + KokkosKernels::Experimental::KokkosKernelsHandle< \ + const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>, \ + Kokkos::View< \ + const OFFSET_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + const ORDINAL_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + OFFSET_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + OFFSET_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +// Include the actual specialization declarations +#include +#include + +namespace KokkosSparse { +namespace Impl { + +// Unification layer +/// \brief Implementation of KokkosSparse::par_ilut_symbolic + +template ::value, + bool eti_spec_avail = par_ilut_symbolic_eti_spec_avail< + KernelHandle, ARowMapType, AEntriesType, LRowMapType, + URowMapType>::value> +struct PAR_ILUT_SYMBOLIC { + static void 
par_ilut_symbolic(KernelHandle *handle, + const ARowMapType &A_row_map, + const AEntriesType &A_entries, + LRowMapType &L_row_map, URowMapType &U_row_map); +}; + +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +//! Full specialization of par_ilut_symbolic +// Unification layer +template +struct PAR_ILUT_SYMBOLIC { + static void par_ilut_symbolic(KernelHandle *handle, + const ARowMapType &A_row_map, + const AEntriesType &A_entries, + LRowMapType &L_row_map, + URowMapType &U_row_map) { + auto par_ilut_handle = handle->get_par_ilut_handle(); + + Experimental::ilut_symbolic(*par_ilut_handle, A_row_map, A_entries, + L_row_map, U_row_map); + par_ilut_handle->set_symbolic_complete(); + } +}; +#endif +} // namespace Impl +} // namespace KokkosSparse + +// +// Macro for declaration of full specialization of +// KokkosSparse::Impl::PAR_ILUT_SYMBOLIC. This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see the _ETI_SPEC_INST macro below) across one or +// more .cpp files.
+// +#define KOKKOSSPARSE_PAR_ILUT_SYMBOLIC_ETI_SPEC_DECL( \ + SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ + MEM_SPACE_TYPE) \ + extern template struct PAR_ILUT_SYMBOLIC< \ + KokkosKernels::Experimental::KokkosKernelsHandle< \ + const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>, \ + Kokkos::View< \ + const OFFSET_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + const ORDINAL_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + OFFSET_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + OFFSET_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + false, true>; + +#define KOKKOSSPARSE_PAR_ILUT_SYMBOLIC_ETI_SPEC_INST( \ + SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ + MEM_SPACE_TYPE) \ + template struct PAR_ILUT_SYMBOLIC< \ + KokkosKernels::Experimental::KokkosKernelsHandle< \ + const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE, MEM_SPACE_TYPE>, \ + Kokkos::View< \ + const OFFSET_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + const ORDINAL_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + OFFSET_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + OFFSET_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + false, true>; + +#include +#include + +#endif diff --git a/sparse/impl/KokkosSparse_spgemm_numeric_spec.hpp b/sparse/impl/KokkosSparse_spgemm_numeric_spec.hpp index e6c7e05632..258df3b4dc 100644 --- a/sparse/impl/KokkosSparse_spgemm_numeric_spec.hpp +++ b/sparse/impl/KokkosSparse_spgemm_numeric_spec.hpp @@ -248,10 +248,10 @@ struct SPGEMM_NUMERIC< mkl_numeric(sh, m, n, k, row_mapA, entriesA, valuesA, transposeA, row_mapB, entriesB, 
valuesB, transposeB, row_mapC, entriesC, valuesC, handle->get_verbose()); + break; #else throw std::runtime_error("MKL was not enabled in this build!"); #endif - break; case SPGEMM_MKL2PHASE: mkl2phase_apply(sh, m, n, k, row_mapA, entriesA, valuesA, transposeA, row_mapB, entriesB, valuesB, transposeB, row_mapC, diff --git a/sparse/src/KokkosKernels_Handle.hpp b/sparse/src/KokkosKernels_Handle.hpp index a918dc61bf..40049c0c48 100644 --- a/sparse/src/KokkosKernels_Handle.hpp +++ b/sparse/src/KokkosKernels_Handle.hpp @@ -49,6 +49,7 @@ #include "KokkosSparse_spadd_handle.hpp" #include "KokkosSparse_sptrsv_handle.hpp" #include "KokkosSparse_spiluk_handle.hpp" +#include "KokkosSparse_par_ilut_handle.hpp" #include "KokkosKernels_default_types.hpp" #ifndef _KOKKOSKERNELHANDLE_HPP @@ -183,8 +184,9 @@ class KokkosKernelsHandle { this->spgemmHandle = right_side_handle.get_spgemm_handle(); this->spaddHandle = right_side_handle.get_spadd_handle(); - this->sptrsvHandle = right_side_handle.get_sptrsv_handle(); - this->spilukHandle = right_side_handle.get_spiluk_handle(); + this->sptrsvHandle = right_side_handle.get_sptrsv_handle(); + this->spilukHandle = right_side_handle.get_spiluk_handle(); + this->par_ilutHandle = right_side_handle.get_par_ilut_handle(); this->team_work_size = right_side_handle.get_set_team_work_size(); this->shared_memory_size = right_side_handle.get_shmem_size(); @@ -201,12 +203,13 @@ class KokkosKernelsHandle { is_owner_of_the_gs_sptrsvL_handle = false; is_owner_of_the_gs_sptrsvU_handle = false; // ---------------------------------------- // - is_owner_of_the_d2_gc_handle = false; - is_owner_of_the_gs_handle = false; - is_owner_of_the_spgemm_handle = false; - is_owner_of_the_spadd_handle = false; - is_owner_of_the_sptrsv_handle = false; - is_owner_of_the_spiluk_handle = false; + is_owner_of_the_d2_gc_handle = false; + is_owner_of_the_gs_handle = false; + is_owner_of_the_spgemm_handle = false; + is_owner_of_the_spadd_handle = false; + 
is_owner_of_the_sptrsv_handle = false; + is_owner_of_the_spiluk_handle = false; + is_owner_of_the_par_ilut_handle = false; // return *this; } @@ -301,6 +304,11 @@ class KokkosKernelsHandle { HandleTempMemorySpace, HandlePersistentMemorySpace> SPILUKHandleType; + typedef typename KokkosSparse::Experimental::PAR_ILUTHandle< + const_size_type, const_nnz_lno_t, const_nnz_scalar_t, HandleExecSpace, + HandleTempMemorySpace, HandlePersistentMemorySpace> + PAR_ILUTHandleType; + private: GraphColoringHandleType *gcHandle; GraphColorDistance2HandleType *gcHandle_d2; @@ -316,6 +324,7 @@ class KokkosKernelsHandle { SPADDHandleType *spaddHandle; SPTRSVHandleType *sptrsvHandle; SPILUKHandleType *spilukHandle; + PAR_ILUTHandleType *par_ilutHandle; int team_work_size; size_t shared_memory_size; @@ -338,6 +347,7 @@ class KokkosKernelsHandle { bool is_owner_of_the_spadd_handle; bool is_owner_of_the_sptrsv_handle; bool is_owner_of_the_spiluk_handle; + bool is_owner_of_the_par_ilut_handle; public: KokkosKernelsHandle() @@ -354,6 +364,7 @@ class KokkosKernelsHandle { spaddHandle(NULL), sptrsvHandle(NULL), spilukHandle(NULL), + par_ilutHandle(NULL), team_work_size(-1), shared_memory_size(16128), suggested_team_size(-1), @@ -374,7 +385,8 @@ class KokkosKernelsHandle { is_owner_of_the_spgemm_handle(true), is_owner_of_the_spadd_handle(true), is_owner_of_the_sptrsv_handle(true), - is_owner_of_the_spiluk_handle(true) {} + is_owner_of_the_spiluk_handle(true), + is_owner_of_the_par_ilut_handle(true) {} ~KokkosKernelsHandle() { this->destroy_gs_handle(); @@ -389,6 +401,7 @@ class KokkosKernelsHandle { this->destroy_spadd_handle(); this->destroy_sptrsv_handle(); this->destroy_spiluk_handle(); + this->destroy_par_ilut_handle(); } void set_verbose(bool verbose_) { this->KKVERBOSE = verbose_; } @@ -872,6 +885,23 @@ class KokkosKernelsHandle { } } + PAR_ILUTHandleType *get_par_ilut_handle() { return this->par_ilutHandle; } + void create_par_ilut_handle(size_type nrows, size_type nnzL = 0, + 
size_type nnzU = 0) { + this->destroy_par_ilut_handle(); + this->is_owner_of_the_par_ilut_handle = true; + this->par_ilutHandle = new PAR_ILUTHandleType(nrows, nnzL, nnzU); + this->par_ilutHandle->reset_handle(nrows, nnzL, nnzU); + this->par_ilutHandle->set_team_size(this->team_work_size); + this->par_ilutHandle->set_vector_size(this->vector_size); + } + void destroy_par_ilut_handle() { + if (is_owner_of_the_par_ilut_handle && this->par_ilutHandle != nullptr) { + delete this->par_ilutHandle; + this->par_ilutHandle = nullptr; + } + } + }; // end class KokkosKernelsHandle } // namespace Experimental diff --git a/sparse/src/KokkosSparse.hpp b/sparse/src/KokkosSparse.hpp index 20f33a9002..702db14344 100644 --- a/sparse/src/KokkosSparse.hpp +++ b/sparse/src/KokkosSparse.hpp @@ -60,3 +60,4 @@ #include "KokkosSparse_trsv.hpp" #include "KokkosSparse_spgemm.hpp" #include "KokkosSparse_gauss_seidel.hpp" +#include "KokkosSparse_par_ilut.hpp" diff --git a/sparse/src/KokkosSparse_par_ilut.hpp b/sparse/src/KokkosSparse_par_ilut.hpp new file mode 100644 index 0000000000..8136d3b064 --- /dev/null +++ b/sparse/src/KokkosSparse_par_ilut.hpp @@ -0,0 +1,482 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// \file KokkosSparse_par_ilut.hpp +/// \brief Parallel threshold incomplete LU factorization ILU(t) +/// +/// This file provides KokkosSparse::Experimental::par_ilut_symbolic and +/// par_ilut_numeric, which perform a local (no MPI) sparse ILU(t) on matrices +/// stored in compressed row sparse ("Crs") format. par_ilut_symbolic must be +/// called before par_ilut_numeric. The numeric function takes a deterministic +/// flag that forces deterministic results; this +/// is useful for testing but incurs a big performance penalty.
+/// +/// This algorithm is described in the paper: +/// PARILUT - A New Parallel Threshold ILU Factorization - Anzt, Chow, Dongarra + +#ifndef KOKKOSSPARSE_PAR_ILUT_HPP_ +#define KOKKOSSPARSE_PAR_ILUT_HPP_ + +#include + +#include "KokkosKernels_helpers.hpp" +#include "KokkosKernels_Error.hpp" +#include "KokkosSparse_par_ilut_symbolic_spec.hpp" +#include "KokkosSparse_par_ilut_numeric_spec.hpp" + +namespace KokkosSparse { +namespace Experimental { + +#define KOKKOSKERNELS_PAR_ILUT_SAME_TYPE(A, B) \ + std::is_same::type, \ + typename std::remove_const::type>::value + +template +void par_ilut_symbolic(KernelHandle* handle, ARowMapType& A_rowmap, + AEntriesType& A_entries, LRowMapType& L_rowmap, + URowMapType& U_rowmap) { + using size_type = typename KernelHandle::size_type; + using ordinal_type = typename KernelHandle::nnz_lno_t; + + static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( + typename ARowMapType::non_const_value_type, size_type), + "par_ilut_symbolic: A size_type must match KernelHandle " + "size_type (const doesn't matter)"); + static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( + typename AEntriesType::non_const_value_type, ordinal_type), + "par_ilut_symbolic: A entry type must match KernelHandle entry " + "type (aka nnz_lno_t, and const doesn't matter)"); + + static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( + typename LRowMapType::non_const_value_type, size_type), + "par_ilut_symbolic: L size_type must match KernelHandle " + "size_type (const doesn't matter)"); + + static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( + typename URowMapType::non_const_value_type, size_type), + "par_ilut_symbolic: U size_type must match KernelHandle " + "size_type (const doesn't matter)"); + + static_assert(Kokkos::is_view::value, + "par_ilut_symbolic: A_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "par_ilut_symbolic: A_entries is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "par_ilut_symbolic: L_rowmap is not a Kokkos::View."); + 
static_assert(Kokkos::is_view::value, + "par_ilut_symbolic: U_rowmap is not a Kokkos::View."); + + static_assert( + (int)LRowMapType::rank == (int)ARowMapType::rank, + "par_ilut_symbolic: The ranks of L_rowmap and A_rowmap do not match."); + + static_assert( + (int)LRowMapType::rank == (int)URowMapType::rank, + "par_ilut_symbolic: The ranks of L_rowmap and U_rowmap do not match."); + + static_assert(LRowMapType::rank == 1, + "par_ilut_symbolic: A_rowmap, L_rowmap and U_rowmap must all " + "have rank 1."); + + static_assert(std::is_same::value, + "par_ilut_symbolic: The output L_rowmap must be nonconst."); + static_assert(std::is_same::value, + "par_ilut_symbolic: The output U_rowmap must be nonconst."); + static_assert(std::is_same::value, + "par_ilut_symbolic: Views LRowMapType and ARowMapType have " + "different device_types."); + static_assert(std::is_same::value, + "par_ilut_symbolic: Views LRowMapType and URowMapType have " + "different device_types."); + + static_assert( + std::is_same< + typename LRowMapType::device_type::execution_space, + typename KernelHandle::PAR_ILUTHandleType::execution_space>::value, + "par_ilut_symbolic: KernelHandle and Views have different execution " + "spaces."); + + using c_size_t = typename KernelHandle::const_size_type; + using c_lno_t = typename KernelHandle::const_nnz_lno_t; + using c_scalar_t = typename KernelHandle::const_nnz_scalar_t; + + using c_exec_t = typename KernelHandle::HandleExecSpace; + using c_temp_t = typename KernelHandle::HandleTempMemorySpace; + using c_persist_t = typename KernelHandle::HandlePersistentMemorySpace; + + using const_handle_type = + typename KokkosKernels::Experimental::KokkosKernelsHandle< + c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t>; + + const_handle_type tmp_handle(*handle); + + using ARowMap_Internal = Kokkos::View< + typename ARowMapType::const_value_type*, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename ARowMapType::device_type, + 
Kokkos::MemoryTraits >; + + using AEntries_Internal = Kokkos::View< + typename AEntriesType::const_value_type*, + typename KokkosKernels::Impl::GetUnifiedLayout< + AEntriesType>::array_layout, + typename AEntriesType::device_type, + Kokkos::MemoryTraits >; + + using LRowMap_Internal = Kokkos::View< + typename LRowMapType::non_const_value_type*, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename LRowMapType::device_type, + Kokkos::MemoryTraits >; + + using URowMap_Internal = Kokkos::View< + typename URowMapType::non_const_value_type*, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename URowMapType::device_type, + Kokkos::MemoryTraits >; + + ARowMap_Internal A_rowmap_i = A_rowmap; + AEntries_Internal A_entries_i = A_entries; + LRowMap_Internal L_rowmap_i = L_rowmap; + URowMap_Internal U_rowmap_i = U_rowmap; + + KokkosSparse::Impl::PAR_ILUT_SYMBOLIC< + const_handle_type, ARowMap_Internal, AEntries_Internal, LRowMap_Internal, + URowMap_Internal>::par_ilut_symbolic(&tmp_handle, A_rowmap_i, A_entries_i, + L_rowmap_i, U_rowmap_i); + +} // par_ilut_symbolic + +template +void par_ilut_numeric(KernelHandle* handle, ARowMapType& A_rowmap, + AEntriesType& A_entries, AValuesType& A_values, + LRowMapType& L_rowmap, LEntriesType& L_entries, + LValuesType& L_values, URowMapType& U_rowmap, + UEntriesType& U_entries, UValuesType& U_values, + bool deterministic) { + using size_type = typename KernelHandle::size_type; + using ordinal_type = typename KernelHandle::nnz_lno_t; + using scalar_type = typename KernelHandle::nnz_scalar_t; + + static_assert( + KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( + typename ARowMapType::non_const_value_type, size_type), + "par_ilut_numeric: A size_type must match KernelHandle size_type " + "(const doesn't matter)"); + static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( + typename AEntriesType::non_const_value_type, ordinal_type), + "par_ilut_numeric: A entry type must match KernelHandle entry " + "type (aka nnz_lno_t, 
and const doesn't matter)"); + static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( + typename AValuesType::value_type, scalar_type), + "par_ilut_numeric: A scalar type must match KernelHandle entry " + "type (aka nnz_scalar_t, and const doesn't matter)"); + + static_assert( + KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( + typename LRowMapType::non_const_value_type, size_type), + "par_ilut_numeric: L size_type must match KernelHandle size_type " + "(const doesn't matter)"); + static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( + typename LEntriesType::non_const_value_type, ordinal_type), + "par_ilut_numeric: L entry type must match KernelHandle entry " + "type (aka nnz_lno_t, and const doesn't matter)"); + static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( + typename LValuesType::value_type, scalar_type), + "par_ilut_numeric: L scalar type must match KernelHandle entry " + "type (aka nnz_scalar_t, and const doesn't matter)"); + + static_assert( + KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( + typename URowMapType::non_const_value_type, size_type), + "par_ilut_numeric: U size_type must match KernelHandle size_type " + "(const doesn't matter)"); + static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( + typename UEntriesType::non_const_value_type, ordinal_type), + "par_ilut_numeric: U entry type must match KernelHandle entry " + "type (aka nnz_lno_t, and const doesn't matter)"); + static_assert(KOKKOSKERNELS_PAR_ILUT_SAME_TYPE( + typename UValuesType::value_type, scalar_type), + "par_ilut_numeric: U scalar type must match KernelHandle entry " + "type (aka nnz_scalar_t, and const doesn't matter)"); + + static_assert(Kokkos::is_view::value, + "par_ilut_numeric: A_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "par_ilut_numeric: A_entries is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "par_ilut_numeric: A_values is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "par_ilut_numeric: L_rowmap is not a Kokkos::View."); + 
static_assert(Kokkos::is_view::value, + "par_ilut_numeric: L_entries is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "par_ilut_numeric: L_values is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "par_ilut_numeric: U_rowmap is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "par_ilut_numeric: U_entries is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "par_ilut_numeric: U_values is not a Kokkos::View."); + + static_assert( + (int)LRowMapType::rank == (int)ARowMapType::rank, + "par_ilut_numeric: The ranks of L_rowmap and A_rowmap do not match."); + static_assert( + (int)LEntriesType::rank == (int)AEntriesType::rank, + "par_ilut_numeric: The ranks of L_entries and A_entries do not match."); + static_assert( + (int)LValuesType::rank == (int)AValuesType::rank, + "par_ilut_numeric: The ranks of L_values and A_values do not match."); + + static_assert( + (int)LRowMapType::rank == (int)URowMapType::rank, + "par_ilut_numeric: The ranks of L_rowmap and U_rowmap do not match."); + static_assert( + (int)LEntriesType::rank == (int)UEntriesType::rank, + "par_ilut_numeric: The ranks of L_entries and U_entries do not match."); + static_assert( + (int)LValuesType::rank == (int)UValuesType::rank, + "par_ilut_numeric: The ranks of L_values and U_values do not match."); + + static_assert(LRowMapType::rank == 1, + "par_ilut_numeric: A_rowmap, L_rowmap and U_rowmap must all " + "have rank 1."); + static_assert(LEntriesType::rank == 1, + "par_ilut_numeric: A_entries, L_entries and U_entries must all " + "have rank 1."); + static_assert(LValuesType::rank == 1, + "par_ilut_numeric: A_values, L_values and U_values must all " + "have rank 1."); + + static_assert( + std::is_same::value, + "par_ilut_numeric: The output L_entries must be nonconst."); + static_assert(std::is_same::value, + "par_ilut_numeric: The output L_values must be nonconst."); + static_assert( + std::is_same::value, + "par_ilut_numeric: The output 
U_entries must be nonconst."); + static_assert(std::is_same::value, + "par_ilut_numeric: The output U_values must be nonconst."); + + static_assert(std::is_same::value, + "par_ilut_numeric: Views LRowMapType and ARowMapType have " + "different device_types."); + static_assert(std::is_same::value, + "par_ilut_numeric: Views LEntriesType and AEntriesType have " + "different device_types."); + static_assert(std::is_same::value, + "par_ilut_numeric: Views LValuesType and AValuesType have " + "different device_types."); + + static_assert(std::is_same::value, + "par_ilut_numeric: Views LRowMapType and URowMapType have " + "different device_types."); + static_assert(std::is_same::value, + "par_ilut_numeric: Views LEntriesType and UEntriesType have " + "different device_types."); + static_assert(std::is_same::value, + "par_ilut_numeric: Views LValuesType and UValuesType have " + "different device_types."); + + static_assert( + std::is_same< + typename LRowMapType::device_type::execution_space, + typename KernelHandle::PAR_ILUTHandleType::execution_space>::value, + "par_ilut_numeric: KernelHandle and Views have different execution " + "spaces."); + static_assert( + std::is_same< + typename LEntriesType::device_type::execution_space, + typename KernelHandle::PAR_ILUTHandleType::execution_space>::value, + "par_ilut_numeric: KernelHandle and Views have different execution " + "spaces."); + static_assert( + std::is_same< + typename LValuesType::device_type::execution_space, + typename KernelHandle::PAR_ILUTHandleType::execution_space>::value, + "par_ilut_numeric: KernelHandle and Views have different execution " + "spaces."); + + static_assert( + std::is_same::value, + "par_ilut_numeric: rowmap and entries have different device types."); + static_assert( + std::is_same::value, + "par_ilut_numeric: rowmap and values have different device types."); + + // Check if symbolic has been called + if (handle->get_par_ilut_handle()->is_symbolic_complete() == false) { + std::ostringstream 
os; + os << "KokkosSparse::Experimental::par_ilut_numeric: par_ilut_symbolic " + "must be " + "called before par_ilut_numeric."; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + + using c_size_t = typename KernelHandle::const_size_type; + using c_lno_t = typename KernelHandle::const_nnz_lno_t; + using c_scalar_t = typename KernelHandle::const_nnz_scalar_t; + + using c_exec_t = typename KernelHandle::HandleExecSpace; + using c_temp_t = typename KernelHandle::HandleTempMemorySpace; + using c_persist_t = typename KernelHandle::HandlePersistentMemorySpace; + + using const_handle_type = + typename KokkosKernels::Experimental::KokkosKernelsHandle< + c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t>; + + const_handle_type tmp_handle(*handle); + + using ARowMap_Internal = Kokkos::View< + typename ARowMapType::const_value_type*, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename ARowMapType::device_type, + Kokkos::MemoryTraits >; + + using AEntries_Internal = Kokkos::View< + typename AEntriesType::const_value_type*, + typename KokkosKernels::Impl::GetUnifiedLayout< + AEntriesType>::array_layout, + typename AEntriesType::device_type, + Kokkos::MemoryTraits >; + + using AValues_Internal = Kokkos::View< + typename AValuesType::const_value_type*, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename AValuesType::device_type, + Kokkos::MemoryTraits >; + + using LRowMap_Internal = Kokkos::View< + typename LRowMapType::non_const_value_type*, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename LRowMapType::device_type, + Kokkos::MemoryTraits >; + + using LEntries_Internal = + Kokkos::View::array_layout, + typename LEntriesType::device_type, + Kokkos::MemoryTraits >; + + using LValues_Internal = Kokkos::View< + typename LValuesType::non_const_value_type*, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename LValuesType::device_type, + Kokkos::MemoryTraits >; + + 
using URowMap_Internal = Kokkos::View< + typename URowMapType::non_const_value_type*, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename URowMapType::device_type, + Kokkos::MemoryTraits >; + + using UEntries_Internal = + Kokkos::View::array_layout, + typename UEntriesType::device_type, + Kokkos::MemoryTraits >; + + using UValues_Internal = Kokkos::View< + typename UValuesType::non_const_value_type*, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename UValuesType::device_type, + Kokkos::MemoryTraits >; + + ARowMap_Internal A_rowmap_i = A_rowmap; + AEntries_Internal A_entries_i = A_entries; + AValues_Internal A_values_i = A_values; + LRowMap_Internal L_rowmap_i = L_rowmap; + LEntries_Internal L_entries_i = L_entries; + LValues_Internal L_values_i = L_values; + URowMap_Internal U_rowmap_i = U_rowmap; + UEntries_Internal U_entries_i = U_entries; + UValues_Internal U_values_i = U_values; + + KokkosSparse::Impl::PAR_ILUT_NUMERIC< + const_handle_type, ARowMap_Internal, AEntries_Internal, AValues_Internal, + LRowMap_Internal, LEntries_Internal, LValues_Internal, URowMap_Internal, + UEntries_Internal, + UValues_Internal>::par_ilut_numeric(&tmp_handle, A_rowmap_i, A_entries_i, + A_values_i, L_rowmap_i, L_entries_i, + L_values_i, U_rowmap_i, U_entries_i, + U_values_i, deterministic); + + // These may have been resized + L_entries = L_entries_i; + L_values = L_values_i; + U_entries = U_entries_i; + U_values = U_values_i; + +} // par_ilut_numeric + +} // namespace Experimental +} // namespace KokkosSparse + +#undef KOKKOSKERNELS_PAR_ILUT_SAME_TYPE + +#endif // KOKKOSSPARSE_PAR_ILUT_HPP_ diff --git a/sparse/src/KokkosSparse_par_ilut_handle.hpp b/sparse/src/KokkosSparse_par_ilut_handle.hpp new file mode 100644 index 0000000000..d8bc6b11f2 --- /dev/null +++ b/sparse/src/KokkosSparse_par_ilut_handle.hpp @@ -0,0 +1,196 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include + +#ifndef _PAR_ILUTHANDLE_HPP +#define _PAR_ILUTHANDLE_HPP + +#define KEEP_DIAG + +namespace KokkosSparse { +namespace Experimental { + +template +class PAR_ILUTHandle { // Settings and bookkeeping for par_ilut: row count, L/U nnz, iteration and fill-in limits, team/vector sizes and the symbolic-complete flag. + public: + using HandleExecSpace = ExecutionSpace; + using HandleTempMemorySpace = TemporaryMemorySpace; + using HandlePersistentMemorySpace = PersistentMemorySpace; + + using execution_space = ExecutionSpace; + using memory_space = HandlePersistentMemorySpace; + using TeamPolicy = Kokkos::TeamPolicy; + using RangePolicy = Kokkos::RangePolicy; + + using size_type = typename std::remove_const::type; + using const_size_type = const size_type; + + using nnz_lno_t = typename std::remove_const::type; + using const_nnz_lno_t = const nnz_lno_t; + + using nnz_scalar_t = typename std::remove_const::type; + using const_nnz_scalar_t = const nnz_scalar_t; + + using float_t = typename Kokkos::ArithTraits::mag_type; + + using nnz_row_view_t = + typename Kokkos::View; + + using nnz_lno_view_t = + typename Kokkos::View; + + using nnz_value_view_t = + typename Kokkos::View; + + using signed_integral_t = typename std::make_signed< + typename nnz_row_view_t::non_const_value_type>::type; + + using signed_nnz_lno_view_t = + Kokkos::View; + + private: + size_type nrows; + size_type nnzL; + size_type nnzU; + size_type max_iter; + nnz_scalar_t residual_norm_delta_stop; + + bool symbolic_complete; + + int team_size; // -1 until set_team_size() is called (the constructor initializes it to -1) + int vector_size; // likewise initialized to -1 by the constructor + + float_t fill_in_limit; + + public: + PAR_ILUTHandle(const size_type nrows_, const size_type nnzL_ = 0, + const size_type nnzU_ = 0, const size_type max_iter_ = 1, + const nnz_scalar_t residual_norm_delta_stop_ = 0., + const float_t fill_in_limit_ = 0.75, + bool symbolic_complete_ = false) + : nrows(nrows_), + nnzL(nnzL_), + nnzU(nnzU_), + max_iter(max_iter_), + residual_norm_delta_stop(residual_norm_delta_stop_), + 
symbolic_complete(symbolic_complete_), + team_size(-1), + vector_size(-1), + fill_in_limit(fill_in_limit_) {} + + void reset_handle(const size_type nrows_, const size_type nnzL_, + const size_type nnzU_) { // Overwrites nrows/nnzL/nnzU and clears the symbolic-complete flag; the other settings are left as they are. + set_nrows(nrows_); + set_nnzL(nnzL_); + set_nnzU(nnzU_); + reset_symbolic_complete(); + } + + KOKKOS_INLINE_FUNCTION + ~PAR_ILUTHandle() {} + + KOKKOS_INLINE_FUNCTION + size_type get_nrows() const { return nrows; } + + KOKKOS_INLINE_FUNCTION + void set_nrows(const size_type nrows_) { this->nrows = nrows_; } + + KOKKOS_INLINE_FUNCTION + size_type get_nnzL() const { return nnzL; } + + KOKKOS_INLINE_FUNCTION + void set_nnzL(const size_type nnzL_) { this->nnzL = nnzL_; } + + KOKKOS_INLINE_FUNCTION + size_type get_nnzU() const { return nnzU; } + + KOKKOS_INLINE_FUNCTION + void set_nnzU(const size_type nnzU_) { this->nnzU = nnzU_; } + + bool is_symbolic_complete() const { return symbolic_complete; } + + void set_symbolic_complete() { this->symbolic_complete = true; } + void reset_symbolic_complete() { this->symbolic_complete = false; } + + void set_team_size(const int ts) { this->team_size = ts; } + int get_team_size() const { return this->team_size; } + + void set_vector_size(const int vs) { this->vector_size = vs; } + int get_vector_size() const { return this->vector_size; } + + void set_max_iter(const size_type max_iter_) { this->max_iter = max_iter_; } + int get_max_iter() const { return this->max_iter; } // NOTE(review): returns int although max_iter is stored as size_type -- confirm the narrowing is intended. + + void set_residual_norm_delta_stop( + const nnz_scalar_t residual_norm_delta_stop_) { + this->residual_norm_delta_stop = residual_norm_delta_stop_; + } + nnz_scalar_t get_residual_norm_delta_stop() const { + return this->residual_norm_delta_stop; + } + + void set_fill_in_limit(const float_t fill_in_limit_) { + this->fill_in_limit = fill_in_limit_; + } + float_t get_fill_in_limit() const { return this->fill_in_limit; } + + TeamPolicy get_default_team_policy() const { // Uses Kokkos::AUTO as the team size while team_size is still -1, otherwise the stored team_size. + if (team_size == -1) { + return TeamPolicy(nrows, Kokkos::AUTO); + } else { + return 
TeamPolicy(nrows, team_size); + } + } +}; + +} // namespace Experimental +} // namespace KokkosSparse + +#endif diff --git a/sparse/src/KokkosSparse_spadd.hpp b/sparse/src/KokkosSparse_spadd.hpp index 38bead14de..50c862f27c 100644 --- a/sparse/src/KokkosSparse_spadd.hpp +++ b/sparse/src/KokkosSparse_spadd.hpp @@ -57,8 +57,7 @@ namespace Experimental { // kernel handle has information about whether it is sorted add or not. template + typename blno_nnz_view_t_, typename clno_row_view_t_> void spadd_symbolic( KernelHandle* handle, const alno_row_view_t_ a_rowmap, const alno_nnz_view_t_ a_entries, const blno_row_view_t_ b_rowmap, @@ -222,13 +221,9 @@ void spadd_symbolic(KernelHandle* handle, const AMatrix& A, const BMatrix& B, row_map_type row_mapC( Kokkos::view_alloc(Kokkos::WithoutInitializing, "row map"), A.numRows() + 1); - KokkosSparse::Experimental::spadd_symbolic< - KernelHandle, typename AMatrix::row_map_type::const_type, - typename AMatrix::index_type::const_type, - typename BMatrix::row_map_type::const_type, - typename BMatrix::index_type::const_type, row_map_type, entries_type>( - handle, A.graph.row_map, A.graph.entries, B.graph.row_map, - B.graph.entries, row_mapC); + KokkosSparse::Experimental::spadd_symbolic(handle, A.graph.row_map, + A.graph.entries, B.graph.row_map, + B.graph.entries, row_mapC); // Now create and allocate the entries and values // views so we can build a graph and then matrix C diff --git a/sparse/src/KokkosSparse_spgemm_handle.hpp b/sparse/src/KokkosSparse_spgemm_handle.hpp index 7514082ef1..feb0e66afd 100644 --- a/sparse/src/KokkosSparse_spgemm_handle.hpp +++ b/sparse/src/KokkosSparse_spgemm_handle.hpp @@ -235,7 +235,7 @@ class SPGEMMHandle { }; #else struct cuSparseSpgemmHandleType { - cusparse_spgemm_handle_t handle; + cusparseHandle_t handle; cusparseOperation_t transA; cusparseOperation_t transB; cusparseMatDescr_t a_descr; diff --git a/sparse/tpls/KokkosSparse_par_ilut_numeric_tpl_spec_avail.hpp 
b/sparse/tpls/KokkosSparse_par_ilut_numeric_tpl_spec_avail.hpp new file mode 100644 index 0000000000..748c1ae1ae --- /dev/null +++ b/sparse/tpls/KokkosSparse_par_ilut_numeric_tpl_spec_avail.hpp @@ -0,0 +1,62 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSPARSE_PAR_ILUT_NUMERIC_TPL_SPEC_AVAIL_HPP_ +#define KOKKOSPARSE_PAR_ILUT_NUMERIC_TPL_SPEC_AVAIL_HPP_ + +namespace KokkosSparse { +namespace Impl { +// Trait reporting whether a TPL specialization of par_ilut_numeric exists; this primary template sets value to false (none available). +template +struct par_ilut_numeric_tpl_spec_avail { + enum : bool { value = false }; +}; + +} // namespace Impl +} // namespace KokkosSparse + +#endif diff --git a/sparse/tpls/KokkosSparse_par_ilut_numeric_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_par_ilut_numeric_tpl_spec_decl.hpp new file mode 100644 index 0000000000..f59c1bdb55 --- /dev/null +++ b/sparse/tpls/KokkosSparse_par_ilut_numeric_tpl_spec_decl.hpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSPARSE_PAR_ILUT_NUMERIC_TPL_SPEC_DECL_HPP_ +#define KOKKOSPARSE_PAR_ILUT_NUMERIC_TPL_SPEC_DECL_HPP_ + +namespace KokkosSparse { +namespace Impl {} +} // namespace KokkosSparse + +#endif diff --git a/sparse/tpls/KokkosSparse_par_ilut_symbolic_tpl_spec_avail.hpp b/sparse/tpls/KokkosSparse_par_ilut_symbolic_tpl_spec_avail.hpp new file mode 100644 index 0000000000..f63df5d638 --- /dev/null +++ b/sparse/tpls/KokkosSparse_par_ilut_symbolic_tpl_spec_avail.hpp @@ -0,0 +1,60 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSPARSE_PAR_ILUT_SYMBOLIC_TPL_SPEC_AVAIL_HPP_ +#define KOKKOSPARSE_PAR_ILUT_SYMBOLIC_TPL_SPEC_AVAIL_HPP_ + +namespace KokkosSparse { +namespace Impl { +// Trait reporting whether a TPL specialization of par_ilut_symbolic exists; this primary template sets value to false (none available). +template +struct par_ilut_symbolic_tpl_spec_avail { + enum : bool { value = false }; +}; + +} // namespace Impl +} // namespace KokkosSparse + +#endif diff --git a/sparse/tpls/KokkosSparse_par_ilut_symbolic_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_par_ilut_symbolic_tpl_spec_decl.hpp new file mode 100644 index 0000000000..e30dee508a --- /dev/null +++ b/sparse/tpls/KokkosSparse_par_ilut_symbolic_tpl_spec_decl.hpp @@ -0,0 +1,52 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSPARSE_PAR_ILUT_SYMBOLIC_TPL_SPEC_DECL_HPP_ +#define KOKKOSPARSE_PAR_ILUT_SYMBOLIC_TPL_SPEC_DECL_HPP_ + +namespace KokkosSparse { +namespace Impl {} +} // namespace KokkosSparse + +#endif diff --git a/sparse/unit_test/Test_Sparse.hpp b/sparse/unit_test/Test_Sparse.hpp index 33269b6b0e..cde0ede5e3 100644 --- a/sparse/unit_test/Test_Sparse.hpp +++ b/sparse/unit_test/Test_Sparse.hpp @@ -20,6 +20,7 @@ #include "Test_Sparse_spmv_bsr.hpp" #include "Test_Sparse_sptrsv.hpp" #include "Test_Sparse_trsv.hpp" +#include "Test_Sparse_par_ilut.hpp" #include "Test_Sparse_Transpose.hpp" #include "Test_Sparse_TestUtils_RandCscMat.hpp" #include "Test_Sparse_csc2csr.hpp" diff --git a/sparse/unit_test/Test_Sparse_par_ilut.hpp b/sparse/unit_test/Test_Sparse_par_ilut.hpp new file mode 100644 index 0000000000..85d2f8d7c9 --- /dev/null +++ b/sparse/unit_test/Test_Sparse_par_ilut.hpp @@ -0,0 +1,327 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +#include +#include + +#include "KokkosSparse_CrsMatrix.hpp" +#include "KokkosKernels_IOUtils.hpp" +#include "KokkosBlas1_nrm2.hpp" +#include "KokkosSparse_spmv.hpp" +#include "KokkosSparse_par_ilut.hpp" + +#include + +using namespace KokkosSparse; +using namespace KokkosSparse::Experimental; +using namespace KokkosKernels; +using namespace KokkosKernels::Experimental; + +typedef Kokkos::complex kokkos_complex_double; +typedef Kokkos::complex kokkos_complex_float; + +namespace Test { + +template +std::vector> decompress_matrix( + Kokkos::View& row_map, + Kokkos::View& entries, + Kokkos::View& values) { // Copies the CRS views to host mirrors and expands them into a dense nrows x nrows matrix; absent entries stay 0. + const size_type nrows = row_map.size() - 1; + std::vector> result; + result.resize(nrows); + for (auto& row : result) { + row.resize(nrows, 0.0); + } + + auto hrow_map = Kokkos::create_mirror_view(row_map); + auto hentries = Kokkos::create_mirror_view(entries); + auto hvalues = Kokkos::create_mirror_view(values); + Kokkos::deep_copy(hrow_map, row_map); + Kokkos::deep_copy(hentries, entries); + Kokkos::deep_copy(hvalues, values); + + for (size_type row_idx = 0; row_idx < nrows; ++row_idx) { + const size_type row_nnz_begin = hrow_map(row_idx); + const size_type row_nnz_end = hrow_map(row_idx + 1); + for (size_type row_nnz = row_nnz_begin; row_nnz < row_nnz_end; ++row_nnz) { + const lno_t col_idx = hentries(row_nnz); + const scalar_t value = hvalues(row_nnz); + result[row_idx][col_idx] = value; + } + } + + return result; +} + +template +void check_matrix(const std::string& name, + Kokkos::View& row_map, + Kokkos::View& entries, + Kokkos::View& values, + const std::vector>& expected) { // Densifies the CRS input and compares every entry with `expected` using an absolute tolerance of 0.01. + const auto decompressed_mtx = decompress_matrix(row_map, entries, values); + + const size_type nrows = row_map.size() - 1; + for (size_type row_idx = 0; row_idx < nrows; ++row_idx) { + for (size_type col_idx = 0; col_idx 
< nrows; ++col_idx) { + EXPECT_NEAR(expected[row_idx][col_idx], + decompressed_mtx[row_idx][col_idx], 0.01) + << "Failed check is: " << name << "[" << row_idx << "][" << col_idx + << "]"; + } + } +} + +template +void print_matrix(const std::vector>& matrix) { // Debug helper: prints each row on its own line with two decimals per entry. + for (const auto& row : matrix) { + for (const auto& item : row) { + std::printf("%.2f ", item); + } + std::cout << std::endl; + } +} + +template +void run_test_par_ilut() { // Builds a 4x4 CRS matrix, runs par_ilut_symbolic and par_ilut_numeric, checks the nnz counts, and checks factor values only when KOKKOS_ENABLE_SERIAL is defined. + typedef Kokkos::View RowMapType; + typedef Kokkos::View EntriesType; + typedef Kokkos::View ValuesType; + + // Simple test fixture A + std::vector> A = {{1., 6., 4., 7.}, + {2., -5., 0., 8.}, + {0.5, -3., 6., 0.}, + {0.2, -0.5, -9., 0.}}; + + const scalar_t ZERO = scalar_t(0); + + const size_type nrows = A.size(); + + // Count A nnz's + size_type nnz = 0; + for (size_type row_idx = 0; row_idx < nrows; ++row_idx) { + for (size_type col_idx = 0; col_idx < nrows; ++col_idx) { + if (A[row_idx][col_idx] != ZERO) { + ++nnz; + } + } + } + + // Allocate device CRS views for A + RowMapType row_map("row_map", nrows + 1); + EntriesType entries("entries", nnz); + ValuesType values("values", nnz); + + // Create host mirror views for CRS A + auto hrow_map = Kokkos::create_mirror_view(row_map); + auto hentries = Kokkos::create_mirror_view(entries); + auto hvalues = Kokkos::create_mirror_view(values); + + // Compress A into CRS (host views); each row's end offset is written once, after the row has been scanned + size_type curr_nnz = 0; + for (size_type row_idx = 0; row_idx < nrows; ++row_idx) { + for (size_type col_idx = 0; col_idx < nrows; ++col_idx) { + if (A[row_idx][col_idx] != ZERO) { + hentries(curr_nnz) = col_idx; + hvalues(curr_nnz) = A[row_idx][col_idx]; + ++curr_nnz; + } + } + hrow_map(row_idx + 1) = curr_nnz; + } + + // Copy host A CRS views to device A CRS views + Kokkos::deep_copy(row_map, hrow_map); + Kokkos::deep_copy(entries, hentries); + Kokkos::deep_copy(values, hvalues); + + // Make kernel handle + typedef KokkosKernels::Experimental::KokkosKernelsHandle< + size_type, lno_t, scalar_t, typename 
device::execution_space, + typename device::memory_space, typename device::memory_space> + KernelHandle; + + KernelHandle kh; + + kh.create_par_ilut_handle(nrows); + + auto par_ilut_handle = kh.get_par_ilut_handle(); + + // Allocate L and U CRS views as outputs + RowMapType L_row_map("L_row_map", nrows + 1); + RowMapType U_row_map("U_row_map", nrows + 1); + + // Initial L/U approximations for A + par_ilut_symbolic(&kh, row_map, entries, L_row_map, U_row_map); + + const size_type nnzL = par_ilut_handle->get_nnzL(); + const size_type nnzU = par_ilut_handle->get_nnzU(); + + EXPECT_EQ(nnzL, 10); + EXPECT_EQ(nnzU, 8); + + EntriesType L_entries("L_entries", nnzL); + ValuesType L_values("L_values", nnzL); + EntriesType U_entries("U_entries", nnzU); + ValuesType U_values("U_values", nnzU); + + par_ilut_numeric(&kh, row_map, entries, values, L_row_map, L_entries, + L_values, U_row_map, U_entries, U_values, +#ifdef KOKKOS_ENABLE_SERIAL + true /*deterministic*/ +#else + false /*cannot ask for determinism*/ +#endif + ); + + // Use this to check LU + // std::vector > expected_LU = { + // {1.0, 6.0, 4.0, 7.0}, + // {2.0, 7.0, 8.0, 22.0}, + // {0.5, 18.0, 8.0, -20.5}, + // {0.2, 3.7, -53.2, -1.60} + // }; + + // check_matrix("LU numeric", L_row_map, L_entries, L_values, expected_LU); + + // Use these fixtures to test add_candidates + // std::vector > expected_L_candidates = { + // {1., 0., 0., 0.}, + // {2., 1., 0., 0.}, + // {0.50, -3., 1., 0.}, + // {0.20, -0.50, -9., 1.} + // }; + + // check_matrix("L numeric", L_row_map, L_entries, L_values, + // expected_L_candidates); + + // std::vector > expected_U_candidates = { + // {1., 6., 4., 7.}, + // {0., -5., -8., 8.}, + // {0., 0., 6., 20.50}, + // {0., 0., 0., 1.} + // }; + + // check_matrix("U numeric", U_row_map, U_entries, U_values, + // expected_U_candidates); + + // Use these fixtures to test compute_l_u_factors + // std::vector > expected_L_candidates = { + // {1., 0., 0., 0.}, + // {2., 1., 0., 0.}, + // {0.50, 0.35, 1., 
0.}, + // {0.20, 0.10, -1.32, 1.} + // }; + + // check_matrix("L numeric", L_row_map, L_entries, L_values, + // expected_L_candidates); + + // std::vector > expected_U_candidates = { + // {1., 6., 4., 7.}, + // {0., -17., -8., -6.}, + // {0., 0., 6.82, -1.38}, + // {0., 0., 0., -2.62} + // }; + + // check_matrix("U numeric", U_row_map, U_entries, U_values, + // expected_U_candidates); + + // Serial is required for deterministic mode and the checks below cannot + // reliably pass without determinism. +#ifdef KOKKOS_ENABLE_SERIAL + + // Use these fixtures to test full numeric + std::vector> expected_L_candidates = { + {1., 0., 0., 0.}, + {2., 1., 0., 0.}, + {0.50, 0.35, 1., 0.}, + {0., 0., -1.32, 1.}}; + + check_matrix("L numeric", L_row_map, L_entries, L_values, + expected_L_candidates); + + std::vector> expected_U_candidates = { + {1., 6., 4., 7.}, + {0., -17., -8., -6.}, + {0., 0., 6.82, 0.}, + {0., 0., 0., 0.} // [3] = 0 for full alg, -2.62 for post-threshold only + }; + + check_matrix("U numeric", U_row_map, U_entries, U_values, + expected_U_candidates); + + // Checking + +#endif + kh.destroy_par_ilut_handle(); // release the handle in every build, not only when Serial is enabled +} + +} // namespace Test + +template +void test_par_ilut() { + Test::run_test_par_ilut(); +} + +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, \ + sparse##_##par_ilut##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_par_ilut(); \ + } + +#define NO_TEST_COMPLEX + +#include + +#undef KOKKOSKERNELS_EXECUTE_TEST +#undef NO_TEST_COMPLEX diff --git a/sparse/unit_test/Test_Sparse_spadd.hpp b/sparse/unit_test/Test_Sparse_spadd.hpp index 881f891837..84d30a7dde 100644 --- a/sparse/unit_test/Test_Sparse_spadd.hpp +++ b/sparse/unit_test/Test_Sparse_spadd.hpp @@ -114,12 +114,9 @@ void test_spadd(lno_t numRows, lno_t numCols, size_type minNNZ, // initialized Kokkos::deep_copy(c_row_map, (size_type)5); auto addHandle = handle.get_spadd_handle(); - KokkosSparse::Experimental::spadd_symbolic< - KernelHandle, typename 
row_map_type::const_type, - typename entries_type::const_type, typename row_map_type::const_type, - typename entries_type::const_type, row_map_type, entries_type>( - &handle, A.graph.row_map, A.graph.entries, B.graph.row_map, - B.graph.entries, c_row_map); + KokkosSparse::Experimental::spadd_symbolic(&handle, A.graph.row_map, + A.graph.entries, B.graph.row_map, + B.graph.entries, c_row_map); size_type c_nnz = addHandle->get_c_nnz(); // Fill values, entries with incorrect incorret values_type c_values( @@ -127,15 +124,10 @@ void test_spadd(lno_t numRows, lno_t numCols, size_type minNNZ, Kokkos::deep_copy(c_values, ((typename KAT::mag_type)5) * KAT::one()); entries_type c_entries("C entries", c_nnz); Kokkos::deep_copy(c_entries, (lno_t)5); - KokkosSparse::Experimental::spadd_numeric< - KernelHandle, typename row_map_type::const_type, - typename entries_type::const_type, scalar_t, - typename values_type::const_type, typename row_map_type::const_type, - typename entries_type::const_type, scalar_t, - typename values_type::const_type, row_map_type, entries_type, - values_type>(&handle, A.graph.row_map, A.graph.entries, A.values, - KAT::one(), B.graph.row_map, B.graph.entries, B.values, - KAT::one(), c_row_map, c_entries, c_values); + KokkosSparse::Experimental::spadd_numeric( + &handle, A.graph.row_map, A.graph.entries, A.values, KAT::one(), + B.graph.row_map, B.graph.entries, B.values, KAT::one(), c_row_map, + c_entries, c_values); // done with handle // create C using CRS arrays crsMat_t C("C", numRows, numCols, c_nnz, c_values, c_row_map, c_entries);