diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt index 5b7d3654f2..ed426ed131 100644 --- a/blas/CMakeLists.txt +++ b/blas/CMakeLists.txt @@ -276,6 +276,13 @@ KOKKOSKERNELS_GENERATE_ETI(Blas1_rotmg rotmg TYPE_LISTS REAL_FLOATS LAYOUTS DEVICES ) +KOKKOSKERNELS_GENERATE_ETI(Blas1_swap swap + COMPONENTS blas + HEADER_LIST ETI_HEADERS + SOURCE_LIST SOURCES + TYPE_LISTS FLOATS LAYOUTS DEVICES +) + KOKKOSKERNELS_GENERATE_ETI(Blas2_gemv gemv COMPONENTS blas HEADER_LIST ETI_HEADERS diff --git a/blas/eti/generated_specializations_cpp/swap/KokkosBlas1_swap_eti_spec_inst.cpp.in b/blas/eti/generated_specializations_cpp/swap/KokkosBlas1_swap_eti_spec_inst.cpp.in new file mode 100644 index 0000000000..3c94c724b1 --- /dev/null +++ b/blas/eti/generated_specializations_cpp/swap/KokkosBlas1_swap_eti_spec_inst.cpp.in @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true +#include "KokkosKernels_config.h" +#include "KokkosBlas1_swap_spec.hpp" + +namespace KokkosBlas { +namespace Impl { +@BLAS1_SWAP_ETI_INST_BLOCK@ +} //IMPL +} //Kokkos diff --git a/blas/eti/generated_specializations_hpp/KokkosBlas1_swap_eti_spec_avail.hpp.in b/blas/eti/generated_specializations_hpp/KokkosBlas1_swap_eti_spec_avail.hpp.in new file mode 100644 index 0000000000..fb16463a27 --- /dev/null +++ b/blas/eti/generated_specializations_hpp/KokkosBlas1_swap_eti_spec_avail.hpp.in @@ -0,0 +1,51 @@ +/* +//@HEADER +// ************************************************************************ +// +// KokkosKernels 0.9: Linear Algebra and Graph Kernels +// Copyright 2017 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef KOKKOSBLAS1_SWAP_ETI_SPEC_AVAIL_HPP_ +#define KOKKOSBLAS1_SWAP_ETI_SPEC_AVAIL_HPP_ + +namespace KokkosBlas { +namespace Impl { +@BLAS1_SWAP_ETI_AVAIL_BLOCK@ + } //IMPL +} //Kokkos +#endif diff --git a/blas/eti/generated_specializations_hpp/KokkosBlas1_swap_eti_spec_decl.hpp.in b/blas/eti/generated_specializations_hpp/KokkosBlas1_swap_eti_spec_decl.hpp.in new file mode 100644 index 0000000000..91239f78b0 --- /dev/null +++ b/blas/eti/generated_specializations_hpp/KokkosBlas1_swap_eti_spec_decl.hpp.in @@ -0,0 +1,51 @@ +/* +//@HEADER +// ************************************************************************ +// +// KokkosKernels 0.9: Linear Algebra and Graph Kernels +// Copyright 2017 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef KOKKOSBLAS1_SWAP_ETI_SPEC_DECL_HPP_ +#define KOKKOSBLAS1_SWAP_ETI_SPEC_DECL_HPP_ + +namespace KokkosBlas { +namespace Impl { +@BLAS1_SWAP_ETI_DECL_BLOCK@ + } //IMPL +} //Kokkos +#endif diff --git a/blas/impl/KokkosBlas1_swap_impl.hpp b/blas/impl/KokkosBlas1_swap_impl.hpp new file mode 100644 index 0000000000..a1affb5414 --- /dev/null +++ b/blas/impl/KokkosBlas1_swap_impl.hpp @@ -0,0 +1,81 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef KOKKOSBLAS1_SWAP_IMPL_HPP_ +#define KOKKOSBLAS1_SWAP_IMPL_HPP_ + +#include +#include + +namespace KokkosBlas { +namespace Impl { + +template +struct swap_functor { + using scalar_type = typename XVector::non_const_value_type; + + XVector X; + YVector Y; + + swap_functor(XVector const& X_, YVector const& Y_) : X(X_), Y(Y_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(int const entryIdx) const { + scalar_type const temp = Y(entryIdx); + Y(entryIdx) = X(entryIdx); + X(entryIdx) = temp; + } +}; + +template +void Swap_Invoke(ExecutionSpace const& space, XVector const& X, + YVector const& Y) { + Kokkos::RangePolicy swap_policy(space, 0, X.extent(0)); + swap_functor swap_func(X, Y); + Kokkos::parallel_for("KokkosBlas::swap", swap_policy, swap_func); +} + +} // namespace Impl +} // namespace KokkosBlas + +#endif // KOKKOSBLAS1_SWAP_IMPL_HPP_ diff --git a/blas/impl/KokkosBlas1_swap_spec.hpp b/blas/impl/KokkosBlas1_swap_spec.hpp new file mode 100644 index 0000000000..b1d1e55f12 --- /dev/null +++ b/blas/impl/KokkosBlas1_swap_spec.hpp @@ -0,0 +1,165 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSBLAS1_SWAP_SPEC_HPP_ +#define KOKKOSBLAS1_SWAP_SPEC_HPP_ + +#include +#include + +// Include the actual functors +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#include +#endif + +namespace KokkosBlas { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct swap_eti_spec_avail { + enum : bool { value = false }; +}; +} // namespace Impl +} // namespace KokkosBlas + +// +// Macro for declaration of full specialization availability +// KokkosBlas::Impl::Swap. This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see _INST macro below) across one or +// more .cpp files. +// +#define KOKKOSBLAS1_SWAP_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct swap_eti_spec_avail< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +// Include the actual specialization declarations +#include +#include + +namespace KokkosBlas { +namespace Impl { + +// Unification layer +template ::value, + bool eti_spec_avail = + swap_eti_spec_avail::value> +struct Swap { + static void swap(ExecutionSpace const& space, XVector const& X, + YVector const& Y); +}; + +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +//! Full specialization of Swap. +template +struct Swap { + static void swap(ExecutionSpace const& space, XVector const& X, + YVector const& Y) { + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY + ? "KokkosBlas::swap[ETI]" + : "KokkosBlas::swap[noETI]"); +#ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION + if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) + printf("KokkosBlas1::swap<> ETI specialization for < %s, %s, %s >\n", + typeid(ExecutionSpace).name(), typeid(XVector).name(), + typeid(YVector).name()); + else { + printf("KokkosBlas1::swap<> non-ETI specialization for < %s, %s, %s >\n", + typeid(ExecutionSpace).name(), typeid(XVector).name(), + typeid(YVector).name()); + } +#endif + Swap_Invoke(space, X, Y); + Kokkos::Profiling::popRegion(); + } +}; +#endif + +} // namespace Impl +} // namespace KokkosBlas + +// +// Macro for declaration of full specialization of +// KokkosBlas::Impl::Swap. This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see _DEF macro below) across one or +// more .cpp files. +// +#define KOKKOSBLAS1_SWAP_ETI_SPEC_DECL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + extern template struct Swap< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + false, true>; + +// +// Macro for definition of full specialization of +// KokkosBlas::Impl::Swap. This is NOT for users!!! We +// use this macro in one or more .cpp files in this directory. +// +#define KOKKOSBLAS1_SWAP_ETI_SPEC_INST(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template struct Swap< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + false, true>; + +#include +#include + +#endif // KOKKOSBLAS1_SWAP_SPEC_HPP_ diff --git a/blas/src/KokkosBlas1_swap.hpp b/blas/src/KokkosBlas1_swap.hpp new file mode 100644 index 0000000000..66553ae819 --- /dev/null +++ b/blas/src/KokkosBlas1_swap.hpp @@ -0,0 +1,150 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSBLAS1_SWAP_HPP_ +#define KOKKOSBLAS1_SWAP_HPP_ + +#include + +namespace KokkosBlas { + +/// \brief Swaps the entries of vectors x and y. +/// +/// \tparam execution_space an execution space to perform parallel work +/// \tparam XVector Type of the first vector x; a 1-D Kokkos::View. +/// \tparam YVector Type of the first vector y; a 1-D Kokkos::View. +/// +/// \param space [in] execution space passed to execution policies +/// \param x [in/out] 1-D View. +/// \param y [in/out] 1-D View. +/// +/// \return x and y with swapped values, note that this is akin to +/// performing a deep_copy, swapping pointers inside view +/// can only be performed if no aliasing, subviews, etc... +/// exist, which cannot be asserted by this function. +/// +/// This function is non-blocking unless the underlying TPL requested +/// at compile time is itself blocking +template +void swap(execution_space const& space, XVector const& x, YVector const& y) { + // Assert properties of XVector + static_assert(Kokkos::is_view::value, + "KokkosBlas::swap: XVector must be a Kokkos::View."); + static_assert(XVector::rank == 1, + "KokkosBlas::swap: " + "Input vector x must have rank 1."); + static_assert(std::is_same_v, + "KokkosBlas::swap: XVector must store non const values."); + static_assert( + Kokkos::SpaceAccessibility::accessible, + "swap: execution_space cannot access data in XVector"); + + // Assert properties of YVector, could probably use a function for this as + // XVector and YVector are required to have identical properties... + static_assert(Kokkos::is_view::value, + "KokkosBlas::swap: YVector must be a Kokkos::View."); + static_assert(YVector::rank == 1, + "KokkosBlas::swap: " + "Input vector y must have rank 1."); + static_assert(std::is_same_v, + "KokkosBlas::swap: YVector must store non const values."); + static_assert( + Kokkos::SpaceAccessibility::accessible, + "swap: execution_space cannot access data in YVector"); + + using XVector_Internal = Kokkos::View< + typename XVector::non_const_value_type*, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + Kokkos::Device, + Kokkos::MemoryTraits >; + using YVector_Internal = Kokkos::View< + typename YVector::non_const_value_type*, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + Kokkos::Device, + Kokkos::MemoryTraits >; + + XVector_Internal X(x); + YVector_Internal Y(y); + + // Runtime check of the length of X and Y + if (static_cast(X.extent(0)) != static_cast(Y.extent(0))) { + throw std::runtime_error("X and Y must have equal extents!"); + } + + Kokkos::Profiling::pushRegion("KokkosBlas::swap"); + // If X.extent(0) == 0, do nothing + if (X.extent(0) != 0) { + Impl::Swap::swap(space, + X, Y); + } + Kokkos::Profiling::popRegion(); +} + +/// \brief Swaps the entries of vectors x and y. +/// +/// \tparam XVector Type of the first vector x; a 1-D Kokkos::View. +/// \tparam YVector Type of the first vector y; a 1-D Kokkos::View. +/// +/// \param x [in/out] 1-D View. +/// \param y [in/out] 1-D View. +/// +/// \return x and y with swapped values. +/// +/// This function is non-blocking unless the underlying TPL requested +/// at compile time is itself blocking. Note that the kernel will be +/// executed on the default stream of the execution_space associted with x. +template +void swap(const XVector& x, const YVector& y) { + const typename XVector::execution_space space = + typename XVector::execution_space(); + swap(space, x, y); +} + +} // namespace KokkosBlas + +#endif // KOKKOSBLAS1_SWAP_HPP_ diff --git a/blas/tpls/KokkosBlas1_swap_tpl_spec_avail.hpp b/blas/tpls/KokkosBlas1_swap_tpl_spec_avail.hpp new file mode 100644 index 0000000000..84d438f030 --- /dev/null +++ b/blas/tpls/KokkosBlas1_swap_tpl_spec_avail.hpp @@ -0,0 +1,193 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_HPP_ +#define KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_HPP_ + +namespace KokkosBlas { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct swap_tpl_spec_avail { + enum : bool { value = false }; +}; +} // namespace Impl +} // namespace KokkosBlas + +namespace KokkosBlas { +namespace Impl { + +// Generic Host side BLAS (could be MKL or whatever) +#ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS +#define KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXECSPACE) \ + template <> \ + struct swap_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +#ifdef KOKKOS_ENABLE_SERIAL +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Serial) +#endif + +#ifdef KOKKOS_ENABLE_OPENMP +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(double, Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(float, Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::OpenMP) +#endif +#endif + +// cuBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS +#define KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(SCALAR, LAYOUT, EXECSPACE, \ + MEMSPACE) \ + template <> \ + struct swap_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace) + +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, + Kokkos::Cuda, Kokkos::CudaSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, + Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, + Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaSpace) + +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaUVMSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaUVMSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaUVMSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaUVMSpace) + +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(double, Kokkos::LayoutRight, + Kokkos::Cuda, Kokkos::CudaUVMSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(float, Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaUVMSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, + Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaUVMSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, + Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaUVMSpace) +#endif + +// rocBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS +#define KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, EXECSPACE, \ + MEMSPACE) \ + template <> \ + struct swap_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIP, + Kokkos::HIPSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutLeft, Kokkos::HIP, + Kokkos::HIPSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HIP, + Kokkos::HIPSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HIP, + Kokkos::HIPSpace) + +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutRight, + Kokkos::HIP, Kokkos::HIPSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(float, Kokkos::LayoutRight, Kokkos::HIP, + Kokkos::HIPSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, + Kokkos::LayoutRight, Kokkos::HIP, + Kokkos::HIPSpace) +KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, + Kokkos::LayoutRight, Kokkos::HIP, + Kokkos::HIPSpace) +#endif + +} // namespace Impl +} // namespace KokkosBlas +#endif // KOKKOSBLAS1_SWAP_TPL_SPEC_AVAIL_HPP_ diff --git a/blas/tpls/KokkosBlas1_swap_tpl_spec_decl.hpp b/blas/tpls/KokkosBlas1_swap_tpl_spec_decl.hpp new file mode 100644 index 0000000000..dce52012e5 --- /dev/null +++ b/blas/tpls/KokkosBlas1_swap_tpl_spec_decl.hpp @@ -0,0 +1,590 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSBLAS1_SWAP_TPL_SPEC_DECL_HPP_ +#define KOKKOSBLAS1_SWAP_TPL_SPEC_DECL_HPP_ + +namespace KokkosBlas { +namespace Impl { + +namespace { +template +inline void swap_print_specialization() { +#ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION + printf("KokkosBlas::swap<> TPL Blas specialization for < %s, %s, %s >\n", + typeid(XVector).name(), typeid(YVector).name(), + typeid(ExecutionSpace).name); +#endif +} +} // namespace +} // namespace Impl +} // namespace KokkosBlas + +// Generic Host side BLAS (could be MKL or whatever) +#ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS +#include "KokkosBlas_Host_tpl.hpp" + +namespace KokkosBlas { +namespace Impl { + +#define KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Swap, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using YVector = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& /*space*/, XVector const& X, \ + YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_BLAS,double]"); \ + HostBlas::swap(X.extent_int(0), X.data(), 1, Y.data(), 1); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Swap, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using YVector = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& /*space*/, XVector const& X, \ + YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_BLAS,float]"); \ + HostBlas::swap(X.extent_int(0), X.data(), 1, Y.data(), 1); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Swap*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using YVector = Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& /*space*/, XVector const& X, \ + YVector const& Y) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::swap[TPL_BLAS,complex]"); \ + HostBlas>::swap( \ + X.extent_int(0), reinterpret_cast*>(X.data()), \ + 1, reinterpret_cast*>(Y.data()), 1); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Swap*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using YVector = Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void rot(EXECSPACE const& /*space*/, XVector const& X, \ + YVector const& Y) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::swap[TPL_BLAS,complex]"); \ + HostBlas>::swap( \ + X.extent_int(0), reinterpret_cast*>(X.data()), \ + 1, reinterpret_cast*>(Y.data()), 1); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#ifdef KOKKOS_ENABLE_SERIAL +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, true) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, false) + +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, true) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, false) + +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, true) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, false) + +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, true) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::Serial, false) +#endif + +#ifdef KOKKOS_ENABLE_OPENMP +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, true) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, false) + +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, true) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, false) + +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, true) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, false) + +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, true) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_BLAS(Kokkos::LayoutLeft, Kokkos::OpenMP, false) +#endif + +} // namespace Impl +} // namespace KokkosBlas + +#endif // KOKKOSKERNELS_ENABLE_TPL_BLAS + +// cuBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS +#include + +namespace KokkosBlas { +namespace Impl { + +#define KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Swap< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using YVector = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& space, XVector const& X, \ + YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_CUBLAS,double]"); \ + swap_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = \ + KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ + cublasSetStream(singleton.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasDswap( \ + singleton.handle, X.extent_int(0), X.data(), 1, Y.data(), 1)); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Swap< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using YVector = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& space, XVector const& X, \ + YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_CUBLAS,float]"); \ + swap_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = \ + KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ + cublasSetStream(singleton.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSswap( \ + singleton.handle, X.extent_int(0), X.data(), 1, Y.data(), 1)); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Swap*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using YVector = Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& space, XVector const& X, \ + YVector const& Y) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::swap[TPL_CUBLAS,complex]"); \ + swap_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = \ + KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ + cublasSetStream(singleton.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ + cublasZswap(singleton.handle, X.extent_int(0), \ + reinterpret_cast(X.data()), 1, \ + reinterpret_cast(Y.data()), 1)); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Swap*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = Kokkos::View, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using YVector = Kokkos::View, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void rot(EXECSPACE const& space, XVector const& X, \ + YVector const& Y) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::swap[TPL_CUBLAS,complex]"); \ + swap_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = \ + KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ + cublasSetStream(singleton.handle, space.cuda_stream())); \ + KOKKOS_CUBLAS_SAFE_CALL_IMPL( \ + cublasCswap(singleton.handle, X.extent_int(0), \ + reinterpret_cast(X.data()), 1, \ + reinterpret_cast(Y.data()), 1)); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace, true) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace, false) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaSpace, true) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaSpace, false) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace, true) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace, false) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaSpace, true) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaSpace, false) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace, true) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace, false) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaSpace, true) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaSpace, false) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace, true) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaSpace, false) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaSpace, true) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaSpace, false) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, + Kokkos::CudaUVMSpace, false) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaUVMSpace, true) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutRight, Kokkos::Cuda, + Kokkos::CudaUVMSpace, false) +} // namespace Impl +} // namespace KokkosBlas +#endif // KOKKOSKERNELS_ENABLE_TPL_CUBLAS + +// rocBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS +#include + +namespace KokkosBlas { +namespace Impl { + +#define KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Swap< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using YVector = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& space, XVector const& X, \ + YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_ROCBLAS,double]"); \ + swap_print_specialization(); \ + KokkosBlas::Impl::RocBlasSingleton& singleton = \ + KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + rocblas_set_stream(singleton.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_dswap( \ + singleton.handle, X.extent_int(0), X.data(), 1, Y.data(), 1)); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Swap< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using YVector = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& space, XVector const& X, \ + YVector const& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::swap[TPL_ROCBLAS,float]"); \ + swap_print_specialization(); \ + KokkosBlas::Impl::RocBlasSingleton& singleton = \ + KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + rocblas_set_stream(singleton.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_sswap( \ + singleton.handle, X.extent_int(0), X.data(), 1, Y.data(), 1)); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Swap*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using YVector = Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& space, XVector const& X, \ + YVector const& Y) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::swap[TPL_ROCBLAS,complex_double]"); \ + swap_print_specialization(); \ + KokkosBlas::Impl::RocBlasSingleton& singleton = \ + KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + rocblas_set_stream(singleton.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zswap( \ + singleton.handle, X.extent_int(0), \ + reinterpret_cast(X.data()), 1, \ + reinterpret_cast(Y.data()), 1)); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_ROCBLAS(LAYOUT, EXECSPACE, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template <> \ + struct Swap*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using XVector = Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using YVector = Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void swap(EXECSPACE const& space, XVector const& X, \ + YVector const& Y) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::swap[TPL_ROCBLAS,complex_float]"); \ + swap_print_specialization(); \ + KokkosBlas::Impl::RocBlasSingleton& singleton = \ + KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + rocblas_set_stream(singleton.handle, space.hip_stream())); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_cswap( \ + singleton.handle, X.extent_int(0), \ + reinterpret_cast(X.data()), 1, \ + reinterpret_cast(Y.data()), 1)); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, + Kokkos::HIPSpace, true) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, + Kokkos::HIPSpace, false) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, + Kokkos::HIPSpace, true) +KOKKOSBLAS1_DSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, + Kokkos::HIPSpace, false) + +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, + Kokkos::HIPSpace, true) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, + Kokkos::HIPSpace, false) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, + Kokkos::HIPSpace, true) +KOKKOSBLAS1_SSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, + Kokkos::HIPSpace, false) + +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, + Kokkos::HIPSpace, true) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, + Kokkos::HIPSpace, false) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, + Kokkos::HIPSpace, true) +KOKKOSBLAS1_ZSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, + Kokkos::HIPSpace, false) + +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, + Kokkos::HIPSpace, true) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutLeft, Kokkos::HIP, + Kokkos::HIPSpace, false) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, + Kokkos::HIPSpace, true) +KOKKOSBLAS1_CSWAP_TPL_SPEC_DECL_ROCBLAS(Kokkos::LayoutRight, Kokkos::HIP, + Kokkos::HIPSpace, false) +} // namespace Impl +} // namespace KokkosBlas +#endif // KOKKOSKERNELS_ENABLE_TPL_ROCBLAS + +#endif diff --git a/blas/tpls/KokkosBlas_Host_tpl.cpp b/blas/tpls/KokkosBlas_Host_tpl.cpp index c354998063..b847de087e 100644 --- a/blas/tpls/KokkosBlas_Host_tpl.cpp +++ b/blas/tpls/KokkosBlas_Host_tpl.cpp @@ -155,7 +155,7 @@ void F77_BLAS_MANGLE(zrotg, ZROTG)(std::complex* a, std::complex* s); /// -/// rotmg +/// rotm /// void F77_BLAS_MANGLE(srotm, SROTM)(const int* n, float* X, const int* incx, float* Y, const int* incy, @@ -172,6 +172,20 @@ void F77_BLAS_MANGLE(srotmg, SROTMG)(float* d1, float* d2, float* x1, void F77_BLAS_MANGLE(drotmg, DROTMG)(double* d1, double* d2, double* x1, const double* y1, double* param); +/// +/// swap +/// +void F77_BLAS_MANGLE(sswap, SSWAP)(int const* N, float* X, int const* incx, + float* Y, int const* incy); +void F77_BLAS_MANGLE(dswap, DSWAP)(int const* N, double* X, int const* incx, + double* Y, int const* incy); +void F77_BLAS_MANGLE(cswap, CSWAP)(int const* N, std::complex* X, + int const* incx, std::complex* Y, + int const* incy); +void F77_BLAS_MANGLE(zswap, ZSWAP)(int const* N, std::complex* X, + int const* incx, std::complex* Y, + int const* incy); + /// /// Gemv /// @@ -400,6 +414,11 @@ void F77_BLAS_MANGLE(zscal, #define F77_FUNC_SROTMG F77_BLAS_MANGLE(srotmg, SROTMG) #define F77_FUNC_DROTMG F77_BLAS_MANGLE(drotmg, DROTMG) +#define F77_FUNC_SSWAP F77_BLAS_MANGLE(sswap, SSWAP) +#define F77_FUNC_DSWAP F77_BLAS_MANGLE(dswap, DSWAP) +#define F77_FUNC_CSWAP F77_BLAS_MANGLE(cswap, CSWAP) +#define F77_FUNC_ZSWAP F77_BLAS_MANGLE(zswap, ZSWAP) + #define F77_FUNC_SGEMV F77_BLAS_MANGLE(sgemv, SGEMV) #define F77_FUNC_DGEMV F77_BLAS_MANGLE(dgemv, DGEMV) #define F77_FUNC_CGEMV F77_BLAS_MANGLE(cgemv, CGEMV) @@ -494,6 +513,11 @@ void HostBlas::rotmg(float* d1, float* d2, float* x1, const float* y1, F77_FUNC_SROTMG(d1, d2, x1, y1, param); } template <> +void HostBlas::swap(int const N, float* X, int const incx, float* Y, + int const incy) { + F77_FUNC_SSWAP(&N, X, &incx, Y, &incy); +} +template <> void HostBlas::gemv(const char trans, int m, int n, const float alpha, const float* a, int lda, const float* b, int ldb, const float beta, @@ -602,6 +626,11 @@ void HostBlas::rotmg(double* d1, double* d2, double* x1, F77_FUNC_DROTMG(d1, d2, x1, y1, param); } template <> +void HostBlas::swap(int const N, double* X, int const incx, double* Y, + int const incy) { + F77_FUNC_DSWAP(&N, X, &incx, Y, &incy); +} +template <> void HostBlas::gemv(const char trans, int m, int n, const double alpha, const double* a, int lda, const double* b, int ldb, const double beta, @@ -717,6 +746,13 @@ void HostBlas >::rotg(std::complex* a, std::complex* s) { F77_FUNC_CROTG(a, b, c, s); } +template <> +void HostBlas >::swap(int const N, std::complex* X, + int const incx, + std::complex* Y, + int const incy) { + F77_FUNC_CSWAP(&N, X, &incx, Y, &incy); +} template <> void HostBlas >::gemv(const char trans, int m, int n, @@ -865,6 +901,13 @@ void HostBlas >::rotg(std::complex* a, std::complex* s) { F77_FUNC_ZROTG(a, b, c, s); } +template <> +void HostBlas >::swap(int const N, std::complex* X, + int const incx, + std::complex* Y, + int const incy) { + F77_FUNC_ZSWAP(&N, X, &incx, Y, &incy); +} template <> void HostBlas >::gemv( diff --git a/blas/tpls/KokkosBlas_Host_tpl.hpp b/blas/tpls/KokkosBlas_Host_tpl.hpp index a3fbf67b89..2c616c9d95 100644 --- a/blas/tpls/KokkosBlas_Host_tpl.hpp +++ b/blas/tpls/KokkosBlas_Host_tpl.hpp @@ -86,6 +86,8 @@ struct HostBlas { static void rotmg(T *d1, T *d2, T *x1, const T *y1, T *param); + static void swap(int const N, T *X, int const incx, T *Y, int const incy); + static void gemv(const char trans, int m, int n, const T alpha, const T *a, int lda, const T *b, int ldb, const T beta, /* */ T *c, int ldc); diff --git a/blas/unit_test/Test_Blas.hpp b/blas/unit_test/Test_Blas.hpp index 94d7c58877..db65c05f26 100644 --- a/blas/unit_test/Test_Blas.hpp +++ b/blas/unit_test/Test_Blas.hpp @@ -25,6 +25,7 @@ #include "Test_Blas1_rotmg.hpp" #include "Test_Blas1_scal.hpp" #include "Test_Blas1_sum.hpp" +#include "Test_Blas1_swap.hpp" #include "Test_Blas1_update.hpp" // Serial Blas 1 diff --git a/blas/unit_test/Test_Blas1_swap.hpp b/blas/unit_test/Test_Blas1_swap.hpp new file mode 100644 index 0000000000..a7e4fff433 --- /dev/null +++ b/blas/unit_test/Test_Blas1_swap.hpp @@ -0,0 +1,96 @@ +#include "KokkosBlas1_swap.hpp" + +namespace Test { +namespace Impl { + +template +void test_swap(int const vector_length) { + using vector_type = VectorType; + using execution_space = typename vector_type::execution_space; + using scalar_type = typename VectorType::non_const_value_type; + using mag_type = typename Kokkos::ArithTraits::mag_type; + + // Note that Xref and Yref need to always be copies of X and Y + // hence the use of create_mirror instead of create_mirror_view. + vector_type X("X", vector_length), Y("Y", vector_length); + typename vector_type::HostMirror Xref = Kokkos::create_mirror(Y); + typename vector_type::HostMirror Yref = Kokkos::create_mirror(X); + + // Setup values in X, Y and copy them to Xref and Yref + const scalar_type range = 10 * Kokkos::ArithTraits::one(); + Kokkos::Random_XorShift64_Pool rand_pool(13718); + Kokkos::fill_random(X, rand_pool, range); + Kokkos::fill_random(Y, rand_pool, range); + + Kokkos::deep_copy(Xref, Y); + Kokkos::deep_copy(Yref, X); + + KokkosBlas::swap(X, Y); + Kokkos::fence(); + + typename vector_type::HostMirror Xtest = Kokkos::create_mirror_view(X); + typename vector_type::HostMirror Ytest = Kokkos::create_mirror_view(Y); + Kokkos::deep_copy(Xtest, X); + Kokkos::deep_copy(Ytest, Y); + + const mag_type tol = 10 * Kokkos::ArithTraits::eps(); + for (int idx = 0; idx < vector_length; ++idx) { + Test::EXPECT_NEAR_KK_REL(Xtest(idx), Xref(idx), tol); + Test::EXPECT_NEAR_KK_REL(Ytest(idx), Yref(idx), tol); + } +} + +} // namespace Impl +} // namespace Test + +template +int test_swap() { + using Vector = Kokkos::View; + + Test::Impl::test_swap(0); + Test::Impl::test_swap(10); + Test::Impl::test_swap(256); + Test::Impl::test_swap(1024); + + return 0; +} + +#if defined(KOKKOSKERNELS_INST_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, swap_float) { + Kokkos::Profiling::pushRegion("KokkosBlas::Test::swap_float"); + test_swap(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, swap_double) { + Kokkos::Profiling::pushRegion("KokkosBlas::Test::swap_double"); + test_swap(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, swap_complex_float) { + Kokkos::Profiling::pushRegion("KokkosBlas::Test::swap_complex_float"); + test_swap, TestExecSpace>(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, swap_complex_double) { + Kokkos::Profiling::pushRegion("KokkosBlas::Test::swap_complex_double"); + test_swap, TestExecSpace>(); + Kokkos::Profiling::popRegion(); +} +#endif diff --git a/docs/developer/apidocs/blas1.rst b/docs/developer/apidocs/blas1.rst index bfeb7fd1bb..1a68066271 100644 --- a/docs/developer/apidocs/blas1.rst +++ b/docs/developer/apidocs/blas1.rst @@ -50,6 +50,11 @@ sum --- .. doxygenfunction:: KokkosBlas::sum(const RV &R, const XMV &X, typename std::enable_if::value, int>::type = 0) +swap +--- +.. doxygenfunction:: KokkosBlas::swap(execution_space const& space, XVector const& X, YVector const& Y) +.. doxygenfunction:: KokkosBlas::swap(XVector const& X, YVector const& Y) + update ------ .. doxygenfunction:: KokkosBlas::update