diff --git a/.gitignore b/.gitignore index d64726e92e..6dcc5d6a5d 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,7 @@ TAGS #Clangd indexing compile_commands.json .cache/ -.vscode/ \ No newline at end of file +.vscode/ + +#MacOS hidden files +.DS_Store diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index ee91079378..f825a2184a 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -58,3 +58,10 @@ KOKKOSKERNELS_GENERATE_ETI(Lapack_trtri trtri SOURCE_LIST SOURCES TYPE_LISTS FLOATS LAYOUTS DEVICES ) + +KOKKOSKERNELS_GENERATE_ETI(Lapack_svd svd + COMPONENTS lapack + HEADER_LIST ETI_HEADERS + SOURCE_LIST SOURCES + TYPE_LISTS FLOATS LAYOUTS DEVICES +) diff --git a/lapack/eti/generated_specializations_cpp/svd/KokkosLapack_svd_eti_spec_inst.cpp.in b/lapack/eti/generated_specializations_cpp/svd/KokkosLapack_svd_eti_spec_inst.cpp.in new file mode 100644 index 0000000000..62dd75475f --- /dev/null +++ b/lapack/eti/generated_specializations_cpp/svd/KokkosLapack_svd_eti_spec_inst.cpp.in @@ -0,0 +1,26 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + + +#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true +#include "KokkosKernels_config.h" +#include "KokkosLapack_svd_spec.hpp" + +namespace KokkosLapack { +namespace Impl { +@LAPACK_SVD_ETI_INST_BLOCK@ + } //IMPL +} //Kokkos diff --git a/lapack/eti/generated_specializations_hpp/KokkosLapack_svd_eti_spec_avail.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosLapack_svd_eti_spec_avail.hpp.in new file mode 100644 index 0000000000..49e526b7e8 --- /dev/null +++ b/lapack/eti/generated_specializations_hpp/KokkosLapack_svd_eti_spec_avail.hpp.in @@ -0,0 +1,24 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_SVD_ETI_SPEC_AVAIL_HPP_ +#define KOKKOSLAPACK_SVD_ETI_SPEC_AVAIL_HPP_ +namespace KokkosLapack { +namespace Impl { +@LAPACK_SVD_ETI_AVAIL_BLOCK@ + } //IMPL +} //Kokkos +#endif diff --git a/lapack/impl/KokkosLapack_svd_impl.hpp b/lapack/impl/KokkosLapack_svd_impl.hpp new file mode 100644 index 0000000000..a1748b7ab5 --- /dev/null +++ b/lapack/impl/KokkosLapack_svd_impl.hpp @@ -0,0 +1,34 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_IMPL_SVD_HPP_ +#define KOKKOSLAPACK_IMPL_SVD_HPP_ + +/// \file KokkosLapack_svd_impl.hpp +/// \brief Implementation(s) of dense linear solve. + +#include +#include + +namespace KokkosLapack { +namespace Impl { + +// NOTE: Might add the implementation of KokkosLapack::svd later + +} // namespace Impl +} // namespace KokkosLapack + +#endif // KOKKOSLAPACK_IMPL_SVD_HPP diff --git a/lapack/impl/KokkosLapack_svd_spec.hpp b/lapack/impl/KokkosLapack_svd_spec.hpp new file mode 100644 index 0000000000..619057602a --- /dev/null +++ b/lapack/impl/KokkosLapack_svd_spec.hpp @@ -0,0 +1,155 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef KOKKOSLAPACK_IMPL_SVD_SPEC_HPP_ +#define KOKKOSLAPACK_IMPL_SVD_SPEC_HPP_ + +#include +#include +#include + +// Include the actual functors +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#include +#endif + +namespace KokkosLapack { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct svd_eti_spec_avail { + enum : bool { value = false }; +}; +} // namespace Impl +} // namespace KokkosLapack + +// +// Macro for declaration of full specialization availability +// KokkosLapack::Impl::SVD. This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see _INST macro below) across one or +// more .cpp files. +// +#define KOKKOSLAPACK_SVD_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template <> \ + struct svd_eti_spec_avail< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +// Include the actual specialization declarations +#include +#include + +namespace KokkosLapack { +namespace Impl { + +// Unification layer +/// \brief Implementation of KokkosLapack::svd. + +template ::value, + bool eti_spec_avail = svd_eti_spec_avail< + ExecutionSpace, AMatrix, SVector, UMatrix, VMatrix>::value> +struct SVD { + static void svd(const ExecutionSpace &space, const AMatrix &A, + const SVector &S, const UMatrix &U, const VMatrix &Vt); +}; + +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +//! Full specialization of svd +// Unification layer +template +struct SVD { + static void svd(const ExecutionSpace & /* space */, const char * /* jobu */, + const char * /* jobvt */, const AMatrix & /* A */, + const SVector & /* S */, const UMatrix & /* U */, + const VMatrix & /* Vt */) { + // NOTE: Might add the implementation of KokkosLapack::svd later + throw std::runtime_error( + "No fallback implementation of SVD (singular value decomposition) " + "exists. Enable LAPACK, CUSOLVER or ROCSOLVER TPL to use this " + "function."); + } +}; + +#endif +} // namespace Impl +} // namespace KokkosLapack + +// +// Macro for declaration of full specialization of +// KokkosLapack::Impl::SVD. This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see _DEF macro below) across one or +// more .cpp files. +// +#define KOKKOSLAPACK_SVD_ETI_SPEC_DECL(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + extern template struct SVD< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + false, true>; + +#define KOKKOSLAPACK_SVD_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template struct SVD< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + false, true>; + +#include + +#endif // KOKKOSLAPACK_IMPL_SVD_SPEC_HPP_ diff --git a/lapack/src/KokkosLapack_svd.hpp b/lapack/src/KokkosLapack_svd.hpp new file mode 100644 index 0000000000..5bc98767c4 --- /dev/null +++ b/lapack/src/KokkosLapack_svd.hpp @@ -0,0 +1,191 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/// \file KokkosLapack_svd.hpp +/// \brief Singular Value Decomposition (SVD) +/// +/// This file provides KokkosLapack::svd. This function performs a +/// local (no MPI) singular value decomposition of the input matrix A +/// and returns the singular values and vectors dedending on input flags. + +#ifndef KOKKOSLAPACK_SVD_HPP_ +#define KOKKOSLAPACK_SVD_HPP_ + +#include + +#include "KokkosLapack_svd_spec.hpp" +#include "KokkosKernels_Error.hpp" + +namespace KokkosLapack { + +/// \brief Compute the Singular Value Decomposition of A = U*S*Vt +/// +/// \tparam ExecutionSpace the space where the kernel will run. +/// \tparam AMatrix (mxn) matrix as a rank-2 Kokkos::View. +/// \tparam SVector min(m,n) vector as a rank-1 Kokkos::View +/// \tparam UMatrix (mxm) matrix as a rank-2 Kokkos::View +/// \tparam VMatrix () matrix as a rank-2 Kokkos::View +/// +/// \param space [in] execution space instance used to specified how to execute +/// the svd kernels. +/// \param A [in] An m-by-n matrix to be decomposed using its singular values. +/// \param S [out] Vector of the min(m, n) singular values of A. +/// \param U [out] the first min(m, n) columns of U are the left singular +/// vectors of A. \param Vt [out] the first min(m, n) columns of Vt are the +/// right singular vectors of A. +/// +template +void svd(const ExecutionSpace& space, const char jobu[], const char jobvt[], + const AMatrix& A, const SVector& S, const UMatrix& U, + const VMatrix& Vt) { + static_assert( + Kokkos::SpaceAccessibility::accessible); + static_assert( + Kokkos::SpaceAccessibility::accessible); + static_assert( + Kokkos::SpaceAccessibility::accessible); + static_assert( + Kokkos::SpaceAccessibility::accessible); + static_assert(Kokkos::is_view::value, + "KokkosLapack::svd: A must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "KokkosLapack::svd: S must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "KokkosLapack::svd: U must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "KokkosLapack::svd: Vt must be a Kokkos::View."); + static_assert(AMatrix::rank() == 2, "KokkosLapack::svd: A must have rank 2."); + static_assert(SVector::rank() == 1, "KokkosLapack::svd: S must have rank 1."); + static_assert(UMatrix::rank() == 2, "KokkosLapack::svd: U must have rank 2."); + static_assert(VMatrix::rank() == 2, + "KokkosLapack::svd: Vt must have rank 2."); + + int64_t m = A.extent(0); + int64_t n = A.extent(1); + int64_t rankA = Kokkos::min(m, n); + + // Check validity of output views sizes + bool is_extent_invalid = false; + std::ostringstream os; + if (S.extent_int(0) != rankA) { + is_extent_invalid = true; + os << "KokkosLapack::svd: S has extent " << S.extent(0) << ", instead of " + << rankA << ".\n"; + } + if (U.extent_int(0) != m || U.extent_int(1) != m) { + is_extent_invalid = true; + os << "KokkosLapack::svd: U has extents (" << U.extent(0) << ", " + << U.extent(1) << ") instead of (" << m << ", " << m << ").\n"; + } + if (Vt.extent_int(0) != n || Vt.extent_int(1) != n) { + is_extent_invalid = true; + os << "KokkosLapack::svd: V has extents (" << Vt.extent(0) << ", " + << Vt.extent(1) << ") instead of (" << n << ", " << n << ").\n"; + } + if (is_extent_invalid) { + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + + // Check the jobu and jobvt control flags + // The only valid options there are 'A', 'S', 'O' and 'N' + const bool is_jobu_invalid = + !((jobu[0] == 'A') || (jobu[0] == 'a') || (jobu[0] == 'S') || + (jobu[0] == 's') || (jobu[0] == 'O') || (jobu[0] == 'o') || + (jobu[0] == 'N') || (jobu[0] == 'n')); + + const bool is_jobvt_invalid = + !((jobvt[0] == 'A') || (jobvt[0] == 'a') || (jobvt[0] == 'S') || + (jobvt[0] == 's') || (jobvt[0] == 'O') || (jobvt[0] == 'o') || + (jobvt[0] == 'N') || (jobvt[0] == 'n')); + + if (is_jobu_invalid && is_jobvt_invalid) { + std::ostringstream oss; + oss << "KokkosLapack::svd: both jobu and jobvt are invalid!\n" + << "Possible values are A, S, O or N, submitted values are " << jobu[0] + << " and " << jobvt[0] << "\n"; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + if (is_jobu_invalid) { + std::ostringstream oss; + oss << "KokkosLapack::svd: jobu is invalid!\n" + << "Possible values are A, S, O or N, submitted value is " << jobu[0] + << "\n"; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + if (is_jobvt_invalid) { + std::ostringstream oss; + oss << "KokkosLapack::svd: jobvt is invalid!\n" + << "Possible values are A, S, O or N, submitted value is " << jobvt[0] + << "\n"; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + + using AMatrix_Internal = Kokkos::View< + typename AMatrix::non_const_value_type**, typename AMatrix::array_layout, + typename AMatrix::device_type, Kokkos::MemoryTraits>; + + using SVector_Internal = Kokkos::View< + typename SVector::non_const_value_type*, typename SVector::array_layout, + typename SVector::device_type, Kokkos::MemoryTraits>; + + using UMatrix_Internal = Kokkos::View< + typename UMatrix::non_const_value_type**, typename UMatrix::array_layout, + typename UMatrix::device_type, Kokkos::MemoryTraits>; + + using VMatrix_Internal = Kokkos::View< + typename VMatrix::non_const_value_type**, typename VMatrix::array_layout, + typename VMatrix::device_type, Kokkos::MemoryTraits>; + + AMatrix_Internal A_i = A; + SVector_Internal S_i = S; + UMatrix_Internal U_i = U; + VMatrix_Internal Vt_i = Vt; + + KokkosLapack::Impl::SVD::svd(space, jobu, + jobvt, A_i, + S_i, U_i, + Vt_i); +} + +/// \brief Compute the Singular Value Decomposition of A = U*S*Vt +/// +/// \tparam AMatrix (mxn) matrix as a rank-2 Kokkos::View. +/// \tparam SVector min(m,n) vector as a rank-1 Kokkos::View +/// \tparam UMatrix (mxm) matrix as a rank-2 Kokkos::View +/// \tparam VMatrix () matrix as a rank-2 Kokkos::View +/// +/// \param A [in] An m-by-n matrix to be decomposed using its singular values. +/// \param S [out] Vector of the min(m, n) singular values of A. +/// \param U [out] the first min(m, n) columns of U are the left singular +/// vectors of A. \param Vt [out] the first min(m, n) columns of Vt are the +/// right singular vectors of A. +/// +template +void svd(const char jobu[], const char jobvt[], const AMatrix& A, + const SVector& S, const UMatrix& U, const VMatrix& Vt) { + typename AMatrix::execution_space space{}; + svd(space, jobu, jobvt, A, S, U, Vt); +} + +} // namespace KokkosLapack + +#endif // KOKKOSLAPACK_SVD_HPP_ diff --git a/lapack/tpls/KokkosLapack_Host_tpl.cpp b/lapack/tpls/KokkosLapack_Host_tpl.cpp index d629a17f1d..ee745f92e8 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.cpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.cpp @@ -38,6 +38,27 @@ void F77_BLAS_MANGLE(cgesv, CGESV)(int*, int*, std::complex*, int*, int*, void F77_BLAS_MANGLE(zgesv, ZGESV)(int*, int*, std::complex*, int*, int*, std::complex*, int*, int*); +/// +/// Gesvd +/// + +void F77_BLAS_MANGLE(sgesvd, SGESVD)(const char*, const char*, int*, int*, + float*, int*, float*, float*, int*, float*, + int*, float*, int*, int*); +void F77_BLAS_MANGLE(dgesvd, DGESVD)(const char*, const char*, int*, int*, + double*, int*, double*, double*, int*, + double*, int*, double*, int*, int*); +void F77_BLAS_MANGLE(cgesvd, CGESVD)(const char*, const char*, int*, int*, + std::complex*, int*, float*, + std::complex*, int*, + std::complex*, int*, + std::complex*, int*, float*, int*); +void F77_BLAS_MANGLE(zgesvd, + ZGESVD)(const char*, const char*, int*, int*, + std::complex*, int*, double*, + std::complex*, int*, std::complex*, + int*, std::complex*, int*, double*, int*); + /// /// Trtri /// @@ -64,6 +85,11 @@ void F77_BLAS_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*, #define F77_FUNC_CGESV F77_BLAS_MANGLE(cgesv, CGESV) #define F77_FUNC_ZGESV F77_BLAS_MANGLE(zgesv, ZGESV) +#define F77_FUNC_SGESVD F77_BLAS_MANGLE(sgesvd, SGESVD) +#define F77_FUNC_DGESVD F77_BLAS_MANGLE(dgesvd, DGESVD) +#define F77_FUNC_CGESVD F77_BLAS_MANGLE(cgesvd, CGESVD) +#define F77_FUNC_ZGESVD F77_BLAS_MANGLE(zgesvd, ZGESVD) + #define F77_FUNC_STRTRI F77_BLAS_MANGLE(strtri, STRTRI) #define F77_FUNC_DTRTRI F77_BLAS_MANGLE(dtrtri, DTRTRI) #define F77_FUNC_CTRTRI F77_BLAS_MANGLE(ctrtri, CTRTRI) @@ -82,6 +108,14 @@ void HostLapack::gesv(int n, int rhs, float* a, int lda, int* ipiv, F77_FUNC_SGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> +void HostLapack::gesvd(const char jobu, const char jobvt, int m, int n, + float* a, int lda, float* s, float* u, int ldu, + float* vt, int ldvt, float* work, int lwork, + float* /*rwork*/, int info) { + F77_FUNC_SGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, + &lwork, &info); +} +template <> int HostLapack::trtri(const char uplo, const char diag, int n, const float* a, int lda) { int info = 0; @@ -99,6 +133,14 @@ void HostLapack::gesv(int n, int rhs, double* a, int lda, int* ipiv, F77_FUNC_DGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> +void HostLapack::gesvd(const char jobu, const char jobvt, int m, int n, + double* a, int lda, double* s, double* u, + int ldu, double* vt, int ldvt, double* work, + int lwork, double* /*rwork*/, int info) { + F77_FUNC_DGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, + &lwork, &info); +} +template <> int HostLapack::trtri(const char uplo, const char diag, int n, const double* a, int lda) { int info = 0; @@ -118,6 +160,14 @@ void HostLapack >::gesv(int n, int rhs, F77_FUNC_CGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> +void HostLapack >::gesvd( + const char jobu, const char jobvt, int m, int n, std::complex* a, + int lda, float* s, std::complex* u, int ldu, std::complex* vt, + int ldvt, std::complex* work, int lwork, float* rwork, int info) { + F77_FUNC_CGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, + &lwork, rwork, &info); +} +template <> int HostLapack >::trtri(const char uplo, const char diag, int n, const std::complex* a, int lda) { @@ -138,6 +188,15 @@ void HostLapack >::gesv(int n, int rhs, F77_FUNC_ZGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> +void HostLapack >::gesvd( + const char jobu, const char jobvt, int m, int n, std::complex* a, + int lda, double* s, std::complex* u, int ldu, + std::complex* vt, int ldvt, std::complex* work, int lwork, + double* rwork, int info) { + F77_FUNC_ZGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, + &lwork, rwork, &info); +} +template <> int HostLapack >::trtri(const char uplo, const char diag, int n, const std::complex* a, diff --git a/lapack/tpls/KokkosLapack_Host_tpl.hpp b/lapack/tpls/KokkosLapack_Host_tpl.hpp index d74099aaec..bb89d59c89 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.hpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.hpp @@ -33,6 +33,11 @@ struct HostLapack { static void gesv(int n, int rhs, T *a, int lda, int *ipiv, T *b, int ldb, int info); + static void gesvd(const char jobu, const char jobvt, int m, int n, T *A, + int lda, typename Kokkos::ArithTraits::mag_type *S, T *U, + int ldu, T *Vt, int ldvt, T *work, int lwork, + typename Kokkos::ArithTraits::mag_type *rwork, int info); + static int trtri(const char uplo, const char diag, int n, const T *a, int lda); }; diff --git a/lapack/tpls/KokkosLapack_svd_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_svd_tpl_spec_avail.hpp new file mode 100644 index 0000000000..648b51d94d --- /dev/null +++ b/lapack/tpls/KokkosLapack_svd_tpl_spec_avail.hpp @@ -0,0 +1,170 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_HPP_ +#define KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_HPP_ + +namespace KokkosLapack { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct svd_tpl_spec_avail { + enum : bool { value = false }; +}; + +// LAPACK +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK +#define KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUT, EXECSPACE) \ + template <> \ + struct svd_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +#if defined(KOKKOS_ENABLE_SERIAL) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, + Kokkos::Serial) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, + Kokkos::Serial) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::Serial) +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, + Kokkos::OpenMP) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, + Kokkos::OpenMP) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::OpenMP) +#endif + +#if defined(KOKKOS_ENABLE_THREADS) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, + Kokkos::Threads) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, + Kokkos::Threads) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::Threads) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::Threads) +#endif + +#endif + +// CUSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +#define KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct svd_tpl_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::CudaSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +#endif // CUDAUVMSPACE +#endif // CUSOLVER + +// ROCSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER +#define KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct svd_tpl_spec_avail< \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(float, Kokkos::LayoutLeft, + Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(double, Kokkos::LayoutLeft, + Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HIPSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_HIPMANAGEDSPACE) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(float, Kokkos::LayoutLeft, + Kokkos::HIPManagedSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(double, Kokkos::LayoutLeft, + Kokkos::HIPManagedSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::HIPManagedSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::HIPManagedSpace) +#endif // HIPMANAGEDSPACE +#endif // ROCSOLVER + +} // namespace Impl +} // namespace KokkosLapack + +#endif // KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_HPP_ diff --git a/lapack/tpls/KokkosLapack_svd_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_svd_tpl_spec_decl.hpp new file mode 100644 index 0000000000..5b6db1688c --- /dev/null +++ b/lapack/tpls/KokkosLapack_svd_tpl_spec_decl.hpp @@ -0,0 +1,504 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_SVD_TPL_SPEC_DECL_HPP_ +#define KOKKOSLAPACK_SVD_TPL_SPEC_DECL_HPP_ + +#include "Kokkos_ArithTraits.hpp" + +namespace KokkosLapack { +namespace Impl { +template +inline void svd_print_specialization() { +#ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER + if constexpr (std::is_same_v) { + printf( + "KokkosLapack::svd<> TPL Cusolver specialization for < %s , %s, %s, %s " + ">\n", + typeid(AMatrix).name(), typeid(SVector).name(), typeid(UMatrix).name(), + typeid(VMatrix).name()); + } +#endif +#endif +} +} // namespace Impl +} // namespace KokkosLapack + +// LAPACK +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK +#include "KokkosLapack_Host_tpl.hpp" + +namespace KokkosLapack { +namespace Impl { + +template +void lapackSvdWrapper(const ExecutionSpace& /* space */, const char jobu[], + const char jobvt[], const AMatrix& A, const SVector& S, + const UMatrix& U, const VMatrix& Vt) { + using memory_space = typename AMatrix::memory_space; + using Scalar = typename AMatrix::non_const_value_type; + using Magnitude = typename SVector::non_const_value_type; + using ALayout_t = typename AMatrix::array_layout; + using ULayout_t = typename UMatrix::array_layout; + using VLayout_t = typename VMatrix::array_layout; + + const int m = A.extent_int(0); + const int n = A.extent_int(1); + const int lda = std::is_same_v ? A.stride(0) + : A.stride(1); + const int ldu = std::is_same_v ? U.stride(0) + : U.stride(1); + const int ldvt = std::is_same_v + ? Vt.stride(0) + : Vt.stride(1); + + int lwork = -1, info = 0; + Kokkos::View rwork("svd rwork buffer", + 5 * Kokkos::min(m, n)); + Kokkos::View work("svd work buffer", 1); + HostLapack::gesvd('A', 'A', m, n, A.data(), lda, S.data(), U.data(), + ldu, Vt.data(), ldvt, work.data(), lwork, + rwork.data(), info); + + if constexpr (Kokkos::ArithTraits::is_complex) { + lwork = static_cast(work(0).real()); + } else { + lwork = static_cast(work(0)); + } + work = Kokkos::View("svd work buffer", lwork); + HostLapack::gesvd(jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), + U.data(), ldu, Vt.data(), ldvt, work.data(), lwork, + rwork.data(), info); +} + +#define KOKKOSLAPACK_SVD_LAPACK(SCALAR, LAYOUT, EXEC_SPACE) \ + template <> \ + struct SVD< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + svd_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AMatrix = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using SVector = \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using UMatrix = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using VMatrix = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void svd(const EXEC_SPACE& space, const char jobu[], \ + const char jobvt[], const AMatrix& A, const SVector& S, \ + const UMatrix& U, const VMatrix& Vt) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_LAPACK," #SCALAR \ + "]"); \ + svd_print_specialization(); \ + \ + lapackSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#if defined(KOKKOS_ENABLE_SERIAL) +KOKKOSLAPACK_SVD_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSLAPACK_SVD_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Serial) +KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Serial) +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) +KOKKOSLAPACK_SVD_LAPACK(float, Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSLAPACK_SVD_LAPACK(double, Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::OpenMP) +KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::OpenMP) +#endif + +#if defined(KOKKOS_ENABLE_THREADS) +KOKKOSLAPACK_SVD_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Threads) +KOKKOSLAPACK_SVD_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Threads) +KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Threads) +KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Threads) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK + +// CUSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +#include "KokkosLapack_cusolver.hpp" + +namespace KokkosLapack { +namespace Impl { + +template +void cusolverSvdWrapper(const ExecutionSpace& space, const char jobu[], + const char jobvt[], const AMatrix& A, const SVector& S, + const UMatrix& U, const VMatrix& Vt) { + using memory_space = typename AMatrix::memory_space; + using Scalar = typename AMatrix::non_const_value_type; + using Magnitude = typename SVector::non_const_value_type; + using ALayout_t = typename AMatrix::array_layout; + using ULayout_t = typename UMatrix::array_layout; + using VLayout_t = typename VMatrix::array_layout; + + const int m = A.extent_int(0); + const int n = A.extent_int(1); + const int lda = std::is_same_v ? A.stride(0) + : A.stride(1); + const int ldu = std::is_same_v ? U.stride(0) + : U.stride(1); + const int ldvt = std::is_same_v + ? Vt.stride(0) + : Vt.stride(1); + + int lwork = 0; + Kokkos::View info("svd info"); + Kokkos::View rwork("svd rwork buffer", + Kokkos::min(m, n) - 1); + + CudaLapackSingleton& s = CudaLapackSingleton::singleton(); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnSetStream(s.handle, space.cuda_stream())); + if constexpr (std::is_same_v) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnSgesvd_bufferSize(s.handle, m, n, &lwork)); + Kokkos::View work("svd work buffer", lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgesvd( + s.handle, jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), U.data(), + ldu, Vt.data(), ldvt, work.data(), lwork, rwork.data(), info.data())); + } + if constexpr (std::is_same_v) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnDgesvd_bufferSize(s.handle, m, n, &lwork)); + Kokkos::View work("svd work buffer", lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgesvd( + s.handle, jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), U.data(), + ldu, Vt.data(), ldvt, work.data(), lwork, rwork.data(), info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnCgesvd_bufferSize(s.handle, m, n, &lwork)); + Kokkos::View work("svd work buffer", lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnCgesvd(s.handle, jobu[0], jobvt[0], m, n, + reinterpret_cast(A.data()), lda, S.data(), + reinterpret_cast(U.data()), ldu, + reinterpret_cast(Vt.data()), ldvt, + reinterpret_cast(work.data()), lwork, + rwork.data(), info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnZgesvd_bufferSize(s.handle, m, n, &lwork)); + Kokkos::View work("svd work buffer", lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnZgesvd(s.handle, jobu[0], jobvt[0], m, n, + reinterpret_cast(A.data()), lda, + S.data(), reinterpret_cast(U.data()), + ldu, reinterpret_cast(Vt.data()), + ldvt, reinterpret_cast(work.data()), + lwork, rwork.data(), info.data())); + } + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, NULL)); +} + +#define KOKKOSLAPACK_SVD_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct SVD< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + svd_eti_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using SVector = \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using UMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using VMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void svd(const Kokkos::Cuda& space, const char jobu[], \ + const char jobvt[], const AMatrix& A, const SVector& S, \ + const UMatrix& U, const VMatrix& Vt) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_CUSOLVER," #SCALAR \ + "]"); \ + svd_print_specialization(); \ + \ + cusolverSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSLAPACK_SVD_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) +KOKKOSLAPACK_SVD_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_CUSOLVER + +// CUSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER +#include +#include + +namespace KokkosLapack { +namespace Impl { + +template +void rocsolverSvdWrapper(const ExecutionSpace& space, const char jobu[], + const char jobvt[], const AMatrix& A, const SVector& S, + const UMatrix& U, const VMatrix& Vt) { + using memory_space = typename AMatrix::memory_space; + using Scalar = typename AMatrix::non_const_value_type; + using Magnitude = typename SVector::non_const_value_type; + using ALayout_t = typename AMatrix::array_layout; + using ULayout_t = typename UMatrix::array_layout; + using VLayout_t = typename VMatrix::array_layout; + + const rocblas_int m = A.extent_int(0); + const rocblas_int n = A.extent_int(1); + const rocblas_int lda = std::is_same_v + ? A.stride(0) + : A.stride(1); + const rocblas_int ldu = std::is_same_v + ? U.stride(0) + : U.stride(1); + const rocblas_int ldvt = std::is_same_v + ? Vt.stride(0) + : Vt.stride(1); + + rocblas_svect UVecMode = rocblas_svect_all; + if ((jobu[0] == 'S') || (jobu[0] == 's')) { + UVecMode = rocblas_svect_singular; + } else if ((jobu[0] == 'O') || (jobu[0] == 'o')) { + UVecMode = rocblas_svect_overwrite; + } else if ((jobu[0] == 'N') || (jobu[0] == 'n')) { + UVecMode = rocblas_svect_none; + } + rocblas_svect VVecMode = rocblas_svect_all; + if ((jobvt[0] == 'S') || (jobvt[0] == 's')) { + VVecMode = rocblas_svect_singular; + } else if ((jobvt[0] == 'O') || (jobvt[0] == 'o')) { + VVecMode = rocblas_svect_overwrite; + } else if ((jobvt[0] == 'N') || (jobvt[0] == 'n')) { + VVecMode = rocblas_svect_none; + } + + const rocblas_workmode WorkMode = rocblas_outofplace; + + Kokkos::View info("svd info"); + Kokkos::View rwork("svd rwork buffer", + Kokkos::min(m, n) - 1); + + KokkosBlas::Impl::RocBlasSingleton& s = + KokkosBlas::Impl::RocBlasSingleton::singleton(); + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + rocblas_set_stream(s.handle, space.hip_stream())); + if constexpr (std::is_same_v) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_sgesvd( + s.handle, UVecMode, VVecMode, m, n, A.data(), lda, S.data(), U.data(), + ldu, Vt.data(), ldvt, rwork.data(), WorkMode, info.data())); + } + if constexpr (std::is_same_v) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_dgesvd( + s.handle, UVecMode, VVecMode, m, n, A.data(), lda, S.data(), U.data(), + ldu, Vt.data(), ldvt, rwork.data(), WorkMode, info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgesvd( + s.handle, UVecMode, VVecMode, m, n, + reinterpret_cast(A.data()), lda, S.data(), + reinterpret_cast(U.data()), ldu, + reinterpret_cast(Vt.data()), ldvt, rwork.data(), + WorkMode, info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_zgesvd( + s.handle, UVecMode, VVecMode, m, n, + reinterpret_cast(A.data()), lda, S.data(), + reinterpret_cast(U.data()), ldu, + reinterpret_cast(Vt.data()), ldvt, + rwork.data(), WorkMode, info.data())); + } + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); +} + +#define KOKKOSLAPACK_SVD_ROCSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct SVD< \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + svd_eti_spec_avail< \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AMatrix = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using SVector = \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using UMatrix = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using VMatrix = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void svd(const Kokkos::HIP& space, const char jobu[], \ + const char jobvt[], const AMatrix& A, const SVector& S, \ + const UMatrix& U, const VMatrix& Vt) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_ROCSOLVER," #SCALAR \ + "]"); \ + svd_print_specialization(); \ + \ + rocsolverSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSLAPACK_SVD_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::HIPSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_HIPMANAGEDSPACE) +KOKKOSLAPACK_SVD_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPManagedSpace) +KOKKOSLAPACK_SVD_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPManagedSpace) +KOKKOSLAPACK_SVD_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::HIPManagedSpace) +KOKKOSLAPACK_SVD_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::HIPManagedSpace) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER + +#endif // KOKKOSLAPACK_SVD_TPL_SPEC_DECL_HPP_ diff --git a/lapack/unit_test/Test_Lapack.hpp b/lapack/unit_test/Test_Lapack.hpp index 815c442884..1a717521f8 100644 --- a/lapack/unit_test/Test_Lapack.hpp +++ b/lapack/unit_test/Test_Lapack.hpp @@ -18,5 +18,6 @@ #include "Test_Lapack_gesv.hpp" #include "Test_Lapack_trtri.hpp" +#include "Test_Lapack_svd.hpp" #endif // TEST_LAPACK_HPP diff --git a/lapack/unit_test/Test_Lapack_svd.hpp b/lapack/unit_test/Test_Lapack_svd.hpp new file mode 100644 index 0000000000..d2e17f7d41 --- /dev/null +++ b/lapack/unit_test/Test_Lapack_svd.hpp @@ -0,0 +1,321 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include +#include +#include + +#include + +namespace Test { + +template +void check_triple_product(const AMatrix& A, const SVector& S, const UMatrix& U, + const VMatrix& Vt) { + // After a successful SVD decomposition we have A=U*S*V + // So using gemm we should be able to compare the above + // triple product to the original matrix A. + + using scalar_type = typename AMatrix::non_const_value_type; + using mag_type = typename Kokkos::ArithTraits::mag_type; + + AMatrix temp("intermediate U*S product", A.extent(0), A.extent(1)); + AMatrix M("U*S*V product", A.extent(0), A.extent(1)); + + // First compute the left side of the product: temp = U*S + KokkosBlas::gemm("N", "N", 1, U, S, 0, temp); + + // Second compute the right side of the product: M = temp*V = U*S*V + KokkosBlas::gemm("N", "C", 1, temp, Vt, 0, M); + + typename AMatrix::HostMirror A_h = Kokkos::create_mirror_view(A); + typename AMatrix::HostMirror M_h = Kokkos::create_mirror_view(M); + Kokkos::deep_copy(A_h, A); + Kokkos::deep_copy(M_h, M); + constexpr mag_type tol = 100 * Kokkos::ArithTraits::eps(); + for (int rowIdx = 0; rowIdx < A.extent_int(0); ++rowIdx) { + for (int colIdx = 0; colIdx < A.extent_int(1); ++colIdx) { + EXPECT_NEAR_KK_REL(A_h(rowIdx, colIdx), M_h(rowIdx, colIdx), tol); + } + } +} + +template +void check_unitary_orthogonal_matrix(const Matrix& M) { + // After a successful SVD decomposition the matrices + // U and V are unitary matrices. Thus we can check + // the property UUt=UtU=I and VVt=VtV=I using gemm. + + using scalar_type = typename Matrix::non_const_value_type; + using mag_type = typename Kokkos::ArithTraits::mag_type; + constexpr mag_type tol = 100 * Kokkos::ArithTraits::eps(); + + Matrix I0("M*Mt", M.extent(0), M.extent(0)); + KokkosBlas::gemm("N", "C", 1, M, M, 0, I0); + typename Matrix::HostMirror I0_h = Kokkos::create_mirror_view(I0); + Kokkos::deep_copy(I0_h, I0); + for (int rowIdx = 0; rowIdx < M.extent_int(0); ++rowIdx) { + for (int colIdx = 0; colIdx < M.extent_int(0); ++colIdx) { + if (rowIdx == colIdx) { + EXPECT_NEAR_KK_REL(I0_h(rowIdx, colIdx), + Kokkos::ArithTraits::one(), tol); + } else { + EXPECT_NEAR_KK_REL(I0_h(rowIdx, colIdx), + Kokkos::ArithTraits::zero(), tol); + } + } + } + + Matrix I1("Mt*M", M.extent(1), M.extent(1)); + KokkosBlas::gemm("C", "N", 1, M, M, 0, I1); + typename Matrix::HostMirror I1_h = Kokkos::create_mirror_view(I1); + Kokkos::deep_copy(I1_h, I1); + for (int rowIdx = 0; rowIdx < M.extent_int(1); ++rowIdx) { + for (int colIdx = 0; colIdx < M.extent_int(1); ++colIdx) { + if (rowIdx == colIdx) { + EXPECT_NEAR_KK_REL(I1_h(rowIdx, colIdx), + Kokkos::ArithTraits::one(), tol); + } else { + EXPECT_NEAR_KK_REL(I1_h(rowIdx, colIdx), + Kokkos::ArithTraits::zero(), tol); + } + } + } +} + +template +int impl_analytic_svd() { + using scalar_type = typename AMatrix::value_type; + using mag_type = typename Kokkos::ArithTraits::mag_type; + using vector_type = + Kokkos::View; + using KAT_S = Kokkos::ArithTraits; + using KAT_M = Kokkos::ArithTraits; + + const mag_type mag_one = KAT_M::one(); + const mag_type eps = KAT_S::eps(); + + AMatrix A("A", 2, 2), U("U", 2, 2), Vt("Vt", 2, 2); + vector_type S("S", 2); + + typename AMatrix::HostMirror A_h = Kokkos::create_mirror_view(A); + + // A = [3 0] + // [4 5] + // USV = 1/sqrt(10) [1 -3] * sqrt(5) [3 0] * 1/sqrt(2) [ 1 1] + // [3 1] [0 1] [-1 1] + A_h(0, 0) = 3; + A_h(1, 0) = 4; + A_h(1, 1) = 5; + + Kokkos::deep_copy(A, A_h); + + KokkosLapack::svd("A", "A", A, S, U, Vt); + // Don't really need to fence here as we deep_copy right after... + + typename vector_type::HostMirror S_h = Kokkos::create_mirror_view(S); + Kokkos::deep_copy(S_h, S); + typename AMatrix::HostMirror U_h = Kokkos::create_mirror_view(U); + Kokkos::deep_copy(U_h, U); + typename AMatrix::HostMirror Vt_h = Kokkos::create_mirror_view(Vt); + Kokkos::deep_copy(Vt_h, Vt); + + // The singular values for this problem + // are known: sqrt(45) and sqrt(5) + EXPECT_NEAR_KK_REL(S_h(0), static_cast(Kokkos::sqrt(45)), + 100 * eps); + EXPECT_NEAR_KK_REL(S_h(1), static_cast(Kokkos::sqrt(5)), 100 * eps); + + // The singular vectors should be identical + // or of oposite sign we check the first + // component of the vectors to determine + // the proper signed comparison. + std::vector Uref = { + static_cast(1 / Kokkos::sqrt(10)), + static_cast(3 / Kokkos::sqrt(10)), + static_cast(-3 / Kokkos::sqrt(10)), + static_cast(1 / Kokkos::sqrt(10))}; + std::vector Vtref = { + static_cast(1 / Kokkos::sqrt(2)), + static_cast(1 / Kokkos::sqrt(2)), + static_cast(-1 / Kokkos::sqrt(2)), + static_cast(1 / Kokkos::sqrt(2))}; + + // Both rotations and reflections are valid + // vector basis so we need to check both signs + // to confirm proper SVD was achieved. + if constexpr (KAT_S::is_complex) { + mag_type sign_vec = + (U_h(0, 0).real() > KAT_S::zero().real()) ? mag_one : -mag_one; + EXPECT_NEAR_KK_REL(sign_vec * U_h(0, 0), Uref[0], 100 * eps); + EXPECT_NEAR_KK_REL(sign_vec * U_h(1, 0), Uref[1], 100 * eps); + sign_vec = (U_h(0, 1).real() < KAT_S::zero().real()) ? mag_one : -mag_one; + EXPECT_NEAR_KK_REL(sign_vec * U_h(0, 1), Uref[2], 100 * eps); + EXPECT_NEAR_KK_REL(sign_vec * U_h(1, 1), Uref[3], 100 * eps); + + sign_vec = (Vt_h(0, 0).real() > KAT_S::zero().real()) ? mag_one : -mag_one; + EXPECT_NEAR_KK_REL(sign_vec * Vt_h(0, 0), Vtref[0], 100 * eps); + EXPECT_NEAR_KK_REL(sign_vec * Vt_h(1, 0), Vtref[1], 100 * eps); + sign_vec = (Vt_h(0, 1).real() < KAT_S::zero().real()) ? mag_one : -mag_one; + EXPECT_NEAR_KK_REL(sign_vec * Vt_h(0, 1), Vtref[2], 100 * eps); + EXPECT_NEAR_KK_REL(sign_vec * Vt_h(1, 1), Vtref[3], 100 * eps); + } else { + mag_type sign_vec = (U_h(0, 0) > KAT_S::zero()) ? mag_one : -mag_one; + EXPECT_NEAR_KK_REL(sign_vec * U_h(0, 0), Uref[0], 100 * eps); + EXPECT_NEAR_KK_REL(sign_vec * U_h(1, 0), Uref[1], 100 * eps); + sign_vec = (U_h(0, 1) < KAT_S::zero()) ? mag_one : -mag_one; + EXPECT_NEAR_KK_REL(sign_vec * U_h(0, 1), Uref[2], 100 * eps); + EXPECT_NEAR_KK_REL(sign_vec * U_h(1, 1), Uref[3], 100 * eps); + + sign_vec = (Vt_h(0, 0) > KAT_S::zero()) ? mag_one : -mag_one; + EXPECT_NEAR_KK_REL(sign_vec * Vt_h(0, 0), Vtref[0], 100 * eps); + EXPECT_NEAR_KK_REL(sign_vec * Vt_h(1, 0), Vtref[1], 100 * eps); + sign_vec = (Vt_h(0, 1) < KAT_S::zero()) ? mag_one : -mag_one; + EXPECT_NEAR_KK_REL(sign_vec * Vt_h(0, 1), Vtref[2], 100 * eps); + EXPECT_NEAR_KK_REL(sign_vec * Vt_h(1, 1), Vtref[3], 100 * eps); + } + + return 0; +} + +template +int impl_test_svd(const int m, const int n) { + AMatrix A("A", m, n); + + return 0; +} + +} // namespace Test + +template +int test_svd() { + int ret; + +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + using view_type_a_layout_left = + Kokkos::View; + + ret = Test::impl_analytic_svd(); + + ret = Test::impl_test_svd(0, 0); + EXPECT_EQ(ret, 0); + + ret = Test::impl_test_svd(1, 1); + EXPECT_EQ(ret, 0); + + ret = Test::impl_test_svd(15, 15); + EXPECT_EQ(ret, 0); + + ret = Test::impl_test_svd(100, 100); + EXPECT_EQ(ret, 0); +#endif + +#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + using view_type_a_layout_right = + Kokkos::View; + + ret = Test::impl_analytic_svd(); + + ret = Test::impl_test_svd(0, 0); + EXPECT_EQ(ret, 0); + + ret = Test::impl_test_svd(1, 1); + EXPECT_EQ(ret, 0); + + ret = Test::impl_test_svd(15, 15); + EXPECT_EQ(ret, 0); + + ret = Test::impl_test_svd(100, 100); + EXPECT_EQ(ret, 0); +#endif + + return 1; +} + +template +int test_wrapper() { +#if defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) || \ + defined(KOKKOSKERNELS_ENABLE_TPL_MKL) + if constexpr (std::is_same_v) { + // Using a device side space with LAPACK/MKL + return test_svd(); + } +#endif + +#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) + if constexpr (std::is_same_v) { + // Using a Cuda device with CUSOLVER + return test_svd(); + } +#endif + +#if defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER) + if constexpr (std::is_same_v) { + // Using a HIP device with ROCSOLVER + return test_svd(); + } +#endif + + std::cout << "No TPL support enabled, svd is not tested" << std::endl; + return 0; +} + +#if defined(KOKKOSKERNELS_INST_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, svd_float) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::svd_float"); + test_wrapper(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, svd_double) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::svd_double"); + test_wrapper(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, svd_complex_float) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::svd_complex_float"); + test_wrapper, TestDevice>(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, svd_complex_double) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::svd_complex_double"); + test_wrapper, TestDevice>(); + Kokkos::Profiling::popRegion(); +} +#endif