From 1b9efc8440f08690520d63b844e5b5dedc87c698 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Tue, 23 Jan 2024 16:36:40 -0700 Subject: [PATCH] Lapack - svd: interface with flags for singular vector compute modes This should be the final interface now. Users can path two string literals to specify the compute mode for the singular vector and these modes are forwarded to the TPLs. --- lapack/impl/KokkosLapack_svd_spec.hpp | 11 +-- lapack/src/KokkosLapack_svd.hpp | 44 +++++++-- .../tpls/KokkosLapack_svd_tpl_spec_avail.hpp | 6 +- .../tpls/KokkosLapack_svd_tpl_spec_decl.hpp | 89 +++++++++++-------- lapack/unit_test/Test_Lapack_svd.hpp | 3 +- 5 files changed, 101 insertions(+), 52 deletions(-) diff --git a/lapack/impl/KokkosLapack_svd_spec.hpp b/lapack/impl/KokkosLapack_svd_spec.hpp index 165c526334..75f25c7254 100644 --- a/lapack/impl/KokkosLapack_svd_spec.hpp +++ b/lapack/impl/KokkosLapack_svd_spec.hpp @@ -51,7 +51,7 @@ struct svd_eti_spec_avail { Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type *, LAYOUT_TYPE, \ Kokkos::Device, \ Kokkos::MemoryTraits>, \ Kokkos::View struct SVD { - static void svd(const ExecutionSpace & /* space */, const AMatrix & /* A */, - const SVector & /* S */, const UMatrix & /* U */, const VMatrix & /* Vt */) { + static void svd(const ExecutionSpace & /* space */, const char* /* jobu */, const char* /* jobvt */, + const AMatrix & /* A */, const SVector & /* S */, const UMatrix & /* U */, + const VMatrix & /* Vt */) { // NOTE: Might add the implementation of KokkosLapack::svd later throw std::runtime_error( "No fallback implementation of SVD (singular value decomposition) " @@ -116,7 +117,7 @@ struct SVD, \ Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type *, LAYOUT_TYPE, \ Kokkos::Device, \ Kokkos::MemoryTraits>, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type *, LAYOUT_TYPE, \ Kokkos::Device, \ Kokkos::MemoryTraits>, \ Kokkos::View -void svd(const ExecutionSpace& space, const AMatrix& A, const SVector& S, - const UMatrix& U, const VMatrix& Vt) { +void svd(const ExecutionSpace& space, const char jobu[], const char jobvt[], + const AMatrix& A, const SVector& S, const UMatrix& U, const VMatrix& Vt) { static_assert( Kokkos::SpaceAccessibility::svd(space, A_i, S_i, U_i, Vt_i); + UMatrix_Internal, VMatrix_Internal>::svd(space, jobu, jobvt, A_i, S_i, U_i, Vt_i); } /// \brief Compute the Singular Value Decomposition of A = U*S*Vt @@ -146,10 +180,10 @@ void svd(const ExecutionSpace& space, const AMatrix& A, const SVector& S, /// \param Vt [out] the first min(m, n) columns of Vt are the right singular vectors of A. /// template -void svd(const AMatrix& A, const SVector& S, const UMatrix& U, +void svd(const char jobu[], const char jobvt[], const AMatrix& A, const SVector& S, const UMatrix& U, const VMatrix& Vt) { typename AMatrix::execution_space space{}; - svd(space, A, S, U, Vt); + svd(space, jobu, jobvt, A, S, U, Vt); } } // namespace KokkosLapack diff --git a/lapack/tpls/KokkosLapack_svd_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_svd_tpl_spec_avail.hpp index 9788ea8c88..887afb6ac4 100644 --- a/lapack/tpls/KokkosLapack_svd_tpl_spec_avail.hpp +++ b/lapack/tpls/KokkosLapack_svd_tpl_spec_avail.hpp @@ -34,7 +34,7 @@ struct svd_tpl_spec_avail { EXECSPACE, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ + Kokkos::View::mag_type*, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -74,7 +74,7 @@ KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLe Kokkos::Cuda, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ + Kokkos::View::mag_type*, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -105,7 +105,7 @@ KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::Layout Kokkos::HIP, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ + Kokkos::View::mag_type*, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ diff --git a/lapack/tpls/KokkosLapack_svd_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_svd_tpl_spec_decl.hpp index 4783717f40..28c4f6ed1b 100644 --- a/lapack/tpls/KokkosLapack_svd_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_svd_tpl_spec_decl.hpp @@ -45,8 +45,8 @@ namespace KokkosLapack { namespace Impl { template -void lapackSvdWrapper(const ExecutionSpace& /* space */, const AMatrix& A, const SVector& S, - const UMatrix& U, const VMatrix& Vt) { +void lapackSvdWrapper(const ExecutionSpace& /* space */, const char jobu[], const char jobvt[], + const AMatrix& A, const SVector& S, const UMatrix& U, const VMatrix& Vt) { using memory_space = typename AMatrix::memory_space; using Scalar = typename AMatrix::non_const_value_type; using Magnitude = typename SVector::non_const_value_type; @@ -75,7 +75,7 @@ void lapackSvdWrapper(const ExecutionSpace& /* space */, const AMatrix& A, const lwork = static_cast(work(0)); } work = Kokkos::View("svd work buffer", lwork); - HostLapack::gesvd('A', 'A', m, n, A.data(), lda, S.data(), U.data(), ldu, + HostLapack::gesvd(jobu, jobvt, m, n, A.data(), lda, S.data(), U.data(), ldu, Vt.data(), ldvt, work.data(), lwork, rwork.data(), info); } @@ -124,15 +124,15 @@ void lapackSvdWrapper(const ExecutionSpace& /* space */, const AMatrix& A, const Kokkos::Device,\ Kokkos::MemoryTraits>; \ \ - static void svd(const EXEC_SPACE& space, const AMatrix& A, \ - const SVector& S, const UMatrix& U, \ - const VMatrix& Vt) { \ + static void svd(const EXEC_SPACE& space, const char jobu[], \ + const char jobvt[], const AMatrix& A,const SVector& S, \ + const UMatrix& U, const VMatrix& Vt) { \ Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_LAPACK," #SCALAR \ "]"); \ svd_print_specialization(); \ \ - lapackSvdWrapper(space, A, S, U, Vt); \ + lapackSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ Kokkos::Profiling::popRegion(); \ } \ }; @@ -177,8 +177,9 @@ namespace Impl { template -void cusolverSvdWrapper(const ExecutionSpace& space, const AMatrix& A, - const SVector& S, const UMatrix& U, const VMatrix& Vt) { +void cusolverSvdWrapper(const ExecutionSpace& space, const char jobu[], + const char jobvt[], const AMatrix& A, + const SVector& S, const UMatrix& U, const VMatrix& Vt) { using memory_space = typename AMatrix::memory_space; using Scalar = typename AMatrix::non_const_value_type; using Magnitude = typename SVector::non_const_value_type; @@ -209,7 +210,7 @@ void cusolverSvdWrapper(const ExecutionSpace& space, const AMatrix& A, cusolverDnSgesvd_bufferSize(s.handle, m, n, &lwork)); Kokkos::View work("svd work buffer", lwork); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgesvd(s.handle, 'A', 'A', m, n, + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgesvd(s.handle, jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), U.data(), ldu, Vt.data(), ldvt, work.data(), lwork, @@ -220,7 +221,7 @@ void cusolverSvdWrapper(const ExecutionSpace& space, const AMatrix& A, cusolverDnDgesvd_bufferSize(s.handle, m, n, &lwork)); Kokkos::View work("svd work buffer", lwork); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgesvd(s.handle, 'A', 'A', m, n, + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgesvd(s.handle, jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), U.data(), ldu, Vt.data(), ldvt, work.data(), lwork, @@ -231,7 +232,7 @@ void cusolverSvdWrapper(const ExecutionSpace& space, const AMatrix& A, cusolverDnCgesvd_bufferSize(s.handle, m, n, &lwork)); Kokkos::View work("svd work buffer", lwork); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgesvd(s.handle, 'A', 'A', m, n, + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgesvd(s.handle, jobu[0], jobvt[0], m, n, reinterpret_cast(A.data()), lda, S.data(), reinterpret_cast(U.data()), ldu, @@ -244,7 +245,7 @@ void cusolverSvdWrapper(const ExecutionSpace& space, const AMatrix& A, cusolverDnZgesvd_bufferSize(s.handle, m, n, &lwork)); Kokkos::View work("svd work buffer", lwork); - KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgesvd(s.handle, 'A', 'A', m, n, + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgesvd(s.handle, jobu[0], jobvt[0], m, n, reinterpret_cast(A.data()), lda, S.data(), reinterpret_cast(U.data()), @@ -300,15 +301,15 @@ void cusolverSvdWrapper(const ExecutionSpace& space, const AMatrix& A, Kokkos::Device, \ Kokkos::MemoryTraits>; \ \ - static void svd(const Kokkos::Cuda& space, const AMatrix& A, \ - const SVector& S, const UMatrix& U, \ - const VMatrix& Vt) { \ + static void svd(const Kokkos::Cuda& space, const char jobu[], \ + const char jobvt[], const AMatrix& A, const SVector& S, \ + const UMatrix& U, const VMatrix& Vt) { \ Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_CUSOLVER," #SCALAR \ "]"); \ svd_print_specialization(); \ \ - cusolverSvdWrapper(space, A, S, U, Vt); \ + cusolverSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ Kokkos::Profiling::popRegion(); \ } \ }; @@ -343,8 +344,8 @@ namespace Impl { template -void rocsolverSvdWrapper(const ExecutionSpace& space, const AMatrix& A, - const SVector& S, const UMatrix& U, const VMatrix& Vt) { +void rocsolverSvdWrapper(const ExecutionSpace& space, const char jobu[], const char jobvt[], + const AMatrix& A, const SVector& S, const UMatrix& U, const VMatrix& Vt) { using memory_space = typename AMatrix::memory_space; using Scalar = typename AMatrix::non_const_value_type; using Magnitude = typename SVector::non_const_value_type; @@ -358,8 +359,22 @@ void rocsolverSvdWrapper(const ExecutionSpace& space, const AMatrix& A, const rocblas_int ldu = std::is_same_v ? U.stride(0) : U.stride(1); const rocblas_int ldvt = std::is_same_v ? Vt.stride(0) : Vt.stride(1); - const rocblas_svect UVecMode = rocblas_svect_all; - const rocblas_svect VVecMode = rocblas_svect_all; + rocblas_svect UVecMode = rocblas_svect_all; + if((jobu[0] == 'S') || (jobu[0] == 's')) { + UVecMode = rocblas_svect_singular; + } else if((jobu[0] == 'O') || (jobu[0] == 'o')) { + UVecMode = rocblas_svect_overwrite; + } else if((jobu[0] == 'N') || (jobu[0] == 'n')) { + UVecMode = rocblas_svect_none; + } + rocblas_svect VVecMode = rocblas_svect_all; + if((jobvt[0] == 'S') || (jobvt[0] == 's')) { + VVecMode = rocblas_svect_singular; + } else if((jobvt[0] == 'O') || (jobvt[0] == 'o')) { + VVecMode = rocblas_svect_overwrite; + } else if((jobvt[0] == 'N') || (jobvt[0] == 'n')) { + VVecMode = rocblas_svect_none; + } const rocblas_workmode WorkMode = rocblas_outofplace; @@ -419,46 +434,46 @@ void rocsolverSvdWrapper(const ExecutionSpace& space, const AMatrix& A, Kokkos::MemoryTraits>, \ Kokkos::View, \ Kokkos::MemoryTraits>, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits>, \ true, \ svd_eti_spec_avail< \ - Kokkos::HIP, \ + Kokkos::HIP, \ Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>, \ Kokkos::View::mag_type*, LAYOUT, \ - Kokkos::Device, \ + Kokkos::Device, \ Kokkos::MemoryTraits>, \ Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>, \ Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>>::value> { \ using AMatrix = Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>; \ using SVector = Kokkos::View::mag_type*, \ LAYOUT, \ - Kokkos::Device, \ + Kokkos::Device, \ Kokkos::MemoryTraits>; \ using UMatrix = Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>; \ using VMatrix = Kokkos::View, \ + Kokkos::Device, \ Kokkos::MemoryTraits>; \ \ - static void svd(const Kokkos::HIP& space, const AMatrix& A, \ - const SVector& S, const UMatrix& U, \ - const VMatrix& Vt) { \ - Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_ROCSOLVER," #SCALAR \ + static void svd(const Kokkos::HIP& space, const char jobu[], \ + const char jobvt[], const AMatrix& A, const SVector& S, \ + const UMatrix& U, const VMatrix& Vt) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_ROCSOLVER," #SCALAR \ "]"); \ - svd_print_specialization(); \ \ - rocsolverSvdWrapper(space, A, S, U, Vt); \ + rocsolverSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ Kokkos::Profiling::popRegion(); \ } \ }; diff --git a/lapack/unit_test/Test_Lapack_svd.hpp b/lapack/unit_test/Test_Lapack_svd.hpp index 5e8b6377be..c23c6ab899 100644 --- a/lapack/unit_test/Test_Lapack_svd.hpp +++ b/lapack/unit_test/Test_Lapack_svd.hpp @@ -100,7 +100,6 @@ int impl_analytic_svd() { using KAT_S = Kokkos::ArithTraits; using KAT_M = Kokkos::ArithTraits; - const mag_type mag_zero = KAT_M::zero(); const mag_type mag_one = KAT_M::one(); const mag_type eps = KAT_S::eps(); @@ -119,7 +118,7 @@ int impl_analytic_svd() { Kokkos::deep_copy(A, A_h); - KokkosLapack::svd(A, S, U, Vt); + KokkosLapack::svd("A", "A", A, S, U, Vt); // Don't really need to fence here as we deep_copy right after... typename vector_type::HostMirror S_h = Kokkos::create_mirror_view(S);