diff --git a/src/batched/dense/KokkosBatched_SVD_Decl.hpp b/src/batched/dense/KokkosBatched_SVD_Decl.hpp
new file mode 100644
index 0000000000..f727d00a35
--- /dev/null
+++ b/src/batched/dense/KokkosBatched_SVD_Decl.hpp
@@ -0,0 +1,66 @@
+#ifndef __KOKKOSBATCHED_SVD_DECL_HPP__
+#define __KOKKOSBATCHED_SVD_DECL_HPP__
+
+/// \author Brian Kelley (bmkelle@sandia.gov)
+
+#include "KokkosBatched_Util.hpp"
+#include "KokkosBatched_Vector.hpp"
+
+namespace KokkosBatched {
+
+  /// Given a general matrix A (m x n), compute the full singular value decomposition (SVD):
+  /// U * diag(s) * V^T = A. U/V are orthogonal and s contains nonnegative values in descending order.
+  ///
+  /// Currently only supports real-valued matrices.
+  ///
+  /// Parameters:
+  ///   [in] A
+  ///     General matrix (rank 2 view), m x n.
+  ///     The contents of A are overwritten and undefined after calling this function.
+  ///   [out] U
+  ///     m left singular vectors (in columns). Dimensions m*m.
+  ///   [out] Vt
+  ///     n right singular vectors (in rows). Dimensions n*n.
+  ///   [out] s
+  ///     min(m, n) singular values.
+  ///   [in] W
+  ///     1D contiguous workspace. The required size is max(m, n).
+  ///
+  /// Preconditions:
+  ///   m == A.extent(0) == U.extent(0) == U.extent(1)
+  ///   n == A.extent(1) == Vt.extent(0) == Vt.extent(1)
+  ///   min(m, n) == s.extent(0)
+  ///   W.extent(0) >= max(m, n)
+  ///   W.stride(0) == 1 (contiguous)
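+  ///
+  /// Example (illustrative sketch only, not part of this interface): one way a caller
+  /// might set up views and invoke the kernel from device code. The sizes, view names,
+  /// and the use of Kokkos::DefaultExecutionSpace below are assumptions made for the
+  /// sake of the example.
+  ///
+  /// \code
+  ///   // A is 5 x 3, so U is 5 x 5, Vt is 3 x 3, s has length 3, and W has length 5.
+  ///   Kokkos::View<double**> A("A", 5, 3), U("U", 5, 5), Vt("Vt", 3, 3);
+  ///   Kokkos::View<double*> s("s", 3), W("W", 5);
+  ///   // ... fill A ...
+  ///   Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(0, 1),
+  ///     KOKKOS_LAMBDA(int) {
+  ///       //Full factorization: A = U * diag(s) * Vt (A is overwritten)
+  ///       KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U, s, Vt, W);
+  ///       //Or, to compute only the singular values:
+  ///       //KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_S_Tag(), A, s, W);
+  ///     });
+  /// \endcode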
+
+  struct SVD_USV_Tag {};
+  struct SVD_S_Tag {};
+  // Note: Could easily add SV or US tags later if needed
+
+  struct SerialSVD {
+    //Version to compute full factorization: A == U * diag(s) * Vt
+    template <typename AViewType, typename UViewType, typename SViewType, typename VtViewType, typename WViewType>
+    KOKKOS_INLINE_FUNCTION
+    static int
+    invoke(SVD_USV_Tag, const AViewType &A,
+        const UViewType &U, const SViewType &s,
+        const VtViewType &Vt, const WViewType &W);
+
+    //Version which computes only singular values
+    template <typename AViewType, typename SViewType, typename WViewType>
+    KOKKOS_INLINE_FUNCTION
+    static int
+    invoke(SVD_S_Tag, const AViewType &A, const SViewType &s, const WViewType &W);
+  };
+
+} /// end namespace KokkosBatched
+
+#include "KokkosBatched_SVD_Serial_Impl.hpp"
+
+#endif
diff --git a/src/batched/dense/impl/KokkosBatched_SVD_Serial_Impl.hpp b/src/batched/dense/impl/KokkosBatched_SVD_Serial_Impl.hpp
new file mode 100644
index 0000000000..cd943e71b9
--- /dev/null
+++ b/src/batched/dense/impl/KokkosBatched_SVD_Serial_Impl.hpp
@@ -0,0 +1,51 @@
+#ifndef __KOKKOSBATCHED_SVD_SERIAL_IMPL_HPP__
+#define __KOKKOSBATCHED_SVD_SERIAL_IMPL_HPP__
+
+/// \author Brian Kelley (bmkelle@sandia.gov)
+
+#include "KokkosBatched_SVD_Serial_Internal.hpp"
+
+namespace KokkosBatched {
+  //Version which computes the full factorization
+  template <typename AViewType, typename UViewType, typename SViewType, typename VViewType, typename WViewType>
+  KOKKOS_INLINE_FUNCTION
+  int SerialSVD::
+  invoke(SVD_USV_Tag, const AViewType &A,
+      const UViewType &U, const SViewType &sigma,
+      const VViewType &Vt, const WViewType &work)
+  {
+    using value_type = typename AViewType::non_const_value_type;
+    return KokkosBatched::SerialSVDInternal::invoke<value_type>
+      (A.extent(0), A.extent(1),
+       A.data(), A.stride(0), A.stride(1),
+       U.data(), U.stride(0), U.stride(1),
+       Vt.data(), Vt.stride(0), Vt.stride(1),
+       sigma.data(), sigma.stride(0),
+       work.data());
+  }
+
+  //Version which computes only singular values
+  template <typename AViewType, typename SViewType, typename WViewType>
+  KOKKOS_INLINE_FUNCTION
+  int SerialSVD::
+  invoke(SVD_S_Tag, const AViewType &A, const SViewType &sigma, const WViewType &work)
+  {
+    using value_type = typename AViewType::non_const_value_type;
+    return KokkosBatched::SerialSVDInternal::invoke<value_type>
+      (A.extent(0), A.extent(1),
+       A.data(), A.stride(0), A.stride(1),
+       nullptr, 0, 0,
+       nullptr, 0, 0,
+       sigma.data(), sigma.stride(0),
+       work.data());
+  }
+
+} /// end namespace KokkosBatched
+
+#endif
diff --git a/src/batched/dense/impl/KokkosBatched_SVD_Serial_Internal.hpp b/src/batched/dense/impl/KokkosBatched_SVD_Serial_Internal.hpp
new file mode 100644
index 0000000000..1a5ca961b6
--- /dev/null
+++ b/src/batched/dense/impl/KokkosBatched_SVD_Serial_Internal.hpp
@@ -0,0 +1,355 @@
+#ifndef __KOKKOSBATCHED_SVD_SERIAL_INTERNAL_HPP__
+#define __KOKKOSBATCHED_SVD_SERIAL_INTERNAL_HPP__
+
+/// \author Brian Kelley (bmkelle@sandia.gov)
+
+#include "Kokkos_MathematicalFunctions.hpp"
+#include "KokkosBatched_SetIdentity_Internal.hpp"
+#include "KokkosBatched_Givens_Serial_Internal.hpp"
+#include "KokkosBatched_ApplyGivens_Serial_Internal.hpp"
+#include "KokkosBatched_Householder_Serial_Internal.hpp"
+#include "KokkosBatched_ApplyHouseholder_Serial_Internal.hpp"
+
+//Use this macro to handle raw pointer/stride based 2D indexing in this file (just for readability)
+//Requires that for pointer X, the corresponding row/col strides are named Xs0 and Xs1.
+#define SVDIND(arr, i, j) arr[(i) * arr##s0 + (j) * arr##s1]
+#define SVDSWAP(a, b) {auto tmp = a; a = b; b = tmp;}
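+//For example, SVDIND(B, i, j) expands to B[(i) * Bs0 + (j) * Bs1], i.e. the (i, j) entry of B
+//under row stride Bs0 and column stride Bs1, and SVDSWAP exchanges the values of its two arguments.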
+
+namespace KokkosBatched {
+
+  ///
+  /// Serial Internal Impl
+  /// ====================
+
+  struct SerialSVDInternal {
+
+    //Find the two eigenvalues of [a11 a21 ; a21 a22] by solving the characteristic quadratic.
+    //Since matrix is symmetric these will be real.
+    //NOTE: this is essentially the Wilkinson shift routine already in Batched,
+    //however this is simpler because it exploits the symmetric structure, and the realness of the eigenvalues.
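+    //(The discriminant b^2 - 4ac simplifies to (a11 - a22)^2 + 4*a21^2, which is nonnegative,
+    // so the square root below is always real.)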
+    template <typename value_type>
+    KOKKOS_INLINE_FUNCTION static void symEigen2x2(value_type a11, value_type a21, value_type a22, value_type& e1, value_type& e2)
+    {
+      value_type a = Kokkos::ArithTraits<value_type>::one();
+      value_type b = -a11 - a22;
+      value_type c = a11 * a22 - a21 * a21;
+      value_type sqrtDet = Kokkos::Experimental::sqrt(b * b - 4 * a * c);
+      e1 = (-b + sqrtDet) / (2 * a);
+      e2 = (-b - sqrtDet) / (2 * a);
+    }
+
+    // B is a square submatrix on the diagonal.
+    // Usub is a subset of columns of U
+    // Vtsub is a subset of rows of Vt
+    //
+    // B22 is nsub * nsub, Usub is m * nsub, and Vtsub is nsub * n
+    template <typename value_type>
+    KOKKOS_INLINE_FUNCTION static void svdStep(value_type* B, value_type* U, value_type* Vt, int um, int vn, int n, int Bs0, int Bs1, int Us0, int Us1, int Vts0, int Vts1)
+    {
+      using KAT = Kokkos::ArithTraits<value_type>;
+      //Compute the eigenvalues of trailing 2x2
+      value_type dn = SVDIND(B, n-1, n-1);
+      value_type dm = SVDIND(B, n-2, n-2);
+      value_type fm = SVDIND(B, n-2, n-1);
+      value_type fmm1 = (n > 2) ? SVDIND(B, n-3, n-2) : KAT::zero();
+      value_type target = dn * dn + fm * fm;
+      value_type e1, e2, mu;
+      symEigen2x2(dm * dm + fmm1 * fmm1, dm * fm, target, e1, e2);
+      //the shift is the eigenvalue closer to the last diagonal entry of B^T*B
+      if(fabs(e1 - target) < fabs(e2 - target))
+        mu = e1;
+      else
+        mu = e2;
+      value_type y = SVDIND(B, 0, 0) * SVDIND(B, 0, 0) - mu;
+      value_type z = SVDIND(B, 0, 0) * SVDIND(B, 0, 1);
+      for(int k = 0; k < n - 1; k++)
+      {
+        //Use Givens to zero out z in [y; z]
+        Kokkos::pair<value_type, value_type> G;
+        value_type discard; //Don't actually write [alpha; 0] anywhere
+        KokkosBatched::SerialGivensInternal::invoke(y, z, &G, &discard);
+        //apply the Givens transformation to B on the right, to columns k,k+1
+        //B := BG(k, k+1, theta)
+        int minrow = KOKKOSKERNELS_MACRO_MAX(0, k - 1);
+        int maxrow = KOKKOSKERNELS_MACRO_MIN(n, k + 2);
+        KokkosBatched::SerialApplyRightGivensInternal::invoke(G, maxrow - minrow, &SVDIND(B, minrow, k + 1), Bs0, &SVDIND(B, minrow, k), Bs0);
+        if(Vt)
+        {
+          KokkosBatched::SerialApplyLeftGivensInternal::invoke(G, vn, &SVDIND(Vt, k + 1, 0), Vts1, &SVDIND(Vt, k, 0), Vts1);
+        }
+        y = SVDIND(B, k, k);
+        z = SVDIND(B, k + 1, k);
+        KokkosBatched::SerialGivensInternal::invoke(y, z, &G, &SVDIND(B, k, k));
+        SVDIND(B, k + 1, k) = KAT::zero();
+        int mincol = k + 1;
+        int maxcol = KOKKOSKERNELS_MACRO_MIN(n, k + 3);
+        //apply Givens transformation to B on the left, to rows k, k + 1
+        //B := G(k, k+1, theta)^T * B
+        KokkosBatched::SerialApplyLeftGivensInternal::invoke(G, maxcol - mincol, &SVDIND(B, k + 1, mincol), Bs1, &SVDIND(B, k, mincol), Bs1);
+        if(U)
+        {
+          KokkosBatched::SerialApplyRightGivensInternal::invoke(G, um, &SVDIND(U, 0, k + 1), Us0, &SVDIND(U, 0, k), Us0);
+        }
+        if(k < n - 2)
+        {
+          y = SVDIND(B, k, k + 1);
+          z = SVDIND(B, k, k + 2);
+        }
+      }
+    }
+
+    //Deal with B(i, i) = 0, by chasing superdiagonal nonzero across row i.
+    //Assumes i is not the last row.
+    //U is m*m, B is n*n
+    template <typename value_type>
+    KOKKOS_INLINE_FUNCTION static void svdZeroRow(int i, value_type* B, int n, int Bs0, int Bs1, value_type* U, int m, int Us0, int Us1)
+    {
+      Kokkos::pair<value_type, value_type> G;
+      for(int j = i + 1; j < n; j++)
+      {
+        //Zero out B(i, j) against diagonal j, introducing nonzero in B(i, j + 1)
+        KokkosBatched::SerialGivensInternal::invoke(SVDIND(B, j, j), SVDIND(B, i, j), &G, &SVDIND(B, j, j));
+        SVDIND(B, i, j) = Kokkos::ArithTraits<value_type>::zero();
+        //Now, only need to apply givens to a single column (if not already at the end),
+        //introducing the next nonzero
+        if(j < n - 1)
+        {
+          KokkosBatched::SerialApplyLeftGivensInternal::invoke(G, 1, &SVDIND(B, i, j + 1), Bs1, &SVDIND(B, j, j + 1), Bs1);
+        }
+        if(U)
+        {
+          KokkosBatched::SerialApplyRightGivensInternal::invoke(G, m, &SVDIND(U, 0, i), Us0, &SVDIND(U, 0, j), Us0);
+        }
+      }
+    }
+
+    template <typename value_type>
+    KOKKOS_INLINE_FUNCTION static void svdZeroLastColumn(value_type* B, int n, int Bs0, int Bs1, value_type* Vt, int Vts0, int Vts1)
+    {
+      //Deal with B(n-1, n-1) = 0, by chasing the superdiagonal nonzero up the last column.
+      Kokkos::pair<value_type, value_type> G;
+      for(int j = n - 2; j >= 0; j--)
+      {
+        KokkosBatched::SerialGivensInternal::invoke(SVDIND(B, j, j), SVDIND(B, j, n - 1), &G, &SVDIND(B, j, j));
+        SVDIND(B, j, n - 1) = Kokkos::ArithTraits<value_type>::zero();
+        if(j != 0)
+        {
+          KokkosBatched::SerialApplyRightGivensInternal::invoke(G, 1, &SVDIND(B, j - 1, n - 1), Bs0, &SVDIND(B, j - 1, j), Bs0);
+        }
+        if(Vt)
+        {
+          KokkosBatched::SerialApplyLeftGivensInternal::invoke(G, n, &SVDIND(Vt, n - 1, 0), Vts1, &SVDIND(Vt, j, 0), Vts1);
+        }
+      }
+    }
+
+    template <typename value_type>
+    KOKKOS_INLINE_FUNCTION static void bidiagonalize(int m, int n, value_type* A, int As0, int As1, value_type* U, int Us0, int Us1, value_type* Vt, int Vts0, int Vts1, value_type* work)
+    {
+      using KAT = Kokkos::ArithTraits<value_type>;
+      value_type tau;
+      for(int i = 0; i < n; i++)
+      {
+        //Eliminating column i of A below the diagonal
+        KokkosBatched::SerialLeftHouseholderInternal::invoke(m - i - 1, &SVDIND(A, i, i), &SVDIND(A, i + 1, i), As0, &tau);
+        if(n - i > 1)
+        {
+          KokkosBatched::SerialApplyLeftHouseholderInternal::invoke(m - i - 1, n - i - 1, &tau, &SVDIND(A, i + 1, i), As0, &SVDIND(A, i, i + 1), As1, &SVDIND(A, i + 1, i + 1), As0, As1, work);
+        }
+        if(U)
+        {
+          KokkosBatched::SerialApplyRightHouseholderInternal::invoke(m, m - i - 1, &tau, &SVDIND(A, i + 1, i), As0, &SVDIND(U, 0, i), Us0, &SVDIND(U, 0, i + 1), Us0, Us1, work);
+        }
+        //Zero out A subdiag explicitly (NOTE: may not be necessary...)
+        for(int j = i + 1; j < m; j++)
+        {
+          SVDIND(A, j, i) = KAT::zero();
+        }
+        if(i < n - 2)
+        {
+          //Eliminating row i of A to the right of the 1st superdiagonal
+          KokkosBatched::SerialLeftHouseholderInternal::invoke(n - i - 2, &SVDIND(A, i, i + 1), &SVDIND(A, i, i + 2), As1, &tau);
+          if(m - i > 1)
+          {
+            KokkosBatched::SerialApplyRightHouseholderInternal::invoke(m - i - 1, n - i - 2, &tau, &SVDIND(A, i, i + 2), As1, &SVDIND(A, i + 1, i + 1), As0, &SVDIND(A, i + 1, i + 2), As0, As1, work);
+          }
+          if(Vt)
+          {
+            KokkosBatched::SerialApplyLeftHouseholderInternal::invoke(n - i - 2, n, &tau, &SVDIND(A, i, i + 2), As1, &SVDIND(Vt, i + 1, 0), Vts1, &SVDIND(Vt, i + 2, 0), Vts0, Vts1, work);
+          }
+          //Zero out A superdiag row explicitly
+          for(int j = i + 2; j < n; j++)
+          {
+            SVDIND(A, i, j) = KAT::zero();
+          }
+        }
+      }
+    }
+
+    //Compute the SVD of a bidiagonal matrix B. Apply inverse transformations to U and Vt to maintain the product U*B*Vt.
+    //At the end, the singular values are copied to sigma.
+    template <typename value_type>
+    KOKKOS_INLINE_FUNCTION static void bidiSVD(int m, int n, value_type* B, int Bs0, int Bs1, value_type* U, int Us0, int Us1, value_type* Vt, int Vts0, int Vts1, value_type* sigma, int ss)
+    {
+      using KAT = Kokkos::ArithTraits<value_type>;
+      const value_type eps = Kokkos::ArithTraits<value_type>::epsilon();
+      int p = 0;
+      int q = 0;
+      while(true)
+      {
+        //Zero out tiny superdiagonal entries
+        for(int i = 0; i < n - 1; i++)
+        {
+          if(fabs(SVDIND(B, i, i + 1)) < eps * (fabs(SVDIND(B, i, i)) + fabs(SVDIND(B, i + 1, i + 1))))
+          {
+            SVDIND(B, i, i + 1) = KAT::zero();
+          }
+        }
+        //Find q: first column from the end with nonzero superdiagonal.
+        //If no such columns, will be 0.
+        for(q = n - 1; q > 0; q--)
+        {
+          if(SVDIND(B, q - 1, q) != KAT::zero())
+            break;
+        }
+        if(q == 0)
+        {
+          //B is completely diagonal, so it contains singular values and we are done.
+          break;
+        }
+        q++;
+        //now, q is the upper (exclusive) bound of submatrix on which to do SVD step.
+        //Find min p, so that [p, q) x [p, q) submatrix has all nonzero superdiagonals.
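+        //(After the loop below, [p, q) is the largest block ending at q whose superdiagonal
+        // entries are all nonzero; the SVD step is applied only to that block.)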
+        for(p = q - 1; p > 0; p--)
+        {
+          if(SVDIND(B, p - 1, p) == KAT::zero())
+            break;
+        }
+        //If there are zero diagonals in this range, eliminate the entire row
+        //(effectively decoupling into two subproblems)
+        for(int i = q - 1; i >= p; i--)
+        {
+          if(SVDIND(B, i, i) == KAT::zero())
+          {
+            if(i == n - 1)
+            {
+              //Last diagonal entry being 0 is a special case:
+              //zero out the superdiagonal above it by chasing it up the last column.
+              svdZeroLastColumn(B, n, Bs0, Bs1, Vt, Vts0, Vts1);
+            }
+            else if(SVDIND(B, i, i + 1) != KAT::zero())
+            {
+              svdZeroRow(i, B, n, Bs0, Bs1, U, m, Us0, Us1);
+            }
+          }
+          continue;
+        }
+        int nsub = q - p;
+        //B22 is nsub * nsub, Usub is m * nsub, and Vtsub is nsub * n
+        svdStep(&SVDIND(B, p, p), &SVDIND(U, 0, p), &SVDIND(Vt, p, 0), m, n, nsub, Bs0, Bs1, Us0, Us1, Vts0, Vts1);
+      }
+      for(int i = 0; i < n; i++)
+      {
+        sigma[i * ss] = SVDIND(B, i, i);
+      }
+    }
+
+    //Convert SVD into conventional form: singular values positive and in descending order
+    template <typename value_type>
+    KOKKOS_INLINE_FUNCTION static void postprocessSVD(int m, int n, value_type* U, int Us0, int Us1, value_type* Vt, int Vts0, int Vts1, value_type* sigma, int ss)
+    {
+      //First step: flip signs on negative singular values
+      for(int i = 0; i < n; i++)
+      {
+        if(sigma[i * ss] < 0)
+        {
+          sigma[i * ss] = -sigma[i * ss];
+          if(Vt)
+          {
+            for(int j = 0; j < n; j++)
+              SVDIND(Vt, i, j) = -SVDIND(Vt, i, j);
+          }
+        }
+      }
+      //Second step: stable selection sort to put singular values in order.
+      //Using selection sort because the quadratic part only applies to sigma (O(n^2) total), and it minimizes column swaps in U,V (O(mn) total movement).
+      for(int i = 0; i < n - 1; i++)
+      {
+        //find the proper singular value to go in position i
+        value_type maxval = sigma[i * ss];
+        int maxloc = i;
+        for(int j = i + 1; j < n; j++)
+        {
+          if(sigma[j * ss] > maxval)
+          {
+            maxval = sigma[j * ss];
+            maxloc = j;
+          }
+        }
+        //swap singular values and U/V columns i and maxloc (if maxloc is not already in the right place)
+        if(i != maxloc)
+        {
+          SVDSWAP(sigma[i * ss], sigma[maxloc * ss]);
+          if(U)
+          {
+            for(int j = 0; j < m; j++)
+              SVDSWAP(SVDIND(U, j, i), SVDIND(U, j, maxloc))
+          }
+          if(Vt)
+          {
+            for(int j = 0; j < n; j++)
+              SVDSWAP(SVDIND(Vt, i, j), SVDIND(Vt, maxloc, j))
+          }
+        }
+      }
+    }
+
+    template <typename value_type>
+    KOKKOS_INLINE_FUNCTION static int
+    invoke(int m, int n,
+           value_type* A, int As0, int As1,
+           value_type* U, int Us0, int Us1,
+           value_type* Vt, int Vts0, int Vts1,
+           value_type* sigma, int ss,
+           value_type* work)
+    {
+      //First, if m < n, compute the SVD of A^T instead, which gives the factors (V, s, U^T).
+      //This just means swapping U & Vt, and implicitly transposing A, U and Vt.
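+      //(Swapping the row and column strides is an implicit transpose: with As0 and As1
+      // exchanged, SVDIND(A, i, j) reads the entry that was originally A(j, i).)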
+      if(m < n)
+      {
+        //Transpose A
+        SVDSWAP(m, n);
+        SVDSWAP(As0, As1);
+        //Transpose and swap U, Vt
+        SVDSWAP(U, Vt);
+        SVDSWAP(Us0, Vts1);
+        SVDSWAP(Us1, Vts0);
+      }
+      if(U)
+      {
+        KokkosBatched::SerialSetIdentityInternal::invoke(m, m, U, Us0, Us1);
+      }
+      if(Vt)
+      {
+        KokkosBatched::SerialSetIdentityInternal::invoke(n, n, Vt, Vts0, Vts1);
+      }
+      if(m == 0 || n == 0)
+      {
+        //sigma is length 0, so there's nothing left to compute
+        return 0;
+      }
+      bidiagonalize(m, n, A, As0, As1, U, Us0, Us1, Vt, Vts0, Vts1, work);
+      bidiSVD(m, n, A, As0, As1, U, Us0, Us1, Vt, Vts0, Vts1, sigma, ss);
+      postprocessSVD(m, n, U, Us0, Us1, Vt, Vts0, Vts1, sigma, ss);
+      return 0;
+    }
+  };
+
+} /// end namespace KokkosBatched
+
+#undef SVDIND
+#undef SVDSWAP
+
+#endif
diff --git a/test_common/KokkosKernels_TestUtils.hpp b/test_common/KokkosKernels_TestUtils.hpp
index 3e4344ea84..d472e2cee9 100644
--- a/test_common/KokkosKernels_TestUtils.hpp
+++ b/test_common/KokkosKernels_TestUtils.hpp
@@ -243,6 +243,31 @@ struct Functor_BatchedVanillaGEMM {
   }
 };
 
+//Compute C := alpha * AB + beta * C
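+//For example, given host-accessible rank-2 views A (m x k), B (k x n), and C (m x n),
+//vanillaGEMM(1.0, A, B, 0.0, C) overwrites C with the product A*B.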
+template <class ViewTypeA, class ViewTypeB, class ViewTypeC>
+void vanillaGEMM(typename ViewTypeC::non_const_value_type alpha,
+                 const ViewTypeA& A, const ViewTypeB& B,
+                 typename ViewTypeC::non_const_value_type beta,
+                 const ViewTypeC& C) {
+  using value_type = typename ViewTypeC::non_const_value_type;
+  using KAT = Kokkos::ArithTraits<value_type>;
+  int m = A.extent(0);
+  int k = A.extent(1);
+  int n = B.extent(1);
+  for(int i = 0; i < m; i++)
+  {
+    for(int j = 0; j < n; j++)
+    {
+      value_type sum = KAT::zero();
+      for(int ii = 0; ii < k; ii++)
+      {
+        sum += A(i, ii) * B(ii, j);
+      }
+      C(i, j) = alpha * sum + beta * C(i, j);
+    }
+  }
+}
+
 template <class ViewTypeA, class ViewTypeX, class ViewTypeY>
 void vanillaGEMV(char mode, typename ViewTypeA::non_const_value_type alpha,
                  const ViewTypeA& A, const ViewTypeX& x,
diff --git a/unit_test/batched/dense/Test_Batched_Dense.hpp b/unit_test/batched/dense/Test_Batched_Dense.hpp
index 0541b88d7e..60dcda20cc 100644
--- a/unit_test/batched/dense/Test_Batched_Dense.hpp
+++ b/unit_test/batched/dense/Test_Batched_Dense.hpp
@@ -37,6 +37,7 @@
 #include "Test_Batched_SerialTrtri.hpp"
 #include "Test_Batched_SerialTrtri_Real.hpp"
 #include "Test_Batched_SerialTrtri_Complex.hpp"
+#include "Test_Batched_SerialSVD.hpp"
 
 // Team Kernels
 #include "Test_Batched_TeamGemm.hpp"
diff --git a/unit_test/batched/dense/Test_Batched_SerialSVD.hpp b/unit_test/batched/dense/Test_Batched_SerialSVD.hpp
new file mode 100644
index 0000000000..fbdfbc207a
--- /dev/null
+++ b/unit_test/batched/dense/Test_Batched_SerialSVD.hpp
@@ -0,0 +1,409 @@
+/// \author Brian Kelley (bmkelle@sandia.gov)
+
+#include "KokkosBatched_SVD_Decl.hpp"            //For testing overall kernel
+#include "KokkosBatched_SVD_Serial_Internal.hpp" //For unit testing individual components
+#include "KokkosBatched_SetIdentity_Decl.hpp"
+
+namespace Test
+{
+  template <typename Scalar>
+  Scalar svdEpsilon()
+  {throw std::runtime_error("Unsupported scalar type");}
+
+  template <>
+  double svdEpsilon<double>()
+  {return 1e-13;}
+
+  template <>
+  float svdEpsilon<float>()
+  {return 2e-6f;}
+}
+
+template <typename Vector>
+double simpleNorm2(const Vector& v)
+{
+  using Scalar = typename Vector::non_const_value_type;
+  using KAT = Kokkos::ArithTraits<Scalar>;
+  auto vhost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), v);
+  double d = 0;
+  for(size_t i = 0; i < v.extent(0); i++)
+  {
+    double m = KAT::abs(vhost(i));
+    d += m * m;
+  }
+  return Kokkos::Experimental::sqrt(d);
+}
+
+template <typename V1, typename V2>
+typename V1::non_const_value_type simpleDot(const V1& v1, const V2& v2)
+{
+  using Scalar = typename V1::non_const_value_type;
+  using KAT = Kokkos::ArithTraits<Scalar>;
+  auto v1host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), v1);
+  auto v2host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), v2);
+  typename V1::non_const_value_type val = KAT::zero();
+  for(size_t i = 0; i < v1.extent(0); i++)
+  {
+    val += v1host(i) * v2host(i);
+  }
+  return val;
+}
+
+//Check that all columns of X are unit length and pairwise orthogonal
+template <typename Mat>
+void verifyOrthogonal(const Mat& X)
+{
+  using value_type = typename Mat::non_const_value_type;
+  int k = X.extent(1);
+  for(int i = 0; i < k; i++)
+  {
+    auto col1 = Kokkos::subview(X, Kokkos::ALL(), i);
+    double len = simpleNorm2(col1);
+    Test::EXPECT_NEAR_KK(len, 1.0, Test::svdEpsilon<value_type>());
+    for(int j = 0; j < i; j++)
+    {
+      auto col2 = Kokkos::subview(X, Kokkos::ALL(), j);
+      double d = Kokkos::ArithTraits<value_type>::abs(simpleDot(col1, col2));
+      Test::EXPECT_NEAR_KK(d, 0.0, Test::svdEpsilon<value_type>());
+    }
+  }
+}
+
+template <typename AView, typename UView, typename VtView, typename SigmaView>
+void verifySVD(const AView& A, const UView& U, const VtView& Vt, const SigmaView& sigma)
+{
+  using value_type = typename AView::non_const_value_type;
+  using KAT = Kokkos::ArithTraits<value_type>;
+  //Check that U/V columns are unit length and orthogonal, and that U * diag(sigma) * V^T == A
+  int m = A.extent(0);
+  int n = A.extent(1);
+  int maxrank = std::min(m, n);
+  verifyOrthogonal(U);
+  //NOTE: V^T being square and orthonormal implies that V is, so we don't have to transpose it here.
+  verifyOrthogonal(Vt);
+  AView usvt("USV^T", m, n);
+  for(int i = 0; i < maxrank; i++)
+  {
+    auto Ucol = Kokkos::subview(U, Kokkos::ALL(), Kokkos::make_pair(i, i + 1));
+    auto Vtrow = Kokkos::subview(Vt, Kokkos::make_pair(i, i + 1), Kokkos::ALL());
+    Test::vanillaGEMM(sigma(i), Ucol, Vtrow, 1.0, usvt);
+  }
+  for(int i = 0; i < m; i++)
+  {
+    for(int j = 0; j < n; j++)
+    {
+      Test::EXPECT_NEAR_KK(usvt(i, j), A(i, j), Test::svdEpsilon<value_type>());
+    }
+  }
+  //Make sure all singular values are positive
+  for(int i = 0; i < maxrank; i++)
+  {
+    EXPECT_GE(sigma(i), KAT::zero());
+  }
+  //Make sure singular values are in descending order
+  for(int i = 0; i < maxrank - 1; i++)
+  {
+    EXPECT_GE(sigma(i), sigma(i + 1));
+  }
+}
+
+template <typename Matrix>
+Matrix createRandomMatrix(int m, int n, int deficiency, double maxval = 1.0)
+{
+  using Scalar = typename Matrix::non_const_value_type;
+  Matrix mat("A", m, n);
+  auto mhost = Kokkos::create_mirror_view(mat);
+  //Fill mat with random values first
+  if(maxval != 0.0)
+  {
+    Kokkos::Random_XorShift64_Pool<Kokkos::DefaultHostExecutionSpace> rand_pool(13718);
+    Scalar minrand, maxrand;
+    Test::getRandomBounds(maxval, minrand, maxrand);
+    Kokkos::fill_random(mhost, rand_pool, minrand, maxrand);
+  }
+  //Apply the rank deficiency.
+  //If m < n, make some rows a multiple of the first row.
+  //Otherwise, make some columns a multiple of the first column.
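+  //(Each such dependent row or column reduces the rank by one, so the expected rank of the
+  // result is min(m, n) - deficiency.)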
+  if(m < n)
+  {
+    for(int i = 0; i < deficiency; i++)
+    {
+      //make row i + 1 a multiple of row 0
+      for(int j = 0; j < n; j++)
+      {
+        mhost(i + 1, j) = (double) (i + 2) * mhost(0, j);
+      }
+    }
+  }
+  else
+  {
+    for(int i = 0; i < deficiency; i++)
+    {
+      //make col i + 1 a multiple of col 0
+      for(int j = 0; j < m; j++)
+      {
+        mhost(j, i + 1) = (double) (i + 2) * mhost(j, 0);
+      }
+    }
+  }
+  Kokkos::deep_copy(mat, mhost);
+  return mat;
+}
+
+template <typename Matrix, typename Vector>
+struct SerialSVDFunctor_Full
+{
+  SerialSVDFunctor_Full(const Matrix& A_, const Matrix& U_, const Matrix& Vt_, const Vector& sigma_, const Vector& work_)
+    : A(A_), U(U_), Vt(Vt_), sigma(sigma_), work(work_)
+  {}
+
+  //NOTE: this functor is only meant to be launched with a single element range policy
+  KOKKOS_INLINE_FUNCTION void operator()(int) const
+  {
+    KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U, sigma, Vt, work);
+  }
+
+  Matrix A;
+  Matrix U;
+  Matrix Vt;
+  Vector sigma;
+  Vector work;
+};
+
+template <typename Matrix, typename Vector>
+struct SerialSVDFunctor_SingularValuesOnly
+{
+  SerialSVDFunctor_SingularValuesOnly(const Matrix& A_, const Vector& sigma_, const Vector& work_)
+    : A(A_), sigma(sigma_), work(work_)
+  {}
+
+  //NOTE: this functor is only meant to be launched with a single element range policy
+  KOKKOS_INLINE_FUNCTION void operator()(int) const
+  {
+    KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_S_Tag(), A, sigma, work);
+  }
+
+  Matrix A;
+  Vector sigma;
+  Vector work;
+};
+
+template <typename Scalar, typename Layout, typename Device>
+void testSerialSVD(int m, int n, int deficiency, double maxval = 1.0)
+{
+  using Matrix = Kokkos::View<Scalar**, Layout, Device>;
+  using Vector = Kokkos::View<Scalar*, Layout, Device>;
+  using ExecSpace = typename Device::execution_space;
+  Matrix A = createRandomMatrix<Matrix>(m, n, deficiency, maxval);
+  //Fill U, Vt, sigma with nonzeros as well to make sure they are properly overwritten
+  Matrix U("U", m, m);
+  Matrix Vt("Vt", n, n);
+  int maxrank = std::min(m, n);
+  Vector sigma("sigma", maxrank);
+  Vector work("work", std::max(m, n));
+  Kokkos::deep_copy(U, -5.0);
+  Kokkos::deep_copy(Vt, -5.0);
+  Kokkos::deep_copy(sigma, -5.0);
+  Kokkos::deep_copy(work, -5.0);
+  //Make a copy of A (before SVD) for verification, since the original will be overwritten
+  typename Matrix::HostMirror Acopy("Acopy", m, n);
+  Kokkos::deep_copy(Acopy, A);
+  //Run the SVD
+  Kokkos::parallel_for(Kokkos::RangePolicy<ExecSpace>(0, 1),
+      SerialSVDFunctor_Full<Matrix, Vector>(A, U, Vt, sigma, work));
+  //Get the results back
+  auto Uhost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), U);
+  auto Vthost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), Vt);
+  auto sigmaHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), sigma);
+  //Verify the SVD is correct
+  verifySVD(Acopy, Uhost, Vthost, sigmaHost);
+}
+
+template <typename Scalar, typename Layout, typename Device>
+void testSerialSVDSingularValuesOnly(int m, int n)
+{
+  using Matrix = Kokkos::View<Scalar**, Layout, Device>;
+  using Vector = Kokkos::View<Scalar*, Layout, Device>;
+  using ExecSpace = typename Device::execution_space;
+  Matrix A = createRandomMatrix<Matrix>(m, n, 0);
+  //Fill U, Vt, sigma with nonzeros as well to make sure they are properly overwritten
+  Matrix U("U", m, m);
+  Matrix Vt("Vt", n, n);
+  int maxrank = std::min(m, n);
+  Vector sigma1("sigma", maxrank);
+  Vector sigma2("sigma", maxrank);
+  Vector work("work", std::max(m, n));
+  Kokkos::deep_copy(U, -5.0);
+  Kokkos::deep_copy(Vt, -5.0);
+  Kokkos::deep_copy(sigma1, -5.0);
+  Kokkos::deep_copy(sigma2, -7.0);
+  Kokkos::deep_copy(work, -5.0);
+  //Make a copy of A (before SVD) for verification, since the original will be overwritten
+  typename Matrix::HostMirror Acopy("Acopy", m, n);
+  Kokkos::deep_copy(Acopy, A);
+
+  //Run the SVD (full mode)
+  Kokkos::parallel_for(Kokkos::RangePolicy<ExecSpace>(0, 1),
+      SerialSVDFunctor_Full<Matrix, Vector>(A, U, Vt, sigma1, work));
+  Kokkos::deep_copy(A, Acopy);
+  //Run the same SVD (singular values only mode)
+  Kokkos::parallel_for(Kokkos::RangePolicy<ExecSpace>(0, 1),
+      SerialSVDFunctor_SingularValuesOnly<Matrix, Vector>(A, sigma2, work));
+  auto sigma1Host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), sigma1);
+  auto sigma2Host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), sigma2);
+  //Make sure they match
+  for(int i = 0; i < maxrank; i++)
+  {
+    Test::EXPECT_NEAR_KK(sigma1Host(i), sigma2Host(i), Test::svdEpsilon<Scalar>());
+  }
+}
+
+//Test the bidiagonal n*n SVD step where the last diagonal entry is 0
+template <typename Scalar, typename Layout, typename Device>
+void testSerialSVDZeroLastRow(int n)
+{
+  //Generate a bidiagonal matrix
+  using Matrix = Kokkos::View<Scalar**, Layout, Kokkos::HostSpace>;
+  using KAT = Kokkos::ArithTraits<Scalar>;
+  Matrix B = createRandomMatrix<Matrix>(n, n, 0, 1.0);
+  //Zero out entries to make B bidiagonal
+  for(int i = 0; i < n; i++)
+  {
+    for(int j = 0; j < n; j++)
+    {
+      if(i != j && i + 1 != j)
+      {
+        B(i, j) = KAT::zero();
+      }
+    }
+  }
+  //Also zero out the final diagonal to test this routine
+  B(n - 1, n - 1) = KAT::zero();
+  Matrix Vt("Vt", n, n);
+  KokkosBatched::SerialSetIdentity::invoke(Vt);
+  //Compute the initial product to make sure it's maintained by the routine
+  Matrix BVt("UBVt", n, n);
+  Test::vanillaGEMM(1.0, B, Vt, 0.0, BVt);
+  //Run the routine (just on host)
+  KokkosBatched::SerialSVDInternal::svdZeroLastColumn(B.data(), n, B.stride(0), B.stride(1), Vt.data(), Vt.stride(0), Vt.stride(1));
+  //Check that B is still bidiagonal (to a tight tolerance, but not exactly zero)
+  for(int i = 0; i < n; i++)
+  {
+    for(int j = 0; j < n; j++)
+    {
+      if(i != j && i + 1 != j)
+      {
+        Test::EXPECT_NEAR_KK(B(i, j), KAT::zero(), Test::svdEpsilon<Scalar>());
+      }
+    }
+  }
+  //Check that the last superdiagonal is now zero
+  Test::EXPECT_NEAR_KK(B(n - 2, n - 1), KAT::zero(), Test::svdEpsilon<Scalar>());
+  //Check that the product is still maintained
+  Matrix BVt2("UBVt", n, n);
+  Test::vanillaGEMM(1.0, B, Vt, 0.0, BVt2);
+  for(int i = 0; i < n; i++)
+  {
+    for(int j = 0; j < n; j++)
+    {
+      Test::EXPECT_NEAR_KK(BVt(i, j), BVt2(i, j), Test::svdEpsilon<Scalar>());
+    }
+  }
+  //Check that Vt is still orthogonal
+  verifyOrthogonal(Vt);
+}
+
+//Test bidiagonal n*n SVD step where some diagonal i (not the last) is 0.
+template <typename Scalar, typename Layout, typename Device>
+void testSerialSVDZeroDiagonal(int n, int row)
+{
+  //Generate a bidiagonal matrix
+  using Matrix = Kokkos::View<Scalar**, Layout, Kokkos::HostSpace>;
+  using KAT = Kokkos::ArithTraits<Scalar>;
+  int m = n + 2; //Make U somewhat bigger to make sure the Givens transforms are applied correctly
+  Matrix B = createRandomMatrix<Matrix>(m, n, 0, 1.0);
+  //Zero out entries to make B bidiagonal
+  for(int i = 0; i < m; i++)
+  {
+    for(int j = 0; j < n; j++)
+    {
+      if(i != j && i + 1 != j)
+      {
+        B(i, j) = KAT::zero();
+      }
+    }
+  }
+  //Also zero out a diagonal to test this routine
+  B(row, row) = KAT::zero();
+  Matrix U("U", m, m);
+  KokkosBatched::SerialSetIdentity::invoke(U);
+  //Compute the initial product to make sure it's maintained by the routine
+  Matrix UB("UB", m, n);
+  Test::vanillaGEMM(1.0, U, B, 0.0, UB);
+  //Run the routine (just on host)
+  KokkosBatched::SerialSVDInternal::svdZeroRow(row, B.data(), n, B.stride(0), B.stride(1), U.data(), m, U.stride(0), U.stride(1));
+  //Check that B is still bidiagonal (to a tight tolerance, but not exactly zero)
+  for(int i = 0; i < m; i++)
+  {
+    for(int j = 0; j < n; j++)
+    {
+      if(i != j && i + 1 != j)
+      {
+        Test::EXPECT_NEAR_KK(B(i, j), KAT::zero(), Test::svdEpsilon<Scalar>());
+      }
+    }
+  }
+  //Check that row's diagonal is now zero
+  Test::EXPECT_NEAR_KK(B(row, row), KAT::zero(), Test::svdEpsilon<Scalar>());
+  //Check that the product is still maintained
+  Matrix UB2("UB", m, n);
+  Test::vanillaGEMM(1.0, U, B, 0.0, UB2);
+  for(int i = 0; i < m; i++)
+  {
+    for(int j = 0; j < n; j++)
+    {
+      Test::EXPECT_NEAR_KK(UB(i, j), UB2(i, j), Test::svdEpsilon<Scalar>());
+    }
+  }
+  //Check that U is still orthogonal
+  verifyOrthogonal(U);
+}
+
+template <typename Scalar, typename Layout, typename Device>
+void testSVD()
+{
+  testSerialSVD<Scalar, Layout, Device>(0, 0, 0);
+  testSerialSVD<Scalar, Layout, Device>(1, 0, 0);
+  testSerialSVD<Scalar, Layout, Device>(0, 1, 0);
+  testSerialSVD<Scalar, Layout, Device>(2, 2, 0);
+  testSerialSVD<Scalar, Layout, Device>(2, 2, 1);
+  testSerialSVD<Scalar, Layout, Device>(10, 8, 0);
+  testSerialSVD<Scalar, Layout, Device>(8, 10, 0);
+  testSerialSVD<Scalar, Layout, Device>(10, 1, 0);
+  testSerialSVD<Scalar, Layout, Device>(1, 10, 0);
+  testSerialSVD<Scalar, Layout, Device>(10, 8, 3);
+  testSerialSVD<Scalar, Layout, Device>(8, 10, 4);
+  //Test with all-zero matrix
+  testSerialSVD<Scalar, Layout, Device>(8, 10, 0, 0.0);
+  //Test some important internal routines which are not called often
+  testSerialSVDZeroLastRow<Scalar, Layout, Device>(10);
+  testSerialSVDZeroDiagonal<Scalar, Layout, Device>(10, 3);
+  //Test the mode that just computes singular values
+  testSerialSVDSingularValuesOnly<Scalar, Layout, Device>(10, 8);
+}
+
+#if defined(KOKKOSKERNELS_INST_DOUBLE)
+TEST_F( TestCategory, batched_scalar_serial_svd_double ) {
+  //Test general SVD on a few different input sizes (full rank randomized)
+  testSVD<double, Kokkos::LayoutLeft, TestExecSpace>();
+  testSVD<double, Kokkos::LayoutRight, TestExecSpace>();
+}
+#endif
+
+#if defined(KOKKOSKERNELS_INST_FLOAT)
+TEST_F( TestCategory, batched_scalar_serial_svd_float ) {
+  //Test general SVD on a few different input sizes (full rank randomized)
+  testSVD<float, Kokkos::LayoutLeft, TestExecSpace>();
+  testSVD<float, Kokkos::LayoutRight, TestExecSpace>();
+}
+#endif