kokkos · e10harvey · Jul 28, 2021 · Jul 23, 2021 · jennloe · Jul 27, 2021
diff --git a/example/gmres/ex_real_A.cpp b/example/gmres/ex_real_A.cpp
@@ -53,9 +53,8 @@
 #include"gmres.hpp"
 
 int main(int argc, char *argv[]) {
-
-  typedef double                            ST;
-  typedef int                               OT;
+  typedef double ST;
+  typedef int OT;
   typedef Kokkos::DefaultExecutionSpace     EXSP;
 
   using ViewVectorType = Kokkos::View<ST*,Kokkos::LayoutLeft, EXSP>;
@@ -125,10 +124,10 @@ int main(int argc, char *argv[]) {
   GmresStats solveStats = gmres<ST, Kokkos::LayoutLeft, EXSP>(A, B, X, solverOpts);
 
   // Double check residuals at end of solve:
-  double nrmB = KokkosBlas::nrm2(B);
+  ST nrmB = KokkosBlas::nrm2(B);
   KokkosSparse::spmv("N", 1.0, A, X, 0.0, Wj); // wj = Ax
   KokkosBlas::axpy(-1.0, Wj, B); // b = b-Ax. 
-  double endRes = KokkosBlas::nrm2(B)/nrmB;
+  ST endRes = KokkosBlas::nrm2(B)/nrmB;
   std::cout << "=========================================" << std::endl;
   std::cout << "Verify from main: Ending residual is " << endRes << std::endl;
   std::cout << "Number of iterations is: " << solveStats.numIters << std::endl;

diff --git a/example/gmres/gmres.hpp b/example/gmres/gmres.hpp
@@ -50,8 +50,22 @@
 #include<KokkosBlas3_trsm.hpp>
 #include<KokkosSparse_spmv.hpp>
 
+
+// abs() overloads for Kokkos half_t and complex<half_t>; the unqualified abs(...) call in gmres() can pick these up.
+// They are only compiled when half_t is a distinct type, i.e. when KOKKOS_HALF_T_IS_FLOAT is false.
+// Marked inline: they are non-template functions defined in a header that more than one .cpp file includes.
+#if !KOKKOS_HALF_T_IS_FLOAT
+inline Kokkos::Experimental::half_t abs(Kokkos::Experimental::half_t arg) {
+  return arg < 0.0 ? -arg : arg;
+}
+
+inline Kokkos::complex<Kokkos::Experimental::half_t> abs(Kokkos::complex<Kokkos::Experimental::half_t> arg) {
+  return Kokkos::complex<Kokkos::Experimental::half_t>(abs(Kokkos::complex<double>(static_cast<double>(arg.real()), static_cast<double>(arg.imag()))));
+}
+#endif  // !KOKKOS_HALF_T_IS_FLOAT
+
 // This struct is returned to the user to give solver
-// statistics and convergence status. 
+// statistics and convergence status.
 struct GmresStats {
   int numIters;
   double endRelRes;
@@ -71,7 +85,7 @@ struct GmresStats {
   }
 };
 
-// This struct allows the user to pass in several 
+// This struct allows the user to pass in several
 // options to the solver. 
 template< class ScalarType > 
 struct GmresOpts
@@ -96,12 +110,12 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
   Kokkos::Profiling::pushRegion("GMRES::TotalTime:");
   typedef Kokkos::Details::ArithTraits<ScalarType> AT;
   typedef typename AT::val_type ST; // So this code will run with ScalarType = std::complex<T>.
-  typedef typename AT::mag_type MT; 
+  typedef typename AT::mag_type MT;
   ST one = AT::one();
   ST zero = AT::zero();
 
   typedef Kokkos::View<ST*, Layout, EXSP> ViewVectorType;
-  typedef Kokkos::View<ST*, Kokkos::LayoutRight, Kokkos::HostSpace> ViewHostVectorType; 
+  typedef Kokkos::View<ST*, Kokkos::LayoutRight, Kokkos::HostSpace> ViewHostVectorType;
   typedef Kokkos::View<ST**, Layout, EXSP> ViewMatrixType;
 
   unsigned int n = A.numRows();
@@ -112,7 +126,7 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
   // Check compatibility of dimensions at run time.
   if ( n != unsigned(A.numCols()) ){
     std::ostringstream os;
-    os << "gmres: A must be a square matrix: " 
+    os << "gmres: A must be a square matrix: "
       << "numRows: " << n << "  numCols: " << A.numCols();
       Kokkos::Impl::throw_runtime_exception (os.str ());
   }
@@ -121,7 +135,7 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
       X.extent(0) != n ) {
     std::ostringstream os;
     os << "gmres: Dimensions of A, X, and B do not match: "
-       << "A: " << n << " x " << n << ", X: " << X.extent(0) 
+       << "A: " << n << " x " << n << ", X: " << X.extent(0)
        << "x 1, B: " << B.extent(0) << " x 1";
     Kokkos::Impl::throw_runtime_exception (os.str ());
   }
@@ -134,42 +148,42 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
   }
 
   bool converged = false;
-  int cycle = 0; // How many times have we restarted? 
+  int cycle = 0; // How many times have we restarted?
   int numIters = 0;  //Number of iterations within the cycle before convergence.
   MT nrmB, trueRes, relRes, shortRelRes;
   GmresStats myStats;
-  
+
   std::cout << "Convergence tolerance is: " << opts.tol << std::endl;
 
-  ViewVectorType Xiter("Xiter",n); //Intermediate solution at iterations before restart. 
+  ViewVectorType Xiter("Xiter",n); //Intermediate solution at iterations before restart.
   ViewVectorType Res(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Res"),n); //Residual vector
   ViewVectorType Wj(Kokkos::view_alloc(Kokkos::WithoutInitializing, "W_j"),n); //Tmp work vector 1
   ViewHostVectorType GVec_h(Kokkos::view_alloc(Kokkos::WithoutInitializing, "GVec"),m+1);
-  ViewMatrixType GLsSoln("GLsSoln",m,1);//LS solution vec for Givens Rotation. Must be 2-D for trsm. 
-  typename ViewMatrixType::HostMirror GLsSoln_h = Kokkos::create_mirror_view(GLsSoln); //This one is needed for triangular solve. 
+  ViewMatrixType GLsSoln("GLsSoln",m,1);//LS solution vec for Givens Rotation. Must be 2-D for trsm.
+  typename ViewMatrixType::HostMirror GLsSoln_h = Kokkos::create_mirror_view(GLsSoln); //This one is needed for triangular solve.
   ViewHostVectorType CosVal_h("CosVal",m);
   ViewHostVectorType SinVal_h("SinVal",m);
   ViewMatrixType V(Kokkos::view_alloc(Kokkos::WithoutInitializing, "V"),n,m+1);
-  ViewMatrixType VSub; //Subview of 1st m cols for updating soln. 
-  ViewVectorType orthoTmp(Kokkos::view_alloc(Kokkos::WithoutInitializing, "orthoTmp"),m); 
+  ViewMatrixType VSub; //Subview of 1st m cols for updating soln.
+  ViewVectorType orthoTmp(Kokkos::view_alloc(Kokkos::WithoutInitializing, "orthoTmp"),m);
 
-  ViewMatrixType H("H",m+1,m); //H matrix on device. Also used in Arn Rec debug. 
-  typename ViewMatrixType::HostMirror H_h = Kokkos::create_mirror_view(H); //Make H into a host view of H. 
+  ViewMatrixType H("H",m+1,m); //H matrix on device. Also used in Arn Rec debug.
+  typename ViewMatrixType::HostMirror H_h = Kokkos::create_mirror_view(H); //Make H into a host view of H.
 
   //Compute initial residuals:
   nrmB = KokkosBlas::nrm2(B);
   Kokkos::deep_copy(Res,B);
   KokkosSparse::spmv("N", one, A, X, zero, Wj); // wj = Ax
-  KokkosBlas::axpy(-one, Wj, Res); // res = res-Wj = b-Ax. 
+  KokkosBlas::axpy(-one, Wj, Res); // res = res-Wj = b-Ax.
   trueRes = KokkosBlas::nrm2(Res);
   relRes = trueRes/nrmB;
   shortRelRes = relRes;
-    
+
   while( !converged && cycle <= opts.maxRestart){
     GVec_h(0) = trueRes;
 
     // Run Arnoldi iteration:
-    auto Vj = Kokkos::subview(V,Kokkos::ALL,0); 
+    auto Vj = Kokkos::subview(V,Kokkos::ALL,0);
     Kokkos::deep_copy(Vj,Res);
     KokkosBlas::scal(Vj,one/trueRes,Vj); //V0 = V0/norm(V0)
 
@@ -178,21 +192,21 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
       Kokkos::Profiling::pushRegion("GMRES::Orthog:");
       if( opts.ortho == "MGS"){
         for (int i = 0; i <= j; i++){
-          auto Vi = Kokkos::subview(V,Kokkos::ALL,i); 
-          H_h(i,j) = KokkosBlas::dot(Vi,Wj);  //Vi^* Wj  
-          KokkosBlas::axpy(-H_h(i,j),Vi,Wj);//wj = wj-Hij*Vi 
+          auto Vi = Kokkos::subview(V,Kokkos::ALL,i);
+          H_h(i,j) = KokkosBlas::dot(Vi,Wj);  //Vi^* Wj
+          KokkosBlas::axpy(-H_h(i,j),Vi,Wj);//wj = wj-Hij*Vi
         }
         auto Hj_h = Kokkos::subview(H_h,Kokkos::make_pair(0,j+1) ,j);
       }
       else if( opts.ortho == "CGS2"){
-        auto V0j = Kokkos::subview(V,Kokkos::ALL,Kokkos::make_pair(0,j+1)); 
+        auto V0j = Kokkos::subview(V,Kokkos::ALL,Kokkos::make_pair(0,j+1));
         auto Hj = Kokkos::subview(H,Kokkos::make_pair(0,j+1) ,j);
         auto Hj_h = Kokkos::subview(H_h,Kokkos::make_pair(0,j+1) ,j);
         KokkosBlas::gemv("C", one, V0j, Wj, zero, Hj); // Hj = Vj^T * wj
         KokkosBlas::gemv("N", -one, V0j, Hj, one, Wj); // wj = wj - Vj * Hj
 
         //Re-orthog CGS:
-        auto orthoTmpSub = Kokkos::subview(orthoTmp,Kokkos::make_pair(0,j+1)); 
+        auto orthoTmpSub = Kokkos::subview(orthoTmp,Kokkos::make_pair(0,j+1));
         KokkosBlas::gemv("C", one, V0j, Wj, zero, orthoTmpSub); // tmp (Hj) = Vj^T * wj
         KokkosBlas::gemv("N", -one, V0j, orthoTmpSub, one, Wj); // wj = wj - Vj * tmp 
         KokkosBlas::axpy(one, orthoTmpSub, Hj); // Hj = Hj + tmp
@@ -203,12 +217,12 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
       }
 
       MT tmpNrm = KokkosBlas::nrm2(Wj);
-      H_h(j+1,j) = tmpNrm; 
-      if(tmpNrm < 1e-14){ 
-        throw std::runtime_error("GMRES lucky breakdown. Solver terminated without convergence."); 
+      H_h(j+1,j) = tmpNrm;
+      if(tmpNrm < 1e-14){
+        throw std::runtime_error("GMRES lucky breakdown. Solver terminated without convergence.");
       }
 
-      Vj = Kokkos::subview(V,Kokkos::ALL,j+1); 
+      Vj = Kokkos::subview(V,Kokkos::ALL,j+1);
       KokkosBlas::scal(Vj,one/H_h(j+1,j),Wj); // Wj = Vj/H(j+1,j)
       Kokkos::Profiling::popRegion();
 
@@ -222,41 +236,41 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
       }
       ST f = H_h(j,j);
       ST g = H_h(j+1,j);
-      MT f2 = AT::real(f)*AT::real(f) + AT::imag(f)*AT::imag(f); 
+      MT f2 = AT::real(f)*AT::real(f) + AT::imag(f)*AT::imag(f);
       MT g2 = AT::real(g)*AT::real(g) + AT::imag(g)*AT::imag(g);
       ST fg2 = f2 + g2;
-      ST D1 = one / AT::sqrt(f2*fg2); 
+      ST D1 = one / AT::sqrt(f2*fg2);
       CosVal_h(j) = f2*D1;
       fg2 = fg2 * D1;
       H_h(j,j) = f*fg2;
       SinVal_h(j) = f*D1*AT::conj(g);
-      H_h(j+1,j) = zero; 
+      H_h(j+1,j) = zero;
 
       GVec_h(j+1) = GVec_h(j)*(-AT::conj(SinVal_h(j)));
       GVec_h(j) = GVec_h(j)*CosVal_h(j);
-      shortRelRes = abs(GVec_h(j+1))/nrmB;
+      shortRelRes = abs(GVec_h(j+1))/nrmB; // unqualified abs: standard-library overload for built-in scalars, or the half_t overloads defined at the top of this header
 
       std::cout << "Shortcut relative residual for iteration " << j+(cycle*m) << " is: " << shortRelRes << std::endl;
 
       //If short residual converged, or time to restart, check true residual
       if( shortRelRes < opts.tol || j == m-1 ) {
         //Compute least squares soln with Givens rotation:
-        auto GLsSolnSub_h = Kokkos::subview(GLsSoln_h,Kokkos::ALL,0); //Original view has rank 2, need a rank 1 here. 
+        auto GLsSolnSub_h = Kokkos::subview(GLsSoln_h,Kokkos::ALL,0); //Original view has rank 2, need a rank 1 here.
         auto GVecSub_h = Kokkos::subview(GVec_h, Kokkos::make_pair(0,m));
         Kokkos::deep_copy(GLsSolnSub_h, GVecSub_h); //Copy LS rhs vec for triangle solve.
         auto GLsSolnSub2_h = Kokkos::subview(GLsSoln_h,Kokkos::make_pair(0,j+1),Kokkos::ALL);
-        auto H_Sub_h = Kokkos::subview(H_h, Kokkos::make_pair(0,j+1), Kokkos::make_pair(0,j+1)); 
+        auto H_Sub_h = Kokkos::subview(H_h, Kokkos::make_pair(0,j+1), Kokkos::make_pair(0,j+1));
         KokkosBlas::trsm("L", "U", "N", "N", one, H_Sub_h, GLsSolnSub2_h); //GLsSoln = H\GLsSoln
         Kokkos::deep_copy(GLsSoln, GLsSoln_h);
 
         //Update solution and compute residual with Givens:
-        VSub = Kokkos::subview(V,Kokkos::ALL,Kokkos::make_pair(0,j+1)); 
+        VSub = Kokkos::subview(V,Kokkos::ALL,Kokkos::make_pair(0,j+1));
         Kokkos::deep_copy(Xiter,X); //Can't overwrite X with intermediate solution.
         auto GLsSolnSub3 = Kokkos::subview(GLsSoln,Kokkos::make_pair(0,j+1),0);
         KokkosBlas::gemv ("N", one, VSub, GLsSolnSub3, one, Xiter); //x_iter = x + V(1:j+1)*lsSoln
         KokkosSparse::spmv("N", one, A, Xiter, zero, Wj); // wj = Ax
         Kokkos::deep_copy(Res,B); // Reset r=b.
-        KokkosBlas::axpy(-one, Wj, Res); // r = b-Ax. 
+        KokkosBlas::axpy(-one, Wj, Res); // r = b-Ax.
         trueRes = KokkosBlas::nrm2(Res);
         relRes = trueRes/nrmB;
         std::cout << "True relative residual for iteration " << j+(cycle*m) << " is : " << relRes << std::endl;
@@ -265,7 +279,7 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
         if(relRes < opts.tol){
           converged = true;
           Kokkos::deep_copy(X, Xiter); //Final solution is the iteration solution.
-          break; //End Arnoldi iteration. 
+          break; //End Arnoldi iteration.
         }
       }
 
@@ -278,7 +292,7 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
   }
 
   std::cout << "Ending relative residual is: " << relRes << std::endl;
-  myStats.endRelRes = relRes;
+  myStats.endRelRes = static_cast<double>(relRes);
   if( converged ){
     std::cout << "Solver converged! " << std::endl;
     myStats.convFlagVal = GmresStats::FLAG::Conv;
@@ -297,4 +311,3 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
   Kokkos::Profiling::popRegion();
   return myStats;
 }
-
diff --git a/example/gmres/test_real_A.cpp b/example/gmres/test_real_A.cpp
@@ -54,39 +54,40 @@
 #include"gmres.hpp"
 
 int main(int /*argc*/, char ** /*argv[]*/) {
+  typedef double ST;
+  typedef int OT;
+  typedef Kokkos::DefaultExecutionSpace EXSP;
 
-  typedef double                             ST;
-  typedef int                               OT;
-  typedef Kokkos::DefaultExecutionSpace     EXSP;
+  //  std::cout << "ST: " << typeid(ST).name() << std::endl;
+  //  std::cout << "EXSP: " << typeid(EXSP).name() << std::endl;
 
-  using sp_matrix_type = KokkosSparse::CrsMatrix<ST, OT, EXSP>; 
-  using ViewVectorType = Kokkos::View<ST*,Kokkos::LayoutLeft, EXSP>;
+  using sp_matrix_type = KokkosSparse::CrsMatrix<ST, OT, EXSP>;
+  using ViewVectorType = Kokkos::View<ST*, Kokkos::LayoutLeft, EXSP>;
   typedef sp_matrix_type::non_const_ordinal_type ncOT;
   typedef sp_matrix_type::const_ordinal_type cOT;
   typedef sp_matrix_type::non_const_size_type ncST;
 
   GmresOpts<ST> solverOpts;
-  solverOpts.ortho="CGS2"; //orthog type
-  solverOpts.m = 15; //Max subspace size before restarting.
+  solverOpts.ortho      = "CGS2";  // orthog type
+  solverOpts.m          = 15;      // Max subspace size before restarting.
   solverOpts.tol = 1e-10; //Relative residual convergence tolerance.
   solverOpts.maxRestart = 50;
   bool pass1 = false;
   bool pass2 = false;
 
   std::cout << "Convergence tolerance is: " << solverOpts.tol << std::endl;
 
-  //Initialize Kokkos AFTER parsing parameters:
+  // Initialize Kokkos AFTER parsing parameters:
   Kokkos::initialize();
   {
-
   // Create a diagonally dominant sparse matrix to test:
   ncST nnz;
   cOT n = 5000;
   cOT numRows = n;
   cOT numCols = n;
   cOT diagDominance = 1;
   nnz = 10 * numRows;
-  sp_matrix_type A = KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix<sp_matrix_type> 
+  sp_matrix_type A = KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix<sp_matrix_type>
                                                 (numRows, numCols, nnz, 0, ncOT(0.01 * numRows), diagDominance);
 
   // Set initial vectors:
@@ -101,23 +102,25 @@ int main(int /*argc*/, char ** /*argv[]*/) {
   GmresStats solveStats = gmres<ST, Kokkos::LayoutLeft, EXSP>(A, B, X, solverOpts);
 
   // Double check residuals at end of solve:
-  double nrmB = KokkosBlas::nrm2(B);
-  KokkosSparse::spmv("N", 1.0, A, X, 0.0, Wj); // wj = Ax
-  KokkosBlas::axpy(-1.0, Wj, B); // b = b-Ax. 
+  double nrmB = static_cast<double>(KokkosBlas::nrm2(B));
+  KokkosSparse::spmv("N", ST(1.0), A, X, ST(0.0), Wj); // wj = Ax
+  KokkosBlas::axpy(ST(-1.0), Wj, B); // b = b-Ax.
   double endRes = KokkosBlas::nrm2(B)/nrmB;
   std::cout << "=======================================" << std::endl;
   std::cout << "Verify from main: Ending residual is " << endRes << std::endl;
   std::cout << "Number of iterations is: " << solveStats.numIters << std::endl;
   std::cout << "Diff of residual from main - residual from solver: " << solveStats.endRelRes - endRes << std::endl;
   std::cout << "Convergence flag is : " << solveStats.convFlag() << std::endl;
-
-  if( solveStats.numIters < 40 && solveStats.numIters > 20 && endRes < solverOpts.tol){
+
+  if (solveStats.numIters < 40 && solveStats.numIters > 20 &&
+      endRes < static_cast<double>(solverOpts.tol)) {
     std::cout << "Test CGS2 Passed!" << std::endl;
     pass1 = true;
-  }
-  else{
-    std::cout << "Solver did not converge within the expected number of iterations. " << std::endl
-              << "CGS2 Test Failed." << std::endl;
+  } else {
+    std::cout
+        << "Solver did not converge within the expected number of iterations. "
+        << std::endl
+        << "CGS2 Test Failed." << std::endl;
   }
   std::cout << "=======================================" << std::endl << std::endl << std::endl;
 
@@ -129,29 +132,32 @@ int main(int /*argc*/, char ** /*argv[]*/) {
   solveStats = gmres<ST, Kokkos::LayoutLeft, EXSP>(A, B, X, solverOpts);
 
   // Double check residuals at end of solve:
-  nrmB = KokkosBlas::nrm2(B);
-  KokkosSparse::spmv("N", 1.0, A, X, 0.0, Wj); // wj = Ax
-  KokkosBlas::axpy(-1.0, Wj, B); // b = b-Ax. 
+  nrmB = static_cast<double>(KokkosBlas::nrm2(B));
+  KokkosSparse::spmv("N", ST(1.0), A, X, ST(0.0), Wj); // wj = Ax
+  KokkosBlas::axpy(ST(-1.0), Wj, B); // b = b-Ax.
   endRes = KokkosBlas::nrm2(B)/nrmB;
   std::cout << "=======================================" << std::endl;
   std::cout << "Verify from main: Ending residual is " << endRes << std::endl;
   std::cout << "Number of iterations is: " << solveStats.numIters << std::endl;
   std::cout << "Diff of residual from main - residual from solver: " << solveStats.endRelRes - endRes << std::endl;
   std::cout << "Convergence flag is : " << solveStats.convFlag() << std::endl;
-
-  if( solveStats.numIters < 40 && solveStats.numIters > 20 && endRes < solverOpts.tol){
+
+  if (solveStats.numIters < 40 && solveStats.numIters > 20 &&
+      endRes < static_cast<double>(solverOpts.tol)) {
     std::cout << "Test MGS Passed!" << std::endl;
-    if( pass1 ){ pass2 = true; };
+    if (pass1) {
+      pass2 = true;
+    };
+  } else {
+    std::cout
+        << "Solver did not converge within the expected number of iterations. "
+        << std::endl
+        << "MGS Test Failed." << std::endl;
   }
-  else{
-    std::cout << "Solver did not converge within the expected number of iterations. " << std::endl
-              << "MGS Test Failed." << std::endl;
-      }
   std::cout << "=======================================" << std::endl << std::endl << std::endl;
 
   }
   Kokkos::finalize();
 
   return ( pass2 ? EXIT_SUCCESS : EXIT_FAILURE );
 }
-