Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates for running GMRES example with half precision #1067

Merged
merged 1 commit into from
Jul 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions example/gmres/ex_real_A.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,8 @@
#include"gmres.hpp"

int main(int argc, char *argv[]) {

typedef double ST;
typedef int OT;
typedef double ST;
typedef int OT;
typedef Kokkos::DefaultExecutionSpace EXSP;

using ViewVectorType = Kokkos::View<ST*,Kokkos::LayoutLeft, EXSP>;
Expand Down Expand Up @@ -125,10 +124,10 @@ int main(int argc, char *argv[]) {
GmresStats solveStats = gmres<ST, Kokkos::LayoutLeft, EXSP>(A, B, X, solverOpts);

// Double check residuals at end of solve:
double nrmB = KokkosBlas::nrm2(B);
ST nrmB = KokkosBlas::nrm2(B);
KokkosSparse::spmv("N", 1.0, A, X, 0.0, Wj); // wj = Ax
KokkosBlas::axpy(-1.0, Wj, B); // b = b-Ax.
double endRes = KokkosBlas::nrm2(B)/nrmB;
ST endRes = KokkosBlas::nrm2(B)/nrmB;
std::cout << "=========================================" << std::endl;
std::cout << "Verify from main: Ending residual is " << endRes << std::endl;
std::cout << "Number of iterations is: " << solveStats.numIters << std::endl;
Expand Down
89 changes: 51 additions & 38 deletions example/gmres/gmres.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,22 @@
#include<KokkosBlas3_trsm.hpp>
#include<KokkosSparse_spmv.hpp>


////////////////////////////////////////////////////////////////////////////////
// libstdc++ half_t overloads
////////////////////////////////////////////////////////////////////////////////
#if !KOKKOS_HALF_T_IS_FLOAT
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm okay with putting this here, but would it make more sense in a separate file?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should reside in Kokkos Core.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will leave this here for now, but will remove it once we add it to Kokkos Core.

/// @brief Absolute value of a half-precision scalar.
///
/// @param arg Value whose magnitude is requested.
/// @return `-arg` when `arg` compares less than 0.0, otherwise `arg` unchanged.
///
/// Declared `inline` because this definition lives in a header: without it,
/// including the header from more than one translation unit produces
/// multiple-definition link errors.
inline Kokkos::Experimental::half_t abs(Kokkos::Experimental::half_t arg) {
  return arg < 0.0 ? -arg : arg;
}

/// @brief Magnitude of a half-precision complex value, computed in double.
///
/// The real and imaginary parts are widened to double, `abs` is evaluated on
/// the resulting `Kokkos::complex<double>`, and the result is used to build
/// the returned `Kokkos::complex<half_t>`.
///
/// @param arg Complex half-precision value.
/// @return A `Kokkos::complex<half_t>` constructed from the double-precision
///         magnitude.
///         NOTE(review): presumably the imaginary part of the result is zero
///         (single-argument complex constructor) -- confirm against Kokkos.
///
/// Declared `inline` because this definition lives in a header: without it,
/// including the header from more than one translation unit produces
/// multiple-definition link errors.
inline Kokkos::complex<Kokkos::Experimental::half_t> abs(Kokkos::complex<Kokkos::Experimental::half_t> arg) {
  // NOTE(review): the unqualified abs below is expected to resolve to the
  // Kokkos::complex<double> overload through argument-dependent lookup -- confirm.
  return Kokkos::complex<Kokkos::Experimental::half_t>(
      abs(Kokkos::complex<double>(static_cast<double>(arg.real()),
                                  static_cast<double>(arg.imag()))));
}
#endif // KOKKOS_HALF_T_IS_FLOAT

// This struct is returned to the user to give solver
// statistics and convergence status.
// statistics and convergence status.
struct GmresStats {
int numIters;
double endRelRes;
Expand All @@ -71,7 +85,7 @@ struct GmresStats {
}
};

// This struct allows the user to pass in several
// This struct allows the user to pass in several
// options to the solver.
template< class ScalarType >
struct GmresOpts
Expand All @@ -96,12 +110,12 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
Kokkos::Profiling::pushRegion("GMRES::TotalTime:");
typedef Kokkos::Details::ArithTraits<ScalarType> AT;
typedef typename AT::val_type ST; // So this code will run with ScalarType = std::complex<T>.
typedef typename AT::mag_type MT;
typedef typename AT::mag_type MT;
ST one = AT::one();
ST zero = AT::zero();

typedef Kokkos::View<ST*, Layout, EXSP> ViewVectorType;
typedef Kokkos::View<ST*, Kokkos::LayoutRight, Kokkos::HostSpace> ViewHostVectorType;
typedef Kokkos::View<ST*, Kokkos::LayoutRight, Kokkos::HostSpace> ViewHostVectorType;
typedef Kokkos::View<ST**, Layout, EXSP> ViewMatrixType;

unsigned int n = A.numRows();
Expand All @@ -112,7 +126,7 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
// Check compatibility of dimensions at run time.
if ( n != unsigned(A.numCols()) ){
std::ostringstream os;
os << "gmres: A must be a square matrix: "
os << "gmres: A must be a square matrix: "
<< "numRows: " << n << " numCols: " << A.numCols();
Kokkos::Impl::throw_runtime_exception (os.str ());
}
Expand All @@ -121,7 +135,7 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
X.extent(0) != n ) {
std::ostringstream os;
os << "gmres: Dimensions of A, X, and B do not match: "
<< "A: " << n << " x " << n << ", X: " << X.extent(0)
<< "A: " << n << " x " << n << ", X: " << X.extent(0)
<< "x 1, B: " << B.extent(0) << " x 1";
Kokkos::Impl::throw_runtime_exception (os.str ());
}
Expand All @@ -134,42 +148,42 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
}

bool converged = false;
int cycle = 0; // How many times have we restarted?
int cycle = 0; // How many times have we restarted?
int numIters = 0; //Number of iterations within the cycle before convergence.
MT nrmB, trueRes, relRes, shortRelRes;
GmresStats myStats;

std::cout << "Convergence tolerance is: " << opts.tol << std::endl;

ViewVectorType Xiter("Xiter",n); //Intermediate solution at iterations before restart.
ViewVectorType Xiter("Xiter",n); //Intermediate solution at iterations before restart.
ViewVectorType Res(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Res"),n); //Residual vector
ViewVectorType Wj(Kokkos::view_alloc(Kokkos::WithoutInitializing, "W_j"),n); //Tmp work vector 1
ViewHostVectorType GVec_h(Kokkos::view_alloc(Kokkos::WithoutInitializing, "GVec"),m+1);
ViewMatrixType GLsSoln("GLsSoln",m,1);//LS solution vec for Givens Rotation. Must be 2-D for trsm.
typename ViewMatrixType::HostMirror GLsSoln_h = Kokkos::create_mirror_view(GLsSoln); //This one is needed for triangular solve.
ViewMatrixType GLsSoln("GLsSoln",m,1);//LS solution vec for Givens Rotation. Must be 2-D for trsm.
typename ViewMatrixType::HostMirror GLsSoln_h = Kokkos::create_mirror_view(GLsSoln); //This one is needed for triangular solve.
ViewHostVectorType CosVal_h("CosVal",m);
ViewHostVectorType SinVal_h("SinVal",m);
ViewMatrixType V(Kokkos::view_alloc(Kokkos::WithoutInitializing, "V"),n,m+1);
ViewMatrixType VSub; //Subview of 1st m cols for updating soln.
ViewVectorType orthoTmp(Kokkos::view_alloc(Kokkos::WithoutInitializing, "orthoTmp"),m);
ViewMatrixType VSub; //Subview of 1st m cols for updating soln.
ViewVectorType orthoTmp(Kokkos::view_alloc(Kokkos::WithoutInitializing, "orthoTmp"),m);

ViewMatrixType H("H",m+1,m); //H matrix on device. Also used in Arn Rec debug.
typename ViewMatrixType::HostMirror H_h = Kokkos::create_mirror_view(H); //Make H into a host view of H.
ViewMatrixType H("H",m+1,m); //H matrix on device. Also used in Arn Rec debug.
typename ViewMatrixType::HostMirror H_h = Kokkos::create_mirror_view(H); //Make H into a host view of H.

//Compute initial residuals:
nrmB = KokkosBlas::nrm2(B);
Kokkos::deep_copy(Res,B);
KokkosSparse::spmv("N", one, A, X, zero, Wj); // wj = Ax
KokkosBlas::axpy(-one, Wj, Res); // res = res-Wj = b-Ax.
KokkosBlas::axpy(-one, Wj, Res); // res = res-Wj = b-Ax.
trueRes = KokkosBlas::nrm2(Res);
relRes = trueRes/nrmB;
shortRelRes = relRes;

while( !converged && cycle <= opts.maxRestart){
GVec_h(0) = trueRes;

// Run Arnoldi iteration:
auto Vj = Kokkos::subview(V,Kokkos::ALL,0);
auto Vj = Kokkos::subview(V,Kokkos::ALL,0);
Kokkos::deep_copy(Vj,Res);
KokkosBlas::scal(Vj,one/trueRes,Vj); //V0 = V0/norm(V0)

Expand All @@ -178,21 +192,21 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
Kokkos::Profiling::pushRegion("GMRES::Orthog:");
if( opts.ortho == "MGS"){
for (int i = 0; i <= j; i++){
auto Vi = Kokkos::subview(V,Kokkos::ALL,i);
H_h(i,j) = KokkosBlas::dot(Vi,Wj); //Vi^* Wj
KokkosBlas::axpy(-H_h(i,j),Vi,Wj);//wj = wj-Hij*Vi
auto Vi = Kokkos::subview(V,Kokkos::ALL,i);
H_h(i,j) = KokkosBlas::dot(Vi,Wj); //Vi^* Wj
KokkosBlas::axpy(-H_h(i,j),Vi,Wj);//wj = wj-Hij*Vi
}
auto Hj_h = Kokkos::subview(H_h,Kokkos::make_pair(0,j+1) ,j);
}
else if( opts.ortho == "CGS2"){
auto V0j = Kokkos::subview(V,Kokkos::ALL,Kokkos::make_pair(0,j+1));
auto V0j = Kokkos::subview(V,Kokkos::ALL,Kokkos::make_pair(0,j+1));
auto Hj = Kokkos::subview(H,Kokkos::make_pair(0,j+1) ,j);
auto Hj_h = Kokkos::subview(H_h,Kokkos::make_pair(0,j+1) ,j);
KokkosBlas::gemv("C", one, V0j, Wj, zero, Hj); // Hj = Vj^T * wj
KokkosBlas::gemv("N", -one, V0j, Hj, one, Wj); // wj = wj - Vj * Hj

//Re-orthog CGS:
auto orthoTmpSub = Kokkos::subview(orthoTmp,Kokkos::make_pair(0,j+1));
auto orthoTmpSub = Kokkos::subview(orthoTmp,Kokkos::make_pair(0,j+1));
KokkosBlas::gemv("C", one, V0j, Wj, zero, orthoTmpSub); // tmp (Hj) = Vj^T * wj
KokkosBlas::gemv("N", -one, V0j, orthoTmpSub, one, Wj); // wj = wj - Vj * tmp
KokkosBlas::axpy(one, orthoTmpSub, Hj); // Hj = Hj + tmp
Expand All @@ -203,12 +217,12 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
}

MT tmpNrm = KokkosBlas::nrm2(Wj);
H_h(j+1,j) = tmpNrm;
if(tmpNrm < 1e-14){
throw std::runtime_error("GMRES lucky breakdown. Solver terminated without convergence.");
H_h(j+1,j) = tmpNrm;
if(tmpNrm < 1e-14){
throw std::runtime_error("GMRES lucky breakdown. Solver terminated without convergence.");
}

Vj = Kokkos::subview(V,Kokkos::ALL,j+1);
Vj = Kokkos::subview(V,Kokkos::ALL,j+1);
KokkosBlas::scal(Vj,one/H_h(j+1,j),Wj); // Wj = Vj/H(j+1,j)
Kokkos::Profiling::popRegion();

Expand All @@ -222,41 +236,41 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
}
ST f = H_h(j,j);
ST g = H_h(j+1,j);
MT f2 = AT::real(f)*AT::real(f) + AT::imag(f)*AT::imag(f);
MT f2 = AT::real(f)*AT::real(f) + AT::imag(f)*AT::imag(f);
MT g2 = AT::real(g)*AT::real(g) + AT::imag(g)*AT::imag(g);
ST fg2 = f2 + g2;
ST D1 = one / AT::sqrt(f2*fg2);
ST D1 = one / AT::sqrt(f2*fg2);
CosVal_h(j) = f2*D1;
fg2 = fg2 * D1;
H_h(j,j) = f*fg2;
SinVal_h(j) = f*D1*AT::conj(g);
H_h(j+1,j) = zero;
H_h(j+1,j) = zero;

GVec_h(j+1) = GVec_h(j)*(-AT::conj(SinVal_h(j)));
GVec_h(j) = GVec_h(j)*CosVal_h(j);
shortRelRes = abs(GVec_h(j+1))/nrmB;
shortRelRes = abs(GVec_h(j+1))/nrmB; // this abs is in libstdc++

std::cout << "Shortcut relative residual for iteration " << j+(cycle*m) << " is: " << shortRelRes << std::endl;

//If short residual converged, or time to restart, check true residual
if( shortRelRes < opts.tol || j == m-1 ) {
//Compute least squares soln with Givens rotation:
auto GLsSolnSub_h = Kokkos::subview(GLsSoln_h,Kokkos::ALL,0); //Original view has rank 2, need a rank 1 here.
auto GLsSolnSub_h = Kokkos::subview(GLsSoln_h,Kokkos::ALL,0); //Original view has rank 2, need a rank 1 here.
auto GVecSub_h = Kokkos::subview(GVec_h, Kokkos::make_pair(0,m));
Kokkos::deep_copy(GLsSolnSub_h, GVecSub_h); //Copy LS rhs vec for triangle solve.
auto GLsSolnSub2_h = Kokkos::subview(GLsSoln_h,Kokkos::make_pair(0,j+1),Kokkos::ALL);
auto H_Sub_h = Kokkos::subview(H_h, Kokkos::make_pair(0,j+1), Kokkos::make_pair(0,j+1));
auto H_Sub_h = Kokkos::subview(H_h, Kokkos::make_pair(0,j+1), Kokkos::make_pair(0,j+1));
KokkosBlas::trsm("L", "U", "N", "N", one, H_Sub_h, GLsSolnSub2_h); //GLsSoln = H\GLsSoln
Kokkos::deep_copy(GLsSoln, GLsSoln_h);

//Update solution and compute residual with Givens:
VSub = Kokkos::subview(V,Kokkos::ALL,Kokkos::make_pair(0,j+1));
VSub = Kokkos::subview(V,Kokkos::ALL,Kokkos::make_pair(0,j+1));
Kokkos::deep_copy(Xiter,X); //Can't overwrite X with intermediate solution.
auto GLsSolnSub3 = Kokkos::subview(GLsSoln,Kokkos::make_pair(0,j+1),0);
KokkosBlas::gemv ("N", one, VSub, GLsSolnSub3, one, Xiter); //x_iter = x + V(1:j+1)*lsSoln
KokkosSparse::spmv("N", one, A, Xiter, zero, Wj); // wj = Ax
Kokkos::deep_copy(Res,B); // Reset r=b.
KokkosBlas::axpy(-one, Wj, Res); // r = b-Ax.
KokkosBlas::axpy(-one, Wj, Res); // r = b-Ax.
trueRes = KokkosBlas::nrm2(Res);
relRes = trueRes/nrmB;
std::cout << "True relative residual for iteration " << j+(cycle*m) << " is : " << relRes << std::endl;
Expand All @@ -265,7 +279,7 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
if(relRes < opts.tol){
converged = true;
Kokkos::deep_copy(X, Xiter); //Final solution is the iteration solution.
break; //End Arnoldi iteration.
break; //End Arnoldi iteration.
}
}

Expand All @@ -278,7 +292,7 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
}

std::cout << "Ending relative residual is: " << relRes << std::endl;
myStats.endRelRes = relRes;
myStats.endRelRes = static_cast<double>(relRes);
if( converged ){
std::cout << "Solver converged! " << std::endl;
myStats.convFlagVal = GmresStats::FLAG::Conv;
Expand All @@ -297,4 +311,3 @@ template< class ScalarType, class Layout, class EXSP, class OrdinalType = int >
Kokkos::Profiling::popRegion();
return myStats;
}

66 changes: 36 additions & 30 deletions example/gmres/test_real_A.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,39 +54,40 @@
#include"gmres.hpp"

int main(int /*argc*/, char ** /*argv[]*/) {
typedef double ST;
typedef int OT;
typedef Kokkos::DefaultExecutionSpace EXSP;

typedef double ST;
typedef int OT;
typedef Kokkos::DefaultExecutionSpace EXSP;
// std::cout << "ST: " << typeid(ST).name() << std::endl;
// std::cout << "EXSP: " << typeid(EXSP).name() << std::endl;

using sp_matrix_type = KokkosSparse::CrsMatrix<ST, OT, EXSP>;
using ViewVectorType = Kokkos::View<ST*,Kokkos::LayoutLeft, EXSP>;
using sp_matrix_type = KokkosSparse::CrsMatrix<ST, OT, EXSP>;
using ViewVectorType = Kokkos::View<ST*, Kokkos::LayoutLeft, EXSP>;
typedef sp_matrix_type::non_const_ordinal_type ncOT;
typedef sp_matrix_type::const_ordinal_type cOT;
typedef sp_matrix_type::non_const_size_type ncST;

GmresOpts<ST> solverOpts;
solverOpts.ortho="CGS2"; //orthog type
solverOpts.m = 15; //Max subspace size before restarting.
solverOpts.ortho = "CGS2"; // orthog type
solverOpts.m = 15; // Max subspace size before restarting.
solverOpts.tol = 1e-10; //Relative residual convergence tolerance.
solverOpts.maxRestart = 50;
bool pass1 = false;
bool pass2 = false;

std::cout << "Convergence tolerance is: " << solverOpts.tol << std::endl;

//Initialize Kokkos AFTER parsing parameters:
// Initialize Kokkos AFTER parsing parameters:
Kokkos::initialize();
{

// Create a diagonally dominant sparse matrix to test:
ncST nnz;
cOT n = 5000;
cOT numRows = n;
cOT numCols = n;
cOT diagDominance = 1;
nnz = 10 * numRows;
sp_matrix_type A = KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix<sp_matrix_type>
sp_matrix_type A = KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix<sp_matrix_type>
(numRows, numCols, nnz, 0, ncOT(0.01 * numRows), diagDominance);

// Set initial vectors:
Expand All @@ -101,23 +102,25 @@ int main(int /*argc*/, char ** /*argv[]*/) {
GmresStats solveStats = gmres<ST, Kokkos::LayoutLeft, EXSP>(A, B, X, solverOpts);

// Double check residuals at end of solve:
double nrmB = KokkosBlas::nrm2(B);
KokkosSparse::spmv("N", 1.0, A, X, 0.0, Wj); // wj = Ax
KokkosBlas::axpy(-1.0, Wj, B); // b = b-Ax.
double nrmB = static_cast<double>(KokkosBlas::nrm2(B));
KokkosSparse::spmv("N", ST(1.0), A, X, ST(0.0), Wj); // wj = Ax
KokkosBlas::axpy(ST(-1.0), Wj, B); // b = b-Ax.
double endRes = KokkosBlas::nrm2(B)/nrmB;
std::cout << "=======================================" << std::endl;
std::cout << "Verify from main: Ending residual is " << endRes << std::endl;
std::cout << "Number of iterations is: " << solveStats.numIters << std::endl;
std::cout << "Diff of residual from main - residual from solver: " << solveStats.endRelRes - endRes << std::endl;
std::cout << "Convergence flag is : " << solveStats.convFlag() << std::endl;

if( solveStats.numIters < 40 && solveStats.numIters > 20 && endRes < solverOpts.tol){

if (solveStats.numIters < 40 && solveStats.numIters > 20 &&
endRes < static_cast<double>(solverOpts.tol)) {
std::cout << "Test CGS2 Passed!" << std::endl;
pass1 = true;
}
else{
std::cout << "Solver did not converge within the expected number of iterations. " << std::endl
<< "CGS2 Test Failed." << std::endl;
} else {
std::cout
<< "Solver did not converge within the expected number of iterations. "
<< std::endl
<< "CGS2 Test Failed." << std::endl;
}
std::cout << "=======================================" << std::endl << std::endl << std::endl;

Expand All @@ -129,29 +132,32 @@ int main(int /*argc*/, char ** /*argv[]*/) {
solveStats = gmres<ST, Kokkos::LayoutLeft, EXSP>(A, B, X, solverOpts);

// Double check residuals at end of solve:
nrmB = KokkosBlas::nrm2(B);
KokkosSparse::spmv("N", 1.0, A, X, 0.0, Wj); // wj = Ax
KokkosBlas::axpy(-1.0, Wj, B); // b = b-Ax.
nrmB = static_cast<double>(KokkosBlas::nrm2(B));
KokkosSparse::spmv("N", ST(1.0), A, X, ST(0.0), Wj); // wj = Ax
KokkosBlas::axpy(ST(-1.0), Wj, B); // b = b-Ax.
endRes = KokkosBlas::nrm2(B)/nrmB;
std::cout << "=======================================" << std::endl;
std::cout << "Verify from main: Ending residual is " << endRes << std::endl;
std::cout << "Number of iterations is: " << solveStats.numIters << std::endl;
std::cout << "Diff of residual from main - residual from solver: " << solveStats.endRelRes - endRes << std::endl;
std::cout << "Convergence flag is : " << solveStats.convFlag() << std::endl;

if( solveStats.numIters < 40 && solveStats.numIters > 20 && endRes < solverOpts.tol){

if (solveStats.numIters < 40 && solveStats.numIters > 20 &&
endRes < static_cast<double>(solverOpts.tol)) {
std::cout << "Test MGS Passed!" << std::endl;
if( pass1 ){ pass2 = true; };
if (pass1) {
pass2 = true;
};
} else {
std::cout
<< "Solver did not converge within the expected number of iterations. "
<< std::endl
<< "MGS Test Failed." << std::endl;
}
else{
std::cout << "Solver did not converge within the expected number of iterations. " << std::endl
<< "MGS Test Failed." << std::endl;
}
std::cout << "=======================================" << std::endl << std::endl << std::endl;

}
Kokkos::finalize();

return ( pass2 ? EXIT_SUCCESS : EXIT_FAILURE );
}

Loading