Skip to content

Commit

Permalink
Update of the GMRES PR
Browse files Browse the repository at this point in the history
  • Loading branch information
kliegeois committed Dec 2, 2021
1 parent df2b9a5 commit 8c3d535
Show file tree
Hide file tree
Showing 44 changed files with 366 additions and 774 deletions.
12 changes: 7 additions & 5 deletions src/batched/dense/impl/KokkosBatched_Dot_Internal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,13 @@ struct SerialDotInternal {
// j \in [0,n), i \in [0,m)
// C(j) = conj(A(:,j))*B(:,j)
template <typename ValueType, typename MagnitudeType>
KOKKOS_FORCEINLINE_FUNCTION static int invoke(
const int m, const int n, const ValueType *__restrict__ A, const int as0,
const int as1, const ValueType *__restrict__ B, const int bs0,
const int bs1,
/* */ MagnitudeType *__restrict__ C, const int cs) {
KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n,
const ValueType *__restrict__ A,
const int as0, const int as1,
const ValueType *__restrict__ B,
const int bs0, const int bs1,
/* */ MagnitudeType *__restrict__ C,
const int cs) {
for (int j = 0; j < n; ++j)
invoke(m, A + j * as1, as0, B + j * bs1, bs0, C + j * cs);
return 0;
Expand Down
4 changes: 2 additions & 2 deletions src/batched/sparse/KokkosBatched_CG.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,14 @@

/// \brief Batched CG: Selective Interface
///
/// \tparam OperatorType: The type of the opertator of the system
/// \tparam OperatorType: The type of the operator of the system
/// \tparam VectorViewType: Input type for the right-hand side and the solution,
/// needs to be a 2D view
///
/// \param member [in]: TeamPolicy member
/// \param A [in]: batched operator (can be a batched matrix or a (left or right
/// or both) preconditioned batched matrix)
/// \param B [in]: right-hand side, a
/// rank 2 view \param X [in/out]: initial guess and solutin, a rank 2 view
/// rank 2 view
/// \param X [in/out]: initial guess and solution, a rank 2 view
/// \param handle [in]: a handle which provides different information such as
/// the tolerance or the maximal number of iterations of the solver.

Expand Down
12 changes: 9 additions & 3 deletions src/batched/sparse/KokkosBatched_CrsMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,18 +64,24 @@ class CrsMatrix {
// View holding the row pointers; the constructor derives n_rows from its
// extent(0).
IntViewType row_ptr;
// View holding the column indices.
IntViewType colIndices;
// Set by the constructor to values.extent(0).
int n_operators;
// Set by the constructor to row_ptr.extent(0) - 1.
int n_rows;
// Column count (note the identifier spelling "n_colums"); set by the
// constructor to the same value as n_rows.
int n_colums;

public:
/// \brief Build a batched CRS matrix from its three underlying views.
///
/// \param _values [in]: view of the matrix values; its extent(0) is stored
/// in n_operators
/// \param _row_ptr [in]: view of the row pointers; its extent(0) minus one is
/// stored in n_rows
/// \param _colIndices [in]: view of the column indices
///
/// n_colums is initialized to the same value as n_rows.
KOKKOS_INLINE_FUNCTION
CrsMatrix(const ValuesViewType &_values, const IntViewType &_row_ptr,
          const IntViewType &_colIndices)
    : values(_values),
      row_ptr(_row_ptr),
      colIndices(_colIndices),
      n_operators(static_cast<int>(_values.extent(0))),
      n_rows(static_cast<int>(_row_ptr.extent(0) - 1)),
      n_colums(n_rows) {}

/// \brief Destructor. The body is empty: no explicit cleanup is performed
/// here.
KOKKOS_INLINE_FUNCTION
~CrsMatrix() {}

/// \brief apply
/// \brief apply version that uses constant coefficients alpha and beta
///
/// y_l <- alpha * A_l * x_l + beta * y_l for all l = 1, ..., N
/// where:
/// * N is the number of matrices,
Expand Down Expand Up @@ -115,7 +121,7 @@ class CrsMatrix {
member, alpha, values, row_ptr, colIndices, X, beta, Y);
}

/// \brief apply
/// \brief apply version that uses variable coefficient alpha and no beta
/// y_l <- alpha_l * A_l * x_l for all l = 1, ..., N
/// where:
/// * N is the number of matrices,
Expand Down Expand Up @@ -147,7 +153,7 @@ class CrsMatrix {
Y);
}

/// \brief apply
/// \brief apply version that uses variable coefficients alpha and beta
/// y_l <- alpha_l * A_l * x_l + beta_l * y_l for all l = 1, ..., N
/// where:
/// * N is the number of matrices,
Expand Down
4 changes: 2 additions & 2 deletions src/batched/sparse/KokkosBatched_GMRES.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,14 @@

/// \brief Batched GMRES: Selective Interface
///
/// \tparam OperatorType: The type of the opertator of the system
/// \tparam OperatorType: The type of the operator of the system
/// \tparam VectorViewType: Input type for the right-hand side and the solution,
/// needs to be a 2D view
///
/// \param member [in]: TeamPolicy member
/// \param A [in]: batched operator (can be a batched matrix or a (left or right
/// or both) preconditioned batched matrix)
/// \param B [in]: right-hand side, a
/// rank 2 view \param X [in/out]: initial guess and solutin, a rank 2 view
/// rank 2 view
/// \param X [in/out]: initial guess and solution, a rank 2 view
/// \param handle [in]: a handle which provides different information such as
/// the tolerance or the maximal number of iterations of the solver.

Expand Down
12 changes: 6 additions & 6 deletions src/batched/sparse/impl/KokkosBatched_CG_TeamVector_Impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,6 @@ struct TeamVectorCG {
typedef int OrdinalType;
typedef typename Kokkos::Details::ArithTraits<
typename VectorViewType::non_const_value_type>::mag_type MagnitudeType;
typedef Kokkos::View<MagnitudeType*, Kokkos::LayoutLeft,
typename VectorViewType::device_type>
NormViewType;

const size_t maximum_iteration = handle->get_max_iteration();
const MagnitudeType tolerance = handle->get_tolerance();
Expand Down Expand Up @@ -104,9 +101,6 @@ struct TeamVectorCG {
// Deep copy of b into r_0:
TeamVectorCopy<MemberType>::invoke(member, _B, R);

Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices),
[&](const OrdinalType& i) { mask(i) = 1.; });

// r_0 := b - A x_0
member.team_barrier();
A.template apply<MemberType, ScratchPadVectorViewType,
Expand All @@ -120,6 +114,12 @@ struct TeamVectorCG {
TeamVectorDot<MemberType>::invoke(member, R, R, sqr_norm_0);
member.team_barrier();

Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices),
[&](const OrdinalType& i) {
mask(i) =
sqr_norm_0(i) > tolerance * tolerance ? 1. : 0;
});

TeamVectorCopy1D::invoke(member, sqr_norm_0, sqr_norm_j);

int status = 1;
Expand Down
14 changes: 7 additions & 7 deletions src/batched/sparse/impl/KokkosBatched_CG_Team_Impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,8 @@ struct TeamCG {
typedef int OrdinalType;
typedef typename Kokkos::Details::ArithTraits<
typename VectorViewType::non_const_value_type>::mag_type MagnitudeType;
typedef Kokkos::View<MagnitudeType*, Kokkos::LayoutLeft,
typename VectorViewType::device_type>
NormViewType;

int maximum_iteration = handle->get_max_iteration();
size_t maximum_iteration = handle->get_max_iteration();
const MagnitudeType tolerance = handle->get_tolerance();

using ScratchPadNormViewType = Kokkos::View<
Expand Down Expand Up @@ -103,9 +100,6 @@ struct TeamCG {
// Deep copy of b into r_0:
TeamCopy<MemberType>::invoke(member, _B, R);

Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices),
[&](const OrdinalType& i) { mask(i) = 1.; });

// r_0 := b - A x_0
member.team_barrier();
A.template apply<MemberType, ScratchPadVectorViewType,
Expand All @@ -119,6 +113,12 @@ struct TeamCG {
TeamDot<MemberType>::invoke(member, R, R, sqr_norm_0);
member.team_barrier();

Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices),
[&](const OrdinalType& i) {
mask(i) =
sqr_norm_0(i) > tolerance * tolerance ? 1. : 0;
});

TeamCopy1D::invoke(member, sqr_norm_0, sqr_norm_j);

int status = 1;
Expand Down
94 changes: 48 additions & 46 deletions src/batched/sparse/impl/KokkosBatched_GMRES_TeamVector_Impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,6 @@ struct TeamVectorGMRES {
typedef typename Kokkos::Details::ArithTraits<
typename VectorViewType::non_const_value_type>::mag_type MagnitudeType;
typedef Kokkos::Details::ArithTraits<MagnitudeType> ATM;
typedef Kokkos::View<MagnitudeType*, Kokkos::LayoutLeft,
typename VectorViewType::device_type>
NormViewType;

int maximum_iteration = handle->get_max_iteration();
const MagnitudeType tolerance = handle->get_tolerance();

using ScratchPadNormViewType = Kokkos::View<
MagnitudeType*,
Expand All @@ -96,14 +90,20 @@ struct TeamVectorGMRES {
const OrdinalType numMatrices = _X.extent(0);
const OrdinalType numRows = _X.extent(1);

size_t maximum_iteration = handle->get_max_iteration() < numRows
? handle->get_max_iteration()
: numRows;
const MagnitudeType tolerance = handle->get_tolerance();
const MagnitudeType max_tolerance = 0.;

ScratchPadMultiVectorViewType V(member.team_scratch(1), numMatrices,
maximum_iteration + 1, numRows);
ScratchPadMultiVectorViewType H(member.team_scratch(1), numMatrices,
maximum_iteration + 1, maximum_iteration);
ScratchPadMultiVectorViewType Givens(member.team_scratch(1), numMatrices,
maximum_iteration, 2);
ScratchPadVectorViewType G(member.team_scratch(1), numMatrices,
maximum_iteration);
maximum_iteration + 1);

ScratchPadVectorViewType W(member.team_scratch(0), numMatrices, numRows);
ScratchPadVectorViewType Q(member.team_scratch(0), numMatrices, numRows);
Expand Down Expand Up @@ -134,8 +134,8 @@ struct TeamVectorGMRES {
Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices),
[&](const OrdinalType& i) {
beta(i) = ATM::sqrt(beta(i));
G(i, 0) = beta(i);
tmp(i) = 1. / beta(i);
G(i, 0) = beta(i) > max_tolerance ? beta(i) : 0.;
tmp(i) = beta(i) > max_tolerance ? 1. / beta(i) : 0.;
});

Kokkos::parallel_for(
Expand Down Expand Up @@ -166,21 +166,22 @@ struct TeamVectorGMRES {
TeamVectorCopy1D::invoke(member, tmp,
Kokkos::subview(H, Kokkos::ALL, i, j));

Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices),
[&](const OrdinalType& i) { tmp(i) = -tmp(i); });
Kokkos::parallel_for(
Kokkos::TeamVectorRange(member, 0, numMatrices),
[&](const OrdinalType& ii) { tmp(ii) = -tmp(ii); });

TeamVectorAxpy<MemberType>::invoke(member, tmp, V_i, W);
}

TeamVectorDot<MemberType>::invoke(member, W, W, tmp);
member.team_barrier();
Kokkos::parallel_for(
Kokkos::TeamVectorRange(member, 0, numMatrices),
[&](const OrdinalType& i) { tmp(i) = ATM::sqrt(tmp(i)); });
[&](const OrdinalType& i) {
H(i, j + 1, j) = ATM::sqrt(tmp(i));
tmp(i) = H(i, j + 1, j) > max_tolerance ? 1. / H(i, j + 1, j) : 0.;
});
member.team_barrier();
TeamVectorCopy1D::invoke(member, tmp,
Kokkos::subview(H, Kokkos::ALL, j + 1, j));
Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices),
[&](const OrdinalType& i) { tmp(i) = 1. / tmp(i); });
Kokkos::parallel_for(
Kokkos::TeamVectorRange(member, 0, numMatrices * numRows),
[&](const OrdinalType& iTemp) {
Expand All @@ -196,38 +197,39 @@ struct TeamVectorGMRES {
// Apply the previous Givens rotations:
auto H_j = Kokkos::subview(H, l, Kokkos::ALL, j);

for (size_t i = 0; i < j; ++i) {
if (mask(l) == 1.) {
for (size_t i = 0; i < j; ++i) {
auto tmp1 =
Givens(l, i, 0) * H_j(i) + Givens(l, i, 1) * H_j(i + 1);
auto tmp2 =
-Givens(l, i, 1) * H_j(i) + Givens(l, i, 0) * H_j(i + 1);
H_j(i) = tmp1;
H_j(i + 1) = tmp2;
}

// Compute the new Givens rotation:
Kokkos::pair<typename VectorViewType::non_const_value_type,
typename VectorViewType::non_const_value_type>
G_new;
typename VectorViewType::non_const_value_type alpha;
SerialGivensInternal::invoke(H_j(j), H_j(j + 1), &G_new, &alpha);

Givens(l, j, 0) = G_new.first;
Givens(l, j, 1) = G_new.second;

// Apply the new Givens rotation:
auto tmp1 =
Givens(l, i, 0) * H_j(i) + Givens(l, i, 1) * H_j(i + 1);
Givens(l, j, 0) * H_j(j) + Givens(l, j, 1) * H_j(j + 1);
auto tmp2 =
-Givens(l, i, 1) * H_j(i) + Givens(l, i, 0) * H_j(i + 1);
H_j(i) = tmp1;
H_j(i + 1) = tmp2;
}

// Compute the new Givens rotation:
Kokkos::pair<typename VectorViewType::non_const_value_type,
typename VectorViewType::non_const_value_type>
G_new;
typename VectorViewType::non_const_value_type alpha;
SerialGivensInternal::invoke(H_j(j), H_j(j + 1), &G_new, &alpha);

Givens(l, j, 0) = G_new.first;
Givens(l, j, 1) = G_new.second;

// Apply the new Givens rotation:
auto tmp1 = Givens(l, j, 0) * H_j(j) + Givens(l, j, 1) * H_j(j + 1);
auto tmp2 =
-Givens(l, j, 1) * H_j(j) + Givens(l, j, 0) * H_j(j + 1);
H_j(j) = tmp1;
H_j(j + 1) = tmp2;

G(l, j + 1) = -Givens(l, j, 1) * G(l, j);
G(l, j) *= Givens(l, j, 0);

if (mask(l) == 0.) {
H_j(j) = 1.;
G(l, j) = 0.;
-Givens(l, j, 1) * H_j(j) + Givens(l, j, 0) * H_j(j + 1);
H_j(j) = tmp1;
H_j(j + 1) = tmp2;

G(l, j + 1) = -Givens(l, j, 1) * G(l, j);
G(l, j) *= Givens(l, j, 0);
} else {
H_j(j) = 1.;
G(l, j + 1) = 0.;
}

if (mask(l) == 1. && std::abs(G(l, j + 1)) / beta(l) < tolerance) {
Expand Down
Loading

0 comments on commit 8c3d535

Please sign in to comment.