Skip to content

Commit

Permalink
Merge pull request kokkos#1155 from kliegeois/GMRES
Browse files Browse the repository at this point in the history
Add Batched CG and Batched GMRES
  • Loading branch information
lucbv authored Dec 3, 2021
2 parents 7ff0b5c + 8c3d535 commit 133b7fc
Show file tree
Hide file tree
Showing 62 changed files with 4,143 additions and 778 deletions.
27 changes: 15 additions & 12 deletions src/batched/dense/KokkosBatched_Axpy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ namespace KokkosBatched {
/// * y_1, ..., y_N are the N output vectors,
/// * alpha_1, ..., alpha_N are N scaling factors for x_1, ..., x_N.
///
/// \tparam ViewType: Input type for X and Y, needs to be a 2D view
/// \tparam XViewType: Input type for X, needs to be a 2D view
/// \tparam YViewType: Input type for Y, needs to be a 2D view
/// \tparam alphaViewType: Input type for alpha, needs to be a 1D view
///
/// \param alpha [in]: input coefficient for X, a rank 1 view
Expand All @@ -68,10 +69,10 @@ namespace KokkosBatched {
///

struct SerialAxpy {
// Declaration of the static entry point only; no definition is visible in
// this excerpt.
// NOTE(review): this excerpt is a diff rendered without +/- markers, so each
// changed line appears twice. The single-`ViewType` template header and the
// `const ViewType &` parameters look like the removed lines, superseded by the
// separate `XViewType`/`YViewType` forms that follow them -- confirm against
// the actual header.
template <typename ViewType, typename alphaViewType>
template <typename XViewType, typename YViewType, typename alphaViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const alphaViewType &alpha,
const ViewType &X,
const ViewType &Y);
const XViewType &X,
const YViewType &Y);
};

/// \brief Team Batched AXPY:
Expand All @@ -82,7 +83,8 @@ struct SerialAxpy {
/// * y_1, ..., y_N are the N output vectors,
/// * alpha_1, ..., alpha_N are N scaling factors for x_1, ..., x_N.
///
/// \tparam ViewType: Input type for X and Y, needs to be a 2D view
/// \tparam XViewType: Input type for X, needs to be a 2D view
/// \tparam YViewType: Input type for Y, needs to be a 2D view
/// \tparam alphaViewType: Input type for alpha, needs to be a 1D view
///
/// \param member [in]: TeamPolicy member
Expand All @@ -95,11 +97,11 @@ struct SerialAxpy {

template <typename MemberType>
struct TeamAxpy {
// Declaration of the static entry point only; no definition is visible in
// this excerpt. Unlike SerialAxpy::invoke, it also takes the `member` handle.
// NOTE(review): this excerpt is a diff rendered without +/- markers, so each
// changed line appears twice. The single-`ViewType` template header and the
// `const ViewType &` parameters look like the removed lines, superseded by the
// separate `XViewType`/`YViewType` forms that follow them -- confirm against
// the actual header.
template <typename ViewType, typename alphaViewType>
template <typename XViewType, typename YViewType, typename alphaViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member,
const alphaViewType &alpha,
const ViewType &X,
const ViewType &Y);
const XViewType &X,
const YViewType &Y);
};

/// \brief TeamVector Batched AXPY:
Expand All @@ -110,7 +112,8 @@ struct TeamAxpy {
/// * y_1, ..., y_N are the N output vectors,
/// * alpha_1, ..., alpha_N are N scaling factors for x_1, ..., x_N.
///
/// \tparam ViewType: Input type for X and Y, needs to be a 2D view
/// \tparam XViewType: Input type for X, needs to be a 2D view
/// \tparam YViewType: Input type for Y, needs to be a 2D view
/// \tparam alphaViewType: Input type for alpha, needs to be a 1D view
///
/// \param member [in]: TeamPolicy member
Expand All @@ -124,11 +127,11 @@ struct TeamAxpy {

template <typename MemberType>
struct TeamVectorAxpy {
// Declaration of the static entry point only; no definition is visible in
// this excerpt. The signature mirrors TeamAxpy::invoke.
// NOTE(review): this excerpt is a diff rendered without +/- markers, so each
// changed line appears twice. The single-`ViewType` template header and the
// `const ViewType &` parameters look like the removed lines, superseded by the
// separate `XViewType`/`YViewType` forms that follow them -- confirm against
// the actual header.
template <typename ViewType, typename alphaViewType>
template <typename XViewType, typename YViewType, typename alphaViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member,
const alphaViewType &alpha,
const XViewType &X,
const YViewType &Y);
const XViewType &X,
const YViewType &Y);
};

} // namespace KokkosBatched
Expand Down
17 changes: 10 additions & 7 deletions src/batched/dense/KokkosBatched_Copy_Decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace KokkosBatched {
/// Serial Copy
///

template <typename ArgTrans>
template <typename ArgTrans, int rank = 2>
struct SerialCopy {
template <typename AViewType, typename BViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A,
Expand All @@ -22,7 +22,8 @@ struct SerialCopy {
/// Team Copy
///

template <typename MemberType, typename ArgTrans>
template <typename MemberType, typename ArgTrans = Trans::NoTranspose,
int rank = 2>
struct TeamCopy {
template <typename AViewType, typename BViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member,
Expand All @@ -34,7 +35,8 @@ struct TeamCopy {
/// TeamVector Copy
///

template <typename MemberType, typename ArgTrans>
template <typename MemberType, typename ArgTrans = Trans::NoTranspose,
int rank = 2>
struct TeamVectorCopy {
template <typename AViewType, typename BViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member,
Expand All @@ -45,19 +47,20 @@ struct TeamVectorCopy {
///
/// Selective Interface
///
template <typename MemberType, typename ArgTrans, typename ArgMode>
template <typename MemberType, typename ArgTrans, typename ArgMode,
int rank = 2>
struct Copy {
// Selects the copy implementation from the ArgMode template argument:
// Mode::Serial -> SerialCopy, Mode::Team -> TeamCopy,
// Mode::TeamVector -> TeamVectorCopy, and returns that call's result.
// `member` is forwarded only to the Team and TeamVector variants. If ArgMode
// matches none of the three modes, nothing is invoked and 0 is returned.
// NOTE(review): this excerpt is a diff rendered without +/- markers, so each
// r_val assignment appears twice. The call without `rank` is presumably the
// removed line and the one forwarding `rank` the added line -- confirm against
// the actual header.
template <typename AViewType, typename BViewType>
KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member,
const AViewType &A,
const BViewType &B) {
int r_val = 0;
if (std::is_same<ArgMode, Mode::Serial>::value) {
r_val = SerialCopy<ArgTrans>::invoke(A, B);
r_val = SerialCopy<ArgTrans, rank>::invoke(A, B);
} else if (std::is_same<ArgMode, Mode::Team>::value) {
r_val = TeamCopy<MemberType, ArgTrans>::invoke(member, A, B);
r_val = TeamCopy<MemberType, ArgTrans, rank>::invoke(member, A, B);
} else if (std::is_same<ArgMode, Mode::TeamVector>::value) {
r_val = TeamVectorCopy<MemberType, ArgTrans>::invoke(member, A, B);
r_val = TeamVectorCopy<MemberType, ArgTrans, rank>::invoke(member, A, B);
}
return r_val;
}
Expand Down
161 changes: 161 additions & 0 deletions src/batched/dense/KokkosBatched_Dot.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.4
// Copyright (2021) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Siva Rajamanickam (srajama@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef __KOKKOSBATCHED_DOT_HPP__
#define __KOKKOSBATCHED_DOT_HPP__

/// \author Kim Liegeois (knliege@sandia.gov)

#include "KokkosBatched_Util.hpp"
#include "KokkosBatched_Vector.hpp"

namespace KokkosBatched {

/// \brief Serial Batched DOT:
///
/// Depending on the ArgTrans template, the dot product is
/// row-based (ArgTrans == Trans::NoTranspose):
///
/// dot_l <- (x_l:, y_l:) for all l = 1, ..., N
/// where:
/// * N is the first dimension of X (x_l: denotes row l of X).
///
/// Or column-based:
/// dot_l <- (x_:l, y_:l) for all l = 1, ..., n
/// where:
/// * n is the second dimension of X (x_:l denotes column l of X).
///
/// \tparam ArgTrans: type of dot product (Trans::NoTranspose by default)
/// \tparam XViewType: Input type for X, needs to be a 2D view
/// \tparam YViewType: Input type for Y, needs to be a 2D view
/// \tparam NormViewType: Output type for dot, needs to be a 1D view
///
/// \param X [in]: Input vector X, a rank 2 view
/// \param Y [in]: Input vector Y, a rank 2 view
/// \param dot [out]: Computed dot product, a rank 1 view
///
/// No nested parallel_for is used inside of the function.
///

template <typename ArgTrans = Trans::NoTranspose>
struct SerialDot {
// Declaration only: the definition is not part of this header's visible
// text (KokkosBatched_Dot_Internal.hpp is included at the end of the file).
template <typename XViewType, typename YViewType, typename NormViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const XViewType &X,
const YViewType &Y,
const NormViewType &dot);
};

/// \brief Team Batched DOT:
///
/// Depending on the ArgTrans template, the dot product is
/// row-based (ArgTrans == Trans::NoTranspose):
///
/// dot_l <- (x_l:, y_l:) for all l = 1, ..., N
/// where:
/// * N is the first dimension of X (x_l: denotes row l of X).
///
/// Or column-based:
/// dot_l <- (x_:l, y_:l) for all l = 1, ..., n
/// where:
/// * n is the second dimension of X (x_:l denotes column l of X).
///
/// \tparam MemberType: type of the TeamPolicy member
/// \tparam ArgTrans: type of dot product (Trans::NoTranspose by default)
/// \tparam XViewType: Input type for X, needs to be a 2D view
/// \tparam YViewType: Input type for Y, needs to be a 2D view
/// \tparam NormViewType: Output type for dot, needs to be a 1D view
///
/// \param member [in]: TeamPolicy member
/// \param X [in]: Input vector X, a rank 2 view
/// \param Y [in]: Input vector Y, a rank 2 view
/// \param dot [out]: Computed dot product, a rank 1 view
///
/// A nested parallel_for with TeamThreadRange is used.
///

template <typename MemberType, typename ArgTrans = Trans::NoTranspose>
struct TeamDot {
// Declaration only: the definition is not part of this header's visible
// text (KokkosBatched_Dot_Internal.hpp is included at the end of the file).
template <typename XViewType, typename YViewType, typename NormViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member,
const XViewType &X,
const YViewType &Y,
const NormViewType &dot);
};

/// \brief TeamVector Batched DOT:
///
/// Depending on the ArgTrans template, the dot product is
/// row-based (ArgTrans == Trans::NoTranspose):
///
/// dot_l <- (x_l:, y_l:) for all l = 1, ..., N
/// where:
/// * N is the first dimension of X (x_l: denotes row l of X).
///
/// Or column-based:
/// dot_l <- (x_:l, y_:l) for all l = 1, ..., n
/// where:
/// * n is the second dimension of X (x_:l denotes column l of X).
///
/// \tparam MemberType: type of the TeamPolicy member
/// \tparam ArgTrans: type of dot product (Trans::NoTranspose by default)
/// \tparam XViewType: Input type for X, needs to be a 2D view
/// \tparam YViewType: Input type for Y, needs to be a 2D view
/// \tparam NormViewType: Output type for dot, needs to be a 1D view
///
/// \param member [in]: TeamPolicy member
/// \param X [in]: Input vector X, a rank 2 view
/// \param Y [in]: Input vector Y, a rank 2 view
/// \param dot [out]: Computed dot product, a rank 1 view
///
/// Two nested parallel_for with both TeamThreadRange and ThreadVectorRange
/// (or one with TeamVectorRange) are used inside.
///

template <typename MemberType, typename ArgTrans = Trans::NoTranspose>
struct TeamVectorDot {
// Declaration only: the definition is not part of this header's visible
// text (KokkosBatched_Dot_Internal.hpp is included at the end of the file).
template <typename XViewType, typename YViewType, typename NormViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member,
const XViewType &X,
const YViewType &Y,
const NormViewType &dot);
};

} // namespace KokkosBatched

#include "KokkosBatched_Dot_Internal.hpp"

#endif
Loading

0 comments on commit 133b7fc

Please sign in to comment.