Skip to content

Commit

Permalink
Merge pull request #306 from kokkos/getri
Browse files Browse the repository at this point in the history
GETRI implementation
  • Loading branch information
ndellingwood authored Oct 12, 2018
2 parents 428efff + b8247b2 commit 69e8469
Show file tree
Hide file tree
Showing 23 changed files with 831 additions and 0 deletions.
39 changes: 39 additions & 0 deletions src/batched/KokkosBatched_InverseLU_Decl.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#ifndef __KOKKOSBATCHED_INVERSELU_DECL_HPP__
#define __KOKKOSBATCHED_INVERSELU_DECL_HPP__


/// \author Vinh Dang (vqdang@sandia.gov)

#include "KokkosBatched_Vector.hpp"

namespace KokkosBatched {
namespace Experimental {

/// Serial interface for computing a matrix inverse from an LU factorization.
/// ArgAlgo is the algorithm tag used to select a specialization; only the
/// declaration lives here, the bodies are supplied by the specializations.
template<class ArgAlgo>
struct SerialInverseLU {
  /// No-pivoting version.
  /// \param A  matrix view (first argument handed to the specialization)
  /// \param W  work view (second argument handed to the specialization)
  /// \return   integer status code produced by the specialization
  template<class AViewType, class WViewType>
  KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const WViewType &W);
};

/// Team interface for computing a matrix inverse from an LU factorization.
/// MemberType is the team handle type passed to invoke and ArgAlgo is the
/// algorithm tag used to select a specialization; only the declaration lives
/// here, the bodies are supplied by the specializations.
template<class MemberType, class ArgAlgo>
struct TeamInverseLU {
  /// No-pivoting version.
  /// \param member  team handle forwarded to the specialization
  /// \param A       matrix view
  /// \param W       work view
  /// \return        integer status code produced by the specialization
  template<class AViewType, class WViewType>
  KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member,
                                           const AViewType &A,
                                           const WViewType &W);
};

}
}

#endif
160 changes: 160 additions & 0 deletions src/batched/KokkosBatched_InverseLU_Serial_Impl.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#ifndef __KOKKOSBATCHED_INVERSELU_SERIAL_IMPL_HPP__
#define __KOKKOSBATCHED_INVERSELU_SERIAL_IMPL_HPP__


/// \author Vinh Dang (vqdang@sandia.gov)

#include "KokkosBatched_Util.hpp"
#include "KokkosBatched_Trsm_Decl.hpp"
#include "KokkosBatched_Trsm_Serial_Impl.hpp"

namespace KokkosBatched {
namespace Experimental {
///
/// Serial Impl
/// =========

///
/// InverseLU no piv
///

#if \
defined(__KOKKOSBATCHED_INTEL_MKL__) && \
defined(__KOKKOSBATCHED_INTEL_MKL_BATCHED__) && \
defined(__KOKKOSBATCHED_INTEL_MKL_COMPACT_BATCHED__)
/// InverseLU (no pivoting) through MKL's compact batched routine
/// mkl_dgetrinp_compact.
///
/// \param A  square rank-2 view of a vector type (vector length 4 or 8) that
///           is unit-stride in one of its two dimensions; passed to MKL as
///           the in/out matrix.
/// \param W  contiguous rank-1 work view, at least as large as A in bytes,
///           passed to MKL as the work array.
/// \return   the info value reported by MKL, or -1 when A is unit-stride in
///           neither dimension (MKL is not called in that case).
template<>
template<typename AViewType,
         typename WViewType>
KOKKOS_INLINE_FUNCTION
int
SerialInverseLU<Algo::InverseLU::CompactMKL>::
invoke(const AViewType &A,
       const WViewType &W) {
  typedef typename AViewType::value_type vector_type;
  //typedef typename vector_type::value_type value_type;

  const int
    m = A.extent(0),
    n = A.extent(1);

  static_assert(is_vector<vector_type>::value, "value type is not vector type");
  static_assert(vector_type::vector_length == 4 || vector_type::vector_length == 8,
                "AVX, AVX2 and AVX512 is supported");
  static_assert(AViewType::rank == 2, "A should have two dimensions");
  static_assert(WViewType::rank == 1, "W should have one dimension");
  static_assert(std::is_same<typename AViewType::memory_space, typename WViewType::memory_space>::value, "A and W should be on the same memory space");
  static_assert(!std::is_same<typename WViewType::array_layout, Kokkos::LayoutStride>::value, "W should be an contiguous 1D array");
  assert(A.extent(0)*A.extent(1)*sizeof(typename AViewType::value_type) <= W.span()*sizeof(typename WViewType::value_type));
  assert(m==n);

  const MKL_COMPACT_PACK format = vector_type::vector_length == 8 ? MKL_COMPACT_AVX512 : MKL_COMPACT_AVX;

  // MKL writes its status through an MKL_INT*. MKL_INT is wider than int in
  // ILP64 builds, so the status must live in a genuine MKL_INT object rather
  // than in an int whose address is cast to MKL_INT*.
  MKL_INT info = 0;

  // Work array length expressed in doubles.
  const MKL_INT lwork = (MKL_INT)(n*n*vector_type::vector_length);
  const MKL_INT nm    = (MKL_INT)vector_type::vector_length;

  if (A.stride(0) == 1) {
    // Unit stride along dimension 0: column-major, leading dimension stride(1).
    mkl_dgetrinp_compact (MKL_COL_MAJOR, n,
                          (double*)A.data(), A.stride(1),
                          (double*)W.data(), lwork,
                          &info, format, nm);
  } else if (A.stride(1) == 1) {
    // Unit stride along dimension 1: row-major, leading dimension stride(0).
    mkl_dgetrinp_compact (MKL_ROW_MAJOR, n,
                          (double*)A.data(), A.stride(0),
                          (double*)W.data(), lwork,
                          &info, format, nm);
  } else {
    // Neither dimension is contiguous; the compact layout cannot describe A.
    info = -1;
  }
  return static_cast<int>(info);
}
#endif

/// InverseLU (no pivoting), unblocked serial variant.
///
/// A is used both as the unit-diagonal lower factor L and as the upper
/// factor U. The inverse is formed by solving L*Linv = I and then
/// U*Ainv = Linv in the workspace, and the result is copied back into A.
///
/// \param A  square rank-2 view holding the factors; overwritten with the
///           result.
/// \param W  contiguous rank-1 work view in the same memory space as A and at
///           least as large as A in bytes. Its incoming contents are ignored.
/// \return   0 (the status of the triangular solves is not propagated).
template<>
template<typename AViewType,
         typename WViewType>
KOKKOS_INLINE_FUNCTION
int
SerialInverseLU<Algo::InverseLU::Unblocked>::
invoke(const AViewType &A,
       const WViewType &W) {
  static_assert(AViewType::rank == 2, "A should have two dimensions");
  static_assert(WViewType::rank == 1, "W should have one dimension");
  static_assert(std::is_same<typename AViewType::memory_space, typename WViewType::memory_space>::value, "A and W should be on the same memory space");
  static_assert(!std::is_same<typename WViewType::array_layout, Kokkos::LayoutStride>::value, "W should be an contiguous 1D array");
  assert(A.extent(0)*A.extent(1)*sizeof(typename AViewType::value_type) <= W.span()*sizeof(typename WViewType::value_type));
  assert(A.extent(0)==A.extent(1));

  typedef typename AViewType::value_type ScalarType;

  // Reinterpret the 1D workspace as an unmanaged 2D matrix with A's extents.
  auto B = Kokkos::View<ScalarType**, Kokkos::LayoutLeft, typename WViewType::memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> >(W.data(), A.extent(0), A.extent(1));

  const ScalarType one(1.0), zero(0.0);
  const size_t m = A.extent(0), n = A.extent(1);

  // Build the full identity in B. The workspace is caller-provided memory, so
  // the off-diagonal entries must be cleared here rather than assumed zero.
  // B is LayoutLeft, hence the row index runs fastest.
#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
#pragma unroll
#endif
  for (size_t j=0;j<n;++j)
    for (size_t i=0;i<m;++i)
      B(i,j) = (i == j ? one : zero);

  //First, compute L inverse by solving the system L*Linv = I for Linv
  SerialTrsm<Side::Left,Uplo::Lower,Trans::NoTranspose,Diag::Unit,Algo::Trsm::Unblocked>::invoke(one, A, B);
  //Second, compute A inverse by solving the system U*Ainv = Linv for Ainv
  SerialTrsm<Side::Left,Uplo::Upper,Trans::NoTranspose,Diag::NonUnit,Algo::Trsm::Unblocked>::invoke(one, A, B);

  // Copy the result from the workspace back into A.
#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
#pragma unroll
#endif
  for (size_t i=0;i<m;++i)
    for (size_t j=0;j<n;++j)
      A(i,j) = B(i,j);

  return 0;
}

/// InverseLU (no pivoting), blocked serial variant.
///
/// A is used both as the unit-diagonal lower factor L and as the upper
/// factor U. The inverse is formed by solving L*Linv = I and then
/// U*Ainv = Linv in the workspace (using the blocked Trsm), and the result is
/// copied back into A.
///
/// \param A  square rank-2 view holding the factors; overwritten with the
///           result.
/// \param W  contiguous rank-1 work view in the same memory space as A and at
///           least as large as A in bytes. Its incoming contents are ignored.
/// \return   0 (the status of the triangular solves is not propagated).
template<>
template<typename AViewType,
         typename WViewType>
KOKKOS_INLINE_FUNCTION
int
SerialInverseLU<Algo::InverseLU::Blocked>::
invoke(const AViewType &A,
       const WViewType &W) {
  static_assert(AViewType::rank == 2, "A should have two dimensions");
  static_assert(WViewType::rank == 1, "W should have one dimension");
  static_assert(std::is_same<typename AViewType::memory_space, typename WViewType::memory_space>::value, "A and W should be on the same memory space");
  static_assert(!std::is_same<typename WViewType::array_layout, Kokkos::LayoutStride>::value, "W should be an contiguous 1D array");
  assert(A.extent(0)*A.extent(1)*sizeof(typename AViewType::value_type) <= W.span()*sizeof(typename WViewType::value_type));
  assert(A.extent(0)==A.extent(1));

  typedef typename AViewType::value_type ScalarType;

  // Reinterpret the 1D workspace as an unmanaged 2D matrix with A's extents.
  auto B = Kokkos::View<ScalarType**, Kokkos::LayoutLeft, typename WViewType::memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> >(W.data(), A.extent(0), A.extent(1));

  const ScalarType one(1.0), zero(0.0);
  const size_t m = A.extent(0), n = A.extent(1);

  // Build the full identity in B. The workspace is caller-provided memory, so
  // the off-diagonal entries must be cleared here rather than assumed zero.
  // B is LayoutLeft, hence the row index runs fastest.
#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
#pragma unroll
#endif
  for (size_t j=0;j<n;++j)
    for (size_t i=0;i<m;++i)
      B(i,j) = (i == j ? one : zero);

  //First, compute L inverse by solving the system L*Linv = I for Linv
  SerialTrsm<Side::Left,Uplo::Lower,Trans::NoTranspose,Diag::Unit,Algo::Trsm::Blocked>::invoke(one, A, B);
  //Second, compute A inverse by solving the system U*Ainv = Linv for Ainv
  SerialTrsm<Side::Left,Uplo::Upper,Trans::NoTranspose,Diag::NonUnit,Algo::Trsm::Blocked>::invoke(one, A, B);

  // Copy the result from the workspace back into A.
#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
#pragma unroll
#endif
  for (size_t i=0;i<m;++i)
    for (size_t j=0;j<n;++j)
      A(i,j) = B(i,j);

  return 0;
}

}
}

#endif
102 changes: 102 additions & 0 deletions src/batched/KokkosBatched_InverseLU_Team_Impl.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#ifndef __KOKKOSBATCHED_INVERSELU_TEAM_IMPL_HPP__
#define __KOKKOSBATCHED_INVERSELU_TEAM_IMPL_HPP__


/// \author Vinh Dang (vqdang@sandia.gov)

#include "KokkosBatched_Util.hpp"
#include "KokkosBatched_Trsm_Decl.hpp"
#include "KokkosBatched_Trsm_Team_Impl.hpp"

namespace KokkosBatched {
namespace Experimental {
///
/// Team Impl
/// =========

///
/// InverseLU no piv
///

/// InverseLU (no pivoting), unblocked team variant.
///
/// A is used both as the unit-diagonal lower factor L and as the upper
/// factor U. The inverse is formed by solving L*Linv = I and then
/// U*Ainv = Linv in the workspace, and the result is copied back into A.
template<typename MemberType>
struct TeamInverseLU<MemberType,Algo::InverseLU::Unblocked> {
  /// \param member  team handle; work is distributed with TeamThreadRange and
  ///                synchronized with team_barrier, so the whole team must
  ///                call this together.
  /// \param A       square rank-2 view holding the factors; overwritten with
  ///                the result.
  /// \param W       contiguous rank-1 work view in the same memory space as A
  ///                and at least as large as A in bytes. Its incoming
  ///                contents are ignored.
  /// \return        0 (the status of the triangular solves is not propagated).
  template<typename AViewType,
           typename WViewType>
  KOKKOS_INLINE_FUNCTION
  static int
  invoke(const MemberType &member, const AViewType &A, const WViewType &W) {
    static_assert(AViewType::rank == 2, "A should have two dimensions");
    static_assert(WViewType::rank == 1, "W should have one dimension");
    static_assert(std::is_same<typename AViewType::memory_space, typename WViewType::memory_space>::value, "A and W should be on the same memory space");
    static_assert(!std::is_same<typename WViewType::array_layout, Kokkos::LayoutStride>::value, "W should be an contiguous 1D array");
    assert(A.extent(0)*A.extent(1)*sizeof(typename AViewType::value_type) <= W.span()*sizeof(typename WViewType::value_type));
    assert(A.extent(0)==A.extent(1));

    typedef typename AViewType::value_type ScalarType;

    // Reinterpret the 1D workspace as an unmanaged 2D matrix with A's extents.
    auto B = Kokkos::View<ScalarType**, Kokkos::LayoutLeft, typename WViewType::memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> >(W.data(), A.extent(0), A.extent(1));

    const ScalarType one(1.0), zero(0.0);
    const int m = static_cast<int>(A.extent(0));
    const int n = static_cast<int>(A.extent(1));

    // Build the full identity in B. The workspace is caller-provided memory,
    // so the off-diagonal entries must be cleared here rather than assumed
    // zero. B is LayoutLeft, hence the row index runs fastest.
    Kokkos::parallel_for(Kokkos::TeamThreadRange(member,m*n),[&](const int &tid) {
        const int i = tid%m;
        const int j = tid/m;
        B(i,j) = (i == j ? one : zero);
      });
    // A TeamThreadRange loop does not synchronize the team on exit; make the
    // identity visible to every thread before the solves read it.
    member.team_barrier();

    //First, compute L inverse by solving the system L*Linv = I for Linv
    TeamTrsm<MemberType,Side::Left,Uplo::Lower,Trans::NoTranspose,Diag::Unit,Algo::Trsm::Unblocked>::invoke(member, one, A, B);
    //Second, compute A inverse by solving the system U*Ainv = Linv for Ainv
    TeamTrsm<MemberType,Side::Left,Uplo::Upper,Trans::NoTranspose,Diag::NonUnit,Algo::Trsm::Unblocked>::invoke(member, one, A, B);

    // The copy below partitions the entries across threads differently from
    // the solve, so wait until all updates to B have completed.
    // NOTE(review): redundant if TeamTrsm already ends with a barrier.
    member.team_barrier();

    // Copy the result from the workspace back into A.
    Kokkos::parallel_for(Kokkos::TeamThreadRange(member,m*n),[&](const int &tid) {
        const int i = tid/n;
        const int j = tid%n;
        A(i,j) = B(i,j);
      });

    return 0;
  }
};

/// InverseLU (no pivoting), blocked team variant.
///
/// A is used both as the unit-diagonal lower factor L and as the upper
/// factor U. The inverse is formed by solving L*Linv = I and then
/// U*Ainv = Linv in the workspace (using the blocked Trsm), and the result is
/// copied back into A.
template<typename MemberType>
struct TeamInverseLU<MemberType,Algo::InverseLU::Blocked> {
  /// \param member  team handle; work is distributed with TeamThreadRange and
  ///                synchronized with team_barrier, so the whole team must
  ///                call this together.
  /// \param A       square rank-2 view holding the factors; overwritten with
  ///                the result.
  /// \param W       contiguous rank-1 work view in the same memory space as A
  ///                and at least as large as A in bytes. Its incoming
  ///                contents are ignored.
  /// \return        0 (the status of the triangular solves is not propagated).
  template<typename AViewType,
           typename WViewType>
  KOKKOS_INLINE_FUNCTION
  static int
  invoke(const MemberType &member, const AViewType &A, const WViewType &W) {
    static_assert(AViewType::rank == 2, "A should have two dimensions");
    static_assert(WViewType::rank == 1, "W should have one dimension");
    static_assert(std::is_same<typename AViewType::memory_space, typename WViewType::memory_space>::value, "A and W should be on the same memory space");
    static_assert(!std::is_same<typename WViewType::array_layout, Kokkos::LayoutStride>::value, "W should be an contiguous 1D array");
    assert(A.extent(0)*A.extent(1)*sizeof(typename AViewType::value_type) <= W.span()*sizeof(typename WViewType::value_type));
    assert(A.extent(0)==A.extent(1));

    typedef typename AViewType::value_type ScalarType;

    // Reinterpret the 1D workspace as an unmanaged 2D matrix with A's extents.
    auto B = Kokkos::View<ScalarType**, Kokkos::LayoutLeft, typename WViewType::memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> >(W.data(), A.extent(0), A.extent(1));

    const ScalarType one(1.0), zero(0.0);
    const int m = static_cast<int>(A.extent(0));
    const int n = static_cast<int>(A.extent(1));

    // Build the full identity in B. The workspace is caller-provided memory,
    // so the off-diagonal entries must be cleared here rather than assumed
    // zero. B is LayoutLeft, hence the row index runs fastest.
    Kokkos::parallel_for(Kokkos::TeamThreadRange(member,m*n),[&](const int &tid) {
        const int i = tid%m;
        const int j = tid/m;
        B(i,j) = (i == j ? one : zero);
      });
    // A TeamThreadRange loop does not synchronize the team on exit; make the
    // identity visible to every thread before the solves read it.
    member.team_barrier();

    //First, compute L inverse by solving the system L*Linv = I for Linv
    TeamTrsm<MemberType,Side::Left,Uplo::Lower,Trans::NoTranspose,Diag::Unit,Algo::Trsm::Blocked>::invoke(member, one, A, B);
    //Second, compute A inverse by solving the system U*Ainv = Linv for Ainv
    TeamTrsm<MemberType,Side::Left,Uplo::Upper,Trans::NoTranspose,Diag::NonUnit,Algo::Trsm::Blocked>::invoke(member, one, A, B);

    // The copy below partitions the entries across threads differently from
    // the solve, so wait until all updates to B have completed.
    // NOTE(review): redundant if TeamTrsm already ends with a barrier.
    member.team_barrier();

    // Copy the result from the workspace back into A.
    Kokkos::parallel_for(Kokkos::TeamThreadRange(member,m*n),[&](const int &tid) {
        const int i = tid/n;
        const int j = tid%n;
        A(i,j) = B(i,j);
      });

    return 0;
  }
};

}
}

#endif
1 change: 1 addition & 0 deletions src/batched/KokkosBatched_Util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ namespace KokkosBatched {
using Gemm = Level3;
using Trsm = Level3;
using LU = Level3;
using InverseLU = Level3;

struct Level2 {
struct Unblocked {};
Expand Down
12 changes: 12 additions & 0 deletions unit_test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ ifeq ($(KOKKOSKERNELS_INTERNAL_TEST_OPENMP), 1)
OBJ_OPENMP += Test_OpenMP_Batched_TeamLU_Real.o
OBJ_OPENMP += Test_OpenMP_Batched_TeamGemv_Real.o
OBJ_OPENMP += Test_OpenMP_Batched_TeamTrsv_Real.o
OBJ_OPENMP += Test_OpenMP_Batched_SerialInverseLU_Real.o
OBJ_OPENMP += Test_OpenMP_Batched_TeamInverseLU_Real.o
# Complex
OBJ_OPENMP += Test_OpenMP_Batched_SerialMatUtil_Complex.o
OBJ_OPENMP += Test_OpenMP_Batched_SerialGemm_Complex.o
Expand All @@ -154,6 +156,8 @@ ifeq ($(KOKKOSKERNELS_INTERNAL_TEST_OPENMP), 1)
OBJ_OPENMP += Test_OpenMP_Batched_TeamLU_Complex.o
OBJ_OPENMP += Test_OpenMP_Batched_TeamGemv_Complex.o
OBJ_OPENMP += Test_OpenMP_Batched_TeamTrsv_Complex.o
OBJ_OPENMP += Test_OpenMP_Batched_SerialInverseLU_Complex.o
OBJ_OPENMP += Test_OpenMP_Batched_TeamInverseLU_Complex.o
# Vector
OBJ_OPENMP += Test_OpenMP_Batched_VectorArithmatic.o
OBJ_OPENMP += Test_OpenMP_Batched_VectorMath.o
Expand Down Expand Up @@ -243,6 +247,8 @@ ifeq ($(KOKKOSKERNELS_INTERNAL_TEST_CUDA), 1)
OBJ_CUDA += Test_Cuda_Batched_TeamLU_Real.o
OBJ_CUDA += Test_Cuda_Batched_TeamGemv_Real.o
OBJ_CUDA += Test_Cuda_Batched_TeamTrsv_Real.o
OBJ_CUDA += Test_Cuda_Batched_SerialInverseLU_Real.o
OBJ_CUDA += Test_Cuda_Batched_TeamInverseLU_Real.o
# Complex
OBJ_CUDA += Test_Cuda_Batched_SerialMatUtil_Complex.o
OBJ_CUDA += Test_Cuda_Batched_SerialGemm_Complex.o
Expand All @@ -256,6 +262,8 @@ ifeq ($(KOKKOSKERNELS_INTERNAL_TEST_CUDA), 1)
OBJ_CUDA += Test_Cuda_Batched_TeamLU_Complex.o
OBJ_CUDA += Test_Cuda_Batched_TeamGemv_Complex.o
OBJ_CUDA += Test_Cuda_Batched_TeamTrsv_Complex.o
OBJ_CUDA += Test_Cuda_Batched_SerialInverseLU_Complex.o
OBJ_CUDA += Test_Cuda_Batched_TeamInverseLU_Complex.o
TARGETS += KokkosKernels_UnitTest_Cuda
TEST_TARGETS += test-cuda
endif
Expand Down Expand Up @@ -340,6 +348,8 @@ ifeq ($(KOKKOSKERNELS_INTERNAL_TEST_SERIAL), 1)
OBJ_SERIAL += Test_Serial_Batched_TeamLU_Real.o
OBJ_SERIAL += Test_Serial_Batched_TeamGemv_Real.o
OBJ_SERIAL += Test_Serial_Batched_TeamTrsv_Real.o
OBJ_SERIAL += Test_Serial_Batched_SerialInverseLU_Real.o
OBJ_SERIAL += Test_Serial_Batched_TeamInverseLU_Real.o
# Complex
OBJ_SERIAL += Test_Serial_Batched_SerialMatUtil_Complex.o
OBJ_SERIAL += Test_Serial_Batched_SerialGemm_Complex.o
Expand All @@ -353,6 +363,8 @@ ifeq ($(KOKKOSKERNELS_INTERNAL_TEST_SERIAL), 1)
OBJ_SERIAL += Test_Serial_Batched_TeamLU_Complex.o
OBJ_SERIAL += Test_Serial_Batched_TeamGemv_Complex.o
OBJ_SERIAL += Test_Serial_Batched_TeamTrsv_Complex.o
OBJ_SERIAL += Test_Serial_Batched_SerialInverseLU_Complex.o
OBJ_SERIAL += Test_Serial_Batched_TeamInverseLU_Complex.o
# Vector
OBJ_SERIAL += Test_Serial_Batched_VectorArithmatic.o
OBJ_SERIAL += Test_Serial_Batched_VectorMath.o
Expand Down
Loading

0 comments on commit 69e8469

Please sign in to comment.