Skip to content

Commit

Permalink
Merge pull request QMCPACK#3514 from ye-luo/fix-omp-nocuda
Browse files Browse the repository at this point in the history
Fix omp offload without cuda
  • Loading branch information
ye-luo authored Oct 6, 2021
2 parents 319938f + 3a1fc75 commit f7874e4
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 29 deletions.
4 changes: 2 additions & 2 deletions src/QMCWaveFunctions/Fermion/DiracDeterminantBatched.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ void DiracDeterminantBatched<DET_ENGINE>::invertPsiM(const DualMatrix<Value>& ps

template<typename DET_ENGINE>
void DiracDeterminantBatched<DET_ENGINE>::mw_invertPsiM(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
RefVector<const DualMatrix<Value>>& logdetT_list,
RefVector<DualMatrix<Value>>& a_inv_list,
const RefVector<const DualMatrix<Value>>& logdetT_list,
const RefVector<DualMatrix<Value>>& a_inv_list,
const std::vector<bool>& compute_mask) const
{
auto& wfc_leader = wfc_list.getCastedLeader<DiracDeterminantBatched<DET_ENGINE>>();
Expand Down
4 changes: 2 additions & 2 deletions src/QMCWaveFunctions/Fermion/DiracDeterminantBatched.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,8 +279,8 @@ class DiracDeterminantBatched : public DiracDeterminantBase
* the compute mask. See a future PR for those changes, or for dropping the compute_mask argument.
*/
void mw_invertPsiM(const RefVectorWithLeader<WaveFunctionComponent>& wfc_list,
RefVector<const DualMatrix<Value>>& logdetT_list,
RefVector<DualMatrix<Value>>& a_inv_list,
const RefVector<const DualMatrix<Value>>& logdetT_list,
const RefVector<DualMatrix<Value>>& a_inv_list,
const std::vector<bool>& compute_mask) const;

/** Does a Phi->mw_evaluate_notranspose then mw_invertPsiM over a set of
Expand Down
22 changes: 16 additions & 6 deletions src/QMCWaveFunctions/Fermion/DiracMatrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,14 @@ class DiracMatrix
* @tparam TMAT matrix value type
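* @tparam ALLOC1 allocator type of amat, must be host accessible
* @tparam ALLOC2 allocator type of invMat, must be host accessible
* @tparam TREAL real type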
*/
template<typename TMAT, typename TREAL>
inline std::enable_if_t<std::is_same<T_FP, TMAT>::value> invert_transpose(const Matrix<TMAT>& amat,
Matrix<TMAT>& invMat,
template<typename TMAT,
typename ALLOC1,
typename ALLOC2,
typename TREAL,
typename = std::enable_if_t<qmc_allocator_traits<ALLOC1>::is_host_accessible>,
typename = std::enable_if_t<qmc_allocator_traits<ALLOC2>::is_host_accessible>>
inline std::enable_if_t<std::is_same<T_FP, TMAT>::value> invert_transpose(const Matrix<TMAT, ALLOC1>& amat,
Matrix<TMAT, ALLOC2>& invMat,
std::complex<TREAL>& LogDet)
{
const int n = invMat.rows();
Expand All @@ -196,9 +201,14 @@ class DiracMatrix
* @tparam TMAT matrix value type
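* @tparam ALLOC1 allocator type of amat, must be host accessible
* @tparam ALLOC2 allocator type of invMat, must be host accessible
* @tparam TREAL real type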
*/
template<typename TMAT, typename TREAL>
inline std::enable_if_t<!std::is_same<T_FP, TMAT>::value> invert_transpose(const Matrix<TMAT>& amat,
Matrix<TMAT>& invMat,
template<typename TMAT,
typename ALLOC1,
typename ALLOC2,
typename TREAL,
typename = std::enable_if_t<qmc_allocator_traits<ALLOC1>::is_host_accessible>,
typename = std::enable_if_t<qmc_allocator_traits<ALLOC2>::is_host_accessible>>
inline std::enable_if_t<!std::is_same<T_FP, TMAT>::value> invert_transpose(const Matrix<TMAT, ALLOC1>& amat,
Matrix<TMAT, ALLOC2>& invMat,
std::complex<TREAL>& LogDet)
{
const int n = invMat.rows();
Expand Down
12 changes: 6 additions & 6 deletions src/QMCWaveFunctions/Fermion/DiracMatrixComputeCUDA.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ class DiracMatrixComputeCUDA : public Resource
*/
template<typename TMAT>
inline void mw_computeInvertAndLog(CUDALinearAlgebraHandles& cuda_handles,
RefVector<DualMatrix<TMAT>>& a_mats,
RefVector<DualMatrix<TMAT>>& inv_a_mats,
const RefVector<DualMatrix<TMAT>>& a_mats,
const RefVector<DualMatrix<TMAT>>& inv_a_mats,
const int n,
const int lda,
DualVector<LogValue>& log_values)
Expand Down Expand Up @@ -276,8 +276,8 @@ class DiracMatrixComputeCUDA : public Resource
template<typename TMAT>
inline std::enable_if_t<!std::is_same<VALUE_FP, TMAT>::value> mw_invertTranspose(
CUDALinearAlgebraHandles& cuda_handles,
RefVector<DualMatrix<TMAT>>& a_mats,
RefVector<DualMatrix<TMAT>>& inv_a_mats,
const RefVector<DualMatrix<TMAT>>& a_mats,
const RefVector<DualMatrix<TMAT>>& inv_a_mats,
DualVector<LogValue>& log_values,
const std::vector<bool>& compute_mask)
{
Expand Down Expand Up @@ -319,8 +319,8 @@ class DiracMatrixComputeCUDA : public Resource
template<typename TMAT>
inline std::enable_if_t<std::is_same<VALUE_FP, TMAT>::value> mw_invertTranspose(
CUDALinearAlgebraHandles& cuda_handles,
RefVector<DualMatrix<TMAT>>& a_mats,
RefVector<DualMatrix<TMAT>>& inv_a_mats,
const RefVector<DualMatrix<TMAT>>& a_mats,
const RefVector<DualMatrix<TMAT>>& inv_a_mats,
DualVector<LogValue>& log_values,
const std::vector<bool>& compute_mask)
{
Expand Down
4 changes: 2 additions & 2 deletions src/QMCWaveFunctions/Fermion/DiracMatrixComputeOMPTarget.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,8 @@ class DiracMatrixComputeOMPTarget : public Resource
*/
template<typename TMAT>
inline void mw_invertTranspose(HandleResource& resource,
RefVector<const OffloadPinnedMatrix<TMAT>>& a_mats,
RefVector<OffloadPinnedMatrix<TMAT>>& inv_a_mats,
const RefVector<const OffloadPinnedMatrix<TMAT>>& a_mats,
const RefVector<OffloadPinnedMatrix<TMAT>>& inv_a_mats,
OffloadPinnedVector<LogValue>& log_values,
const std::vector<bool>& recompute)
{
Expand Down
4 changes: 2 additions & 2 deletions src/QMCWaveFunctions/Fermion/MatrixDelayedUpdateCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -412,8 +412,8 @@ class MatrixDelayedUpdateCUDA
}

static void mw_invertTranspose(const RefVectorWithLeader<This_t>& engines,
RefVector<const DualMatrix<Value>>& psiM_list,
RefVector<DualMatrix<Value>>& psiMinv_list,
const RefVector<const DualMatrix<Value>>& psiM_list,
const RefVector<DualMatrix<Value>>& psiMinv_list,
DualVector<LogValue>& log_values,
const std::vector<bool>& compute_mask)
{
Expand Down
20 changes: 11 additions & 9 deletions src/QMCWaveFunctions/Fermion/MatrixUpdateOMPTarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,24 +178,26 @@ class MatrixUpdateOMPTarget
}

static void mw_invertTranspose(const RefVectorWithLeader<This_t>& engines,
RefVector<const OffloadMatrix<Value>>& logdetT_list,
RefVector<OffloadMatrix<Value>>& a_inv_refs,
const RefVector<const OffloadMatrix<Value>>& psiM_list,
const RefVector<OffloadMatrix<Value>>& a_inv_refs,
OffloadVector<LogValue>& log_values,
const std::vector<bool>& compute_mask)
{
auto& engine_leader = engines.getLeader();
auto& det_inverter = engine_leader.get_det_inverter();

a_inv_refs.reserve(engines.size());

for (int iw = 0; iw < engines.size(); iw++)
{
a_inv_refs.emplace_back(engines[iw].get_ref_psiMinv());
const Value* a_inv_ptr = a_inv_refs.back().get().data();
PRAGMA_OFFLOAD("omp target update to(a_inv_ptr[:a_inv_refs.back().get().size()])")
auto& Ainv = a_inv_refs[iw].get();
engine_leader.detEng.invert_transpose(psiM_list[iw].get(), Ainv, log_values[iw]);
Value* Ainv_ptr = Ainv.data();
PRAGMA_OFFLOAD("omp target update to(Ainv_ptr[:Ainv.size()])")
}
typename DetInverter::HandleResource dummy;
det_inverter.mw_invertTranspose(dummy, logdetT_list, a_inv_refs, log_values, compute_mask);
PRAGMA_OFFLOAD("omp taskwait")

//FIXME DiracMatrixComputeOMPTarget is either broken or connected incorrectly
//typename DetInverter::HandleResource dummy;
//det_inverter.mw_invertTranspose(dummy, psiM_list, a_inv_refs, log_values, compute_mask);
}

template<typename GT>
Expand Down

0 comments on commit f7874e4

Please sign in to comment.