Skip to content

Commit

Permalink
Merge branch 'develop' into lingering-noasan
Browse files Browse the repository at this point in the history
  • Loading branch information
prckent authored Jul 27, 2021
2 parents 8db9ab6 + bdef538 commit 9012718
Show file tree
Hide file tree
Showing 44 changed files with 1,720 additions and 590 deletions.
8 changes: 6 additions & 2 deletions src/Particle/DTModes.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,17 @@ enum class DTModes : uint_fast8_t
* Optimization can be implemented during forward PbyP move when the full table is not needed all the time.
* DT consumers should know if full table is needed or not and request via addTable.
*/
NEED_FULL_TABLE_ANYTIME = 0x1,
NEED_FULL_TABLE_ANYTIME = 0x1,
/** whether temporary data set on the host is updated or not when a move is proposed.
* Considering transferring data from accelerator to host is relatively expensive,
* only request this when data on host is needed for unoptimized code path.
* This flag affects three subroutines mw_move, mw_updatePartial, mw_finalizePbyP in DistanceTableData.
*/
NEED_TEMP_DATA_ON_HOST = 0x2
NEED_TEMP_DATA_ON_HOST = 0x2,
/** skip data transfer back to host after mw_evaluate full distance table.
* this optimization can be used for distance table consumed directly on the device without copying back to the host.
*/
MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST = 0x4
};

constexpr bool operator&(DTModes x, DTModes y)
Expand Down
22 changes: 18 additions & 4 deletions src/Particle/DistanceTableData.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,20 @@ class DistanceTableData
/// number of source particles held by this table
inline IndexType sources() const
{
  return N_sources;
}

/** return the multi-walker full distance table data pointer
 *
 * Base-class fallback: no pointer is ever produced; the call always throws
 * std::runtime_error with a message prefixed by this table's name_.
 */
virtual const RealType* getMultiWalkerDataPtr() const
{
  throw std::runtime_error(name_ + " multi walker data pointer not supported");
  return nullptr; // dummy return to avoid compiler warning.
}

/** stride of the data belonging to one target particle
 *
 * The full table data spans stride * (number of target particles).
 * Base-class fallback: the call always throws std::runtime_error with a
 * message prefixed by this table's name_.
 */
virtual size_t getPerTargetPctlStrideSize() const
{
  const auto message = name_ + " getPerTargetPctlStrideSize not supported";
  throw std::runtime_error(message);
  return 0; // dummy return to avoid compiler warning.
}

/// read-only access to the full table of distances
const std::vector<DistRow>& getDistances() const
{
  return distances_;
}
Expand All @@ -145,15 +159,15 @@ class DistanceTableData
*/
virtual const DistRow& getOldDists() const
{
APP_ABORT("DistanceTableData::getOldDists is used incorrectly! Contact developers on github.");
throw std::runtime_error("DistanceTableData::getOldDists is used incorrectly! Contact developers on github.");
return temp_r_; // dummy return to avoid compiler warning.
}

/** return old displacements set up by move() for optimized distance table consumers
*/
virtual const DisplRow& getOldDispls() const
{
APP_ABORT("DistanceTableData::getOldDispls is used incorrectly! Contact developers on github.");
throw std::runtime_error("DistanceTableData::getOldDispls is used incorrectly! Contact developers on github.");
return temp_dr_; // dummy return to avoid compiler warning.
}

Expand Down Expand Up @@ -293,13 +307,13 @@ class DistanceTableData
*/
virtual int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const
{
APP_ABORT("DistanceTableData::get_first_neighbor is not implemented in calling base class");
throw std::runtime_error("DistanceTableData::get_first_neighbor is not implemented in calling base class");
return 0;
}

inline void print(std::ostream& os)
{
APP_ABORT("DistanceTableData::print is not supported")
throw std::runtime_error("DistanceTableData::print is not supported");
//os << "Table " << Origin->getName() << std::endl;
//for (int i = 0; i < r_m.size(); i++)
// os << r_m[i] << " ";
Expand Down
28 changes: 23 additions & 5 deletions src/Particle/SoaDistanceTableABOMPTarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds<T, D, SC>, public Distance

// initialize memory containers and views
const int N_sources_padded = getAlignedSize<T>(N_sources);
const int stride_size = N_sources_padded * (D + 1);
const int stride_size = getPerTargetPctlStrideSize();
r_dr_memorypool_.resize(stride_size * N_targets);

distances_.resize(N_targets);
Expand Down Expand Up @@ -175,6 +175,15 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds<T, D, SC>, public Distance
}
}

/** pointer to the start of the multi-walker mw_r_dr storage
 *
 * @return mw_mem_->mw_r_dr.data()
 * @throws std::runtime_error when mw_mem_ is null
 */
const T* getMultiWalkerDataPtr() const override
{
  if (mw_mem_)
    return mw_mem_->mw_r_dr.data();
  throw std::runtime_error("SoaDistanceTableABOMPTarget mw_mem_ is nullptr");
}

/// per-target stride: the aligned source count times (D + 1)
size_t getPerTargetPctlStrideSize() const override
{
  const auto aligned_sources = getAlignedSize<T>(N_sources);
  return aligned_sources * (D + 1);
}

/** evaluate the full table */
inline void evaluate(ParticleSet& P) override
{
Expand All @@ -199,7 +208,7 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds<T, D, SC>, public Distance
// To maximize thread usage, the loop over electrons is chunked. Each chunk is sent to an OpenMP offload thread team.
const int ChunkSizePerTeam = 256;
const int num_teams = (N_sources + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
const size_t stride_size = N_sources_padded * (D + 1);
const size_t stride_size = getPerTargetPctlStrideSize();

{
ScopedTimer offload(offload_timer_);
Expand Down Expand Up @@ -258,8 +267,8 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds<T, D, SC>, public Distance
const int N_sources_padded = getAlignedSize<T>(N_sources);

#ifndef NDEBUG
const int stride_size = N_sources_padded * (D + 1);
count_targets = 0;
const int stride_size = getPerTargetPctlStrideSize();
count_targets = 0;
for (size_t iw = 0; iw < dt_list.size(); iw++)
{
auto& dt = dt_list.getCastedElement<SoaDistanceTableABOMPTarget>(iw);
Expand Down Expand Up @@ -316,7 +325,7 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds<T, D, SC>, public Distance
ScopedTimer offload(dt_leader.offload_timer_);
PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(total_targets*num_teams) \
map(always, to: input_ptr[:offload_input.size()]) \
map(always, from: r_dr_ptr[:mw_r_dr.size()])")
depend(out:r_dr_ptr[:mw_r_dr.size()]) nowait")
for (int iat = 0; iat < total_targets; ++iat)
for (int team_id = 0; team_id < num_teams; team_id++)
{
Expand All @@ -339,6 +348,15 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds<T, D, SC>, public Distance
DTD_BConds<T, D, SC>::computeDistancesOffload(pos, source_pos_ptr, r_iat_ptr, dr_iat_ptr, N_sources_padded,
iel);
}

if (!(modes_ & DTModes::MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST))
{
PRAGMA_OFFLOAD(
"omp target update from(r_dr_ptr[:mw_r_dr.size()]) depend(inout:r_dr_ptr[:mw_r_dr.size()]) nowait")
}
// wait for computing and (optional) transferring back to host.
// It can potentially be moved to ParticleSet to fuse multiple similar taskwait
PRAGMA_OFFLOAD("omp taskwait")
}
}

Expand Down
88 changes: 79 additions & 9 deletions src/Particle/VirtualParticleSet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
// Copyright (c) 2021 QMCPACK developers.
//
// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
Expand All @@ -20,9 +20,23 @@
#include "Particle/DistanceTableData.h"
#include "Particle/createDistanceTable.h"
#include "QMCHamiltonians/NLPPJob.h"
#include "ResourceCollection.h"

namespace qmcplusplus
{

/// multi-walker memory of VirtualParticleSet, handled as a Resource
struct VPMultiWalkerMem : public Resource
{
  VPMultiWalkerMem() : Resource("VPMultiWalkerMem") {}

  /// copying delegates to the default constructor; the content of mw_refPctls is not duplicated
  VPMultiWalkerMem(const VPMultiWalkerMem&) : VPMultiWalkerMem() {}

  /// produce a new heap-allocated object through the copy constructor
  Resource* makeClone() const override
  {
    Resource* cloned = new VPMultiWalkerMem(*this);
    return cloned;
  }

  /// multi walker reference particle
  Vector<int, OffloadPinnedAllocator<int>> mw_refPctls;
};

VirtualParticleSet::VirtualParticleSet(const ParticleSet& p, int nptcl) : refPS(p)
{
setName("virtual");
Expand All @@ -38,6 +52,48 @@ VirtualParticleSet::VirtualParticleSet(const ParticleSet& p, int nptcl) : refPS(
addTable(refPS.getDistTable(i).origin());
}

// Defaulted here in the source file: VirtualParticleSet.h only forward-declares
// VPMultiWalkerMem, while the mw_mem_ member is a std::unique_ptr<VPMultiWalkerMem>.
VirtualParticleSet::~VirtualParticleSet() = default;

/// mutable access to mw_refPctls stored in mw_mem_; mw_mem_ must not be null
Vector<int, OffloadPinnedAllocator<int>>& VirtualParticleSet::getMultiWalkerRefPctls()
{
  assert(mw_mem_ != nullptr);
  auto& ref_pctls = mw_mem_->mw_refPctls;
  return ref_pctls;
}

/// read-only access to mw_refPctls stored in mw_mem_; mw_mem_ must not be null
const Vector<int, OffloadPinnedAllocator<int>>& VirtualParticleSet::getMultiWalkerRefPctls() const
{
  assert(mw_mem_ != nullptr);
  const auto& ref_pctls = mw_mem_->mw_refPctls;
  return ref_pctls;
}

/// add a fresh VPMultiWalkerMem to the collection, then let ParticleSet add its own resources
void VirtualParticleSet::createResource(ResourceCollection& collection) const
{
  auto vp_mw_mem = std::make_unique<VPMultiWalkerMem>();
  collection.addResource(std::move(vp_mw_mem));

  ParticleSet::createResource(collection);
}

/** take the next resource lent by the collection and attach it to the leader of vp_list,
 * then forward to ParticleSet::acquireResource for the same walkers.
 *
 * @param collection resource collection to borrow from
 * @param vp_list list of VirtualParticleSet; only its leader receives mw_mem_
 * @throws std::runtime_error when the lent resource is not a VPMultiWalkerMem
 */
void VirtualParticleSet::acquireResource(ResourceCollection& collection,
                                         const RefVectorWithLeader<VirtualParticleSet>& vp_list)
{
  auto& vp_leader = vp_list.getLeader();

  // Keep the lent resource owned until the type check has passed. Releasing it
  // before the check would leak the object when the cast fails and we throw.
  auto lent_res = collection.lendResource();
  auto* res_ptr = dynamic_cast<VPMultiWalkerMem*>(lent_res.get());
  if (!res_ptr)
    throw std::runtime_error("VirtualParticleSet::acquireResource dynamic_cast failed");
  static_cast<void>(lent_res.release()); // ownership is handed to the leader on the next line
  vp_leader.mw_mem_.reset(res_ptr);

  auto p_list = RefVectorWithLeaderParticleSet(vp_list);
  ParticleSet::acquireResource(collection, p_list);
}

/// hand the leader's mw_mem_ back to the collection, then forward to ParticleSet::releaseResource
void VirtualParticleSet::releaseResource(ResourceCollection& collection,
                                         const RefVectorWithLeader<VirtualParticleSet>& vp_list)
{
  auto& vp_leader = vp_list.getLeader();
  collection.takebackResource(std::move(vp_leader.mw_mem_));

  auto p_list = RefVectorWithLeaderParticleSet(vp_list);
  ParticleSet::releaseResource(collection, p_list);
}

/// move virtual particles to new positions and update distance tables
void VirtualParticleSet::makeMoves(int jel,
const PosType& ref_pos,
Expand All @@ -46,7 +102,8 @@ void VirtualParticleSet::makeMoves(int jel,
int iat)
{
if (sphere && iat < 0)
APP_ABORT("VirtualParticleSet::makeMoves is invoked incorrectly, the flag sphere=true requires iat specified!");
throw std::runtime_error(
"VirtualParticleSet::makeMoves is invoked incorrectly, the flag sphere=true requires iat specified!");
onSphere = sphere;
refPtcl = jel;
refSourcePtcl = iat;
Expand All @@ -57,13 +114,21 @@ void VirtualParticleSet::makeMoves(int jel,
}

void VirtualParticleSet::mw_makeMoves(const RefVectorWithLeader<VirtualParticleSet>& vp_list,
const RefVector<const std::vector<PosType>>& deltaV_list,
const RefVector<const NLPPJob<RealType>>& joblist,
bool sphere)
const RefVector<const std::vector<PosType>>& deltaV_list,
const RefVector<const NLPPJob<RealType>>& joblist,
bool sphere)
{
RefVectorWithLeader<ParticleSet> p_list(vp_list.getLeader());
auto& vp_leader = vp_list.getLeader();
vp_leader.onSphere = sphere;

const size_t nVPs = countVPs(vp_list);
auto& mw_refPctls = vp_leader.getMultiWalkerRefPctls();
mw_refPctls.resize(nVPs);

RefVectorWithLeader<ParticleSet> p_list(vp_leader);
p_list.reserve(vp_list.size());

size_t ivp = 0;
for (int iw = 0; iw < vp_list.size(); iw++)
{
VirtualParticleSet& vp(vp_list[iw]);
Expand All @@ -74,12 +139,17 @@ void VirtualParticleSet::mw_makeMoves(const RefVectorWithLeader<VirtualParticleS
vp.refPtcl = job.electron_id;
vp.refSourcePtcl = job.ion_id;
assert(vp.R.size() == deltaV.size());
for (size_t ivp = 0; ivp < vp.R.size(); ivp++)
vp.R[ivp] = job.elec_pos + deltaV[ivp];
for (size_t k = 0; k < vp.R.size(); k++, ivp++)
{
vp.R[k] = job.elec_pos + deltaV[k];
mw_refPctls[ivp] = vp.refPtcl;
}
p_list.push_back(vp);
}
assert(ivp == nVPs);

mw_update(p_list);
mw_refPctls.updateTo();
ParticleSet::mw_update(p_list);
}

} // namespace qmcplusplus
39 changes: 38 additions & 1 deletion src/Particle/VirtualParticleSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
// Copyright (c) 2021 QMCPACK developers.
//
// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
Expand All @@ -19,13 +19,15 @@

#include "Configuration.h"
#include "Particle/ParticleSet.h"
#include "OMPTarget/OMPAlignedAllocator.hpp"

namespace qmcplusplus
{
// forward declaration.
class NonLocalECPComponent;
template<typename T>
struct NLPPJob;
struct VPMultiWalkerMem;

/** Introduced to handle virtual moves and ratio computations, e.g. for non-local PP evaluations.
*/
Expand All @@ -34,6 +36,10 @@ class VirtualParticleSet : public ParticleSet
private:
/// true, if virtual particles are on a sphere for NLPP
bool onSphere;
/// multi walker resource
std::unique_ptr<VPMultiWalkerMem> mw_mem_;

Vector<int, OffloadPinnedAllocator<int>>& getMultiWalkerRefPctls();

public:
/// Reference particle
Expand All @@ -46,12 +52,27 @@ class VirtualParticleSet : public ParticleSet

inline bool isOnSphere() const { return onSphere; }

const Vector<int, OffloadPinnedAllocator<int>>& getMultiWalkerRefPctls() const;

/** constructor
* @param p ParticleSet whose virtual moves are handled by this object
* @param nptcl number of virtual particles
*/
VirtualParticleSet(const ParticleSet& p, int nptcl);

~VirtualParticleSet();

/// initialize a shared resource and hand it to a collection
void createResource(ResourceCollection& collection) const;
/** acquire external resource and associate it with the list of ParticleSet
* Note: use RAII ResourceCollectionTeamLock whenever possible
*/
static void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<VirtualParticleSet>& vp_list);
/** release external resource
* Note: use RAII ResourceCollectionTeamLock whenever possible
*/
static void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<VirtualParticleSet>& vp_list);

/** move virtual particles to new positions and update distance tables
* @param jel reference particle that all the VP moves from
* @param ref_pos reference particle position
Expand Down Expand Up @@ -79,6 +100,22 @@ class VirtualParticleSet : public ParticleSet
ref_list.push_back(vp);
return ref_list;
}

static size_t countVPs(const RefVectorWithLeader<const VirtualParticleSet>& vp_list)
{
size_t nVPs = 0;
for (const VirtualParticleSet& vp : vp_list)
nVPs += vp.getTotalNum();
return nVPs;
}

static size_t countVPs(const RefVectorWithLeader<VirtualParticleSet>& vp_list)
{
size_t nVPs = 0;
for (const VirtualParticleSet& vp : vp_list)
nVPs += vp.getTotalNum();
return nVPs;
}
};
} // namespace qmcplusplus
#endif
28 changes: 28 additions & 0 deletions src/Platforms/OMPTarget/OMPAlignedAllocator.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2021 QMCPACK developers.
//
// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
//
// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
//////////////////////////////////////////////////////////////////////////////////////
// -*- C++ -*-
/** @file
*/
#ifndef QMCPLUSPLUS_OMPTARGET_ALIGNED_ALLOCATOR_H
#define QMCPLUSPLUS_OMPTARGET_ALIGNED_ALLOCATOR_H


#include "OMPallocator.hpp"
#include "PinnedAllocator.h"

namespace qmcplusplus
{
/// alias: OMPallocator parameterized with aligned_allocator as its second template argument
template<typename DT>
using OffloadAllocator = OMPallocator<DT, aligned_allocator<DT>>;
/// alias: OMPallocator parameterized with PinnedAlignedAllocator as its second template argument
template<typename DT>
using OffloadPinnedAllocator = OMPallocator<DT, PinnedAlignedAllocator<DT>>;
} // namespace qmcplusplus
#endif
Loading

0 comments on commit 9012718

Please sign in to comment.