Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hlpr vctr containers #447

Merged
merged 4 commits into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/detail/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ namespace libcloudphxx
eps_tolerance(sizeof(real_t) * 8 / 4)
{}
};

// just some constants, not related to config, but they had to be put somewhere
enum { invalid = -1, no_initial_value = -44 };
};
};
};
81 changes: 55 additions & 26 deletions src/impl/particles_impl.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,6 @@ namespace libcloudphxx
{
namespace lgrngn
{
namespace detail
{
enum { invalid = -1 };

};

// pimpl stuff
template <typename real_t, backend_t device>
struct particles_t<real_t, device>::impl
Expand Down Expand Up @@ -271,11 +265,20 @@ namespace libcloudphxx
// ids of sds to be copied with distmem
thrust_device::vector<thrust_size_t> &lft_id, &rgt_id;

// real_t vectors copied in distributed memory case
std::set<thrust_device::vector<real_t>*> distmem_real_vctrs;
// --- containers with vector pointers to help resize and copy vectors ---

// vectors copied between distributed memories (MPI, multi_CUDA), these are SD attributes
std::set<std::pair<thrust_device::vector<real_t>*, real_t>> distmem_real_vctrs; // pair of vector and its initial value
std::set<thrust_device::vector<n_t>*> distmem_n_vctrs;
// std::set<thrust_device::vector<thrust_size_t>*> distmem_size_vctrs; // no size vectors copied?
//
// vectors that are not in distmem_real_vctrs but need to be resized when the number of SDs changes; these are helper variables
std::set<thrust_device::vector<real_t>*> resize_real_vctrs;
// std::set<thrust_device::vector<n_t>*> resize_n_vctrs;
std::set<thrust_device::vector<thrust_size_t>*> resize_size_vctrs;


// methods
// --- methods ---

// fills u01 with n random real numbers uniformly distributed in range [0,1)
void rand_u01(thrust_size_t n) { rng.generate_n(u01, n); }
Expand Down Expand Up @@ -386,40 +389,66 @@ namespace libcloudphxx
}

// initializing distmem_real_vctrs - list of real_t vectors with properties of SDs that have to be copied/removed/recycled when a SD is copied/removed/recycled
// TODO: add to that list vectors of other types (e.g integer pimpl->n)
// NOTE: this does not include chemical stuff due to the way chem vctrs are organized! multi_CUDA / MPI does not work with chemistry as of now
typedef thrust_device::vector<real_t>* ptr_t;
ptr_t arr[] = {&rd3, &rw2, &kpa, &vt};
distmem_real_vctrs = std::set<ptr_t>(arr, arr + sizeof(arr) / sizeof(ptr_t) );
distmem_real_vctrs.insert({&rd3, detail::no_initial_value});
distmem_real_vctrs.insert({&rw2, detail::no_initial_value});
distmem_real_vctrs.insert({&kpa, detail::no_initial_value});

if (opts_init.nx != 0) distmem_real_vctrs.insert(&x);
if (opts_init.ny != 0) distmem_real_vctrs.insert(&y);
if (opts_init.nz != 0) distmem_real_vctrs.insert(&z);
distmem_real_vctrs.insert({&vt, detail::invalid});

if (opts_init.nx != 0) distmem_real_vctrs.insert({&x, detail::no_initial_value});
if (opts_init.ny != 0) distmem_real_vctrs.insert({&y, detail::no_initial_value});
if (opts_init.nz != 0) distmem_real_vctrs.insert({&z, detail::no_initial_value});

if(allow_sstp_cond && opts_init.exact_sstp_cond)
{
distmem_real_vctrs.insert(&sstp_tmp_rv);
distmem_real_vctrs.insert(&sstp_tmp_th);
distmem_real_vctrs.insert(&sstp_tmp_rh);
distmem_real_vctrs.insert({&sstp_tmp_rv, detail::no_initial_value});
distmem_real_vctrs.insert({&sstp_tmp_th, detail::no_initial_value});
distmem_real_vctrs.insert({&sstp_tmp_rh, detail::no_initial_value});
// sstp_tmp_p needs to be added if a constant pressure profile is used, but this is only known after init - see particles_init
}

if(opts_init.turb_adve_switch)
{
if(opts_init.nx != 0) distmem_real_vctrs.insert(&up);
if(opts_init.ny != 0) distmem_real_vctrs.insert(&vp);
if(opts_init.nz != 0) distmem_real_vctrs.insert(&wp);
if(opts_init.nx != 0) distmem_real_vctrs.insert({&up, 0});
if(opts_init.ny != 0) distmem_real_vctrs.insert({&vp, 0});
if(opts_init.nz != 0) distmem_real_vctrs.insert({&wp, 0});
}

if(opts_init.turb_cond_switch)
{
distmem_real_vctrs.insert(&wp);
distmem_real_vctrs.insert(&ssp);
distmem_real_vctrs.insert(&dot_ssp);
distmem_real_vctrs.insert({&wp, 0});
distmem_real_vctrs.insert({&ssp, 0});
distmem_real_vctrs.insert({&dot_ssp, 0});
}

if(opts_init.diag_incloud_time)
distmem_real_vctrs.insert(&incloud_time);
distmem_real_vctrs.insert({&incloud_time, detail::no_initial_value});

// initializing distmem_n_vctrs - list of n_t vectors with properties of SDs that have to be copied/removed/recycled when a SD is copied/removed/recycled
distmem_n_vctrs.insert(&n);

// real vctrs that need to be resized but do not need to be copied in distmem
resize_real_vctrs.insert(&tmp_device_real_part);
if(opts_init.chem_switch || allow_sstp_cond || n_dims >= 2)
resize_real_vctrs.insert(&tmp_device_real_part1);
if((allow_sstp_cond && opts_init.exact_sstp_cond) || n_dims==3 || opts_init.turb_cond_switch)
resize_real_vctrs.insert(&tmp_device_real_part2);
if(allow_sstp_cond && opts_init.exact_sstp_cond)
{
resize_real_vctrs.insert(&tmp_device_real_part3);
resize_real_vctrs.insert(&tmp_device_real_part4);
if(const_p)
resize_real_vctrs.insert(&tmp_device_real_part5);
}

resize_size_vctrs.insert(&ijk);
resize_size_vctrs.insert(&sorted_ijk);
resize_size_vctrs.insert(&sorted_id);
resize_size_vctrs.insert(&tmp_device_size_part);
if (opts_init.nx != 0) resize_size_vctrs.insert(&i);
if (opts_init.ny != 0) resize_size_vctrs.insert(&j);
if (opts_init.nz != 0) resize_size_vctrs.insert(&k);
}

void sanity_checks();
Expand Down
2 changes: 1 addition & 1 deletion src/impl/particles_impl_bcnd.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ namespace libcloudphxx
arg::_1 >= opts_init.x1
) - rgt_id.begin();

const int no_of_n_vctrs_copied(int(1));
const int no_of_n_vctrs_copied(distmem_n_vctrs.size());
const int no_of_real_vctrs_copied(distmem_real_vctrs.size());

if(lft_count*no_of_n_vctrs_copied > in_n_bfr.size() || rgt_count*no_of_n_vctrs_copied > in_n_bfr.size())
Expand Down
4 changes: 2 additions & 2 deletions src/impl/particles_impl_hskpng_remove.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ namespace libcloudphxx
for(auto vec: distmem_real_vctrs)
{
thrust::remove_if(
vec->begin(),
vec->begin() + n_part,
vec.first->begin(),
vec.first->begin() + n_part,
n.begin(),
arg::_1 == 0
);
Expand Down
73 changes: 13 additions & 60 deletions src/impl/particles_impl_hskpng_resize.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -7,73 +7,26 @@ namespace libcloudphxx
void particles_t<real_t, device>::impl::hskpng_resize_npart()
{
if(n_part > opts_init.n_sd_max) throw std::runtime_error(detail::formatter() << "n_sd_max (" << opts_init.n_sd_max << ") < n_part (" << n_part << ")");
{
thrust_device::vector<real_t> *vec[] = {&rw2, &rd3, &kpa, &tmp_device_real_part};
for(int i=0; i<4; ++i)
{
vec[i]->resize(n_part);
}
}
{
thrust_device::vector<thrust_size_t> *vec[] = {&ijk, &sorted_id, &sorted_ijk};
for(int i=0; i<3; ++i)
{
vec[i]->resize(n_part);
}
}
n.resize(n_part);
tmp_device_n_part.resize(n_part);
tmp_device_size_part.resize(n_part);

vt.resize(n_part, detail::invalid);
for(auto &vec: distmem_n_vctrs)
vec->resize(n_part);

if (opts_init.nx != 0) i.resize(n_part);
if (opts_init.ny != 0) j.resize(n_part);
if (opts_init.nz != 0) k.resize(n_part);

if (opts_init.nx != 0) x.resize(n_part);
if (opts_init.ny != 0) y.resize(n_part);
if (opts_init.nz != 0) z.resize(n_part);

if(opts_init.turb_adve_switch)
for(auto &pair: distmem_real_vctrs)
{
if (opts_init.nx != 0) up.resize(n_part, 0);
if (opts_init.ny != 0) vp.resize(n_part, 0);
if (opts_init.nz != 0) wp.resize(n_part, 0);
if(pair.second == detail::no_initial_value)
pair.first->resize(n_part);
else
pair.first->resize(n_part, pair.second);
}

if(opts_init.turb_cond_switch)
{
wp.resize(n_part, 0);
ssp.resize(n_part, 0);
dot_ssp.resize(n_part, 0);
}
for(auto &vec: resize_real_vctrs)
vec->resize(n_part);

if(opts_init.chem_switch || allow_sstp_cond || n_dims >= 2)
{
tmp_device_real_part1.resize(n_part);
}
if((allow_sstp_cond && opts_init.exact_sstp_cond) || n_dims==3 || opts_init.turb_cond_switch)
{
tmp_device_real_part2.resize(n_part);
}
for(auto &vec: resize_size_vctrs)
vec->resize(n_part);

if(allow_sstp_cond && opts_init.exact_sstp_cond)
{
tmp_device_real_part3.resize(n_part);
tmp_device_real_part4.resize(n_part);
sstp_tmp_rv.resize(n_part);
sstp_tmp_th.resize(n_part);
sstp_tmp_rh.resize(n_part);
if(const_p)
{
tmp_device_real_part5.resize(n_part);
sstp_tmp_p.resize(n_part);
}
}

if(opts_init.diag_incloud_time)
incloud_time.resize(n_part);
// it's an unsigned int vector, probably the only one we will use, hence no resize_t_vctrs helper is used
tmp_device_n_part.resize(n_part);
}
};
};
Expand Down
4 changes: 2 additions & 2 deletions src/impl/particles_impl_mpi_exchange.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ namespace libcloudphxx
// start async copy of n buffer to the left
MPI_CHECK(MPI_Isend(
out_n_bfr.data().get(), // raw pointer to the buffer
lft_count, // no of values to send
lft_count * distmem_n_vctrs.size(), // no of values to send
detail::get_mpi_type<n_t>(), // type
lft_rank, // dest comm
detail::tag_n_lft, // message tag
Expand Down Expand Up @@ -218,7 +218,7 @@ namespace libcloudphxx
// start async copy of n buffer to the right
MPI_CHECK(MPI_Isend(
out_n_bfr.data().get(), // raw pointer to the buffer
rgt_count, // no of values to send
rgt_count * distmem_n_vctrs.size(), // no of values to send
detail::get_mpi_type<n_t>(), // type
rgt_rank, // dest comm
detail::tag_n_rgt, // message tag
Expand Down
50 changes: 30 additions & 20 deletions src/impl/particles_impl_pack.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -29,25 +29,37 @@ namespace libcloudphxx
template <typename real_t, backend_t device>
void particles_t<real_t, device>::impl::pack_n_lft()
{
assert(out_n_bfr.size() >= lft_count);
assert(in_n_bfr.size() >= lft_count);
thrust::copy(
thrust::make_permutation_iterator(n.begin(), lft_id.begin()),
thrust::make_permutation_iterator(n.begin(), lft_id.begin()) + lft_count,
out_n_bfr.begin()
);
assert(out_n_bfr.size() >= lft_count * distmem_n_vctrs.size());
assert(in_n_bfr.size() >= lft_count * distmem_n_vctrs.size());

auto it = distmem_n_vctrs.begin();
while (it != distmem_n_vctrs.end())
{
thrust::copy(
thrust::make_permutation_iterator((*it)->begin(), lft_id.begin()),
thrust::make_permutation_iterator((*it)->begin(), lft_id.begin()) + lft_count,
out_n_bfr.begin() + std::distance(distmem_n_vctrs.begin(), it) * lft_count
);
it++;
}
}

template <typename real_t, backend_t device>
void particles_t<real_t, device>::impl::pack_n_rgt()
{
assert(out_n_bfr.size() >= rgt_count);
assert(in_n_bfr.size() >= rgt_count);
thrust::copy(
thrust::make_permutation_iterator(n.begin(), rgt_id.begin()),
thrust::make_permutation_iterator(n.begin(), rgt_id.begin()) + rgt_count,
out_n_bfr.begin()
);
assert(out_n_bfr.size() >= rgt_count * distmem_n_vctrs.size());
assert(in_n_bfr.size() >= rgt_count * distmem_n_vctrs.size());

auto it = distmem_n_vctrs.begin();
while (it != distmem_n_vctrs.end())
{
thrust::copy(
thrust::make_permutation_iterator((*it)->begin(), rgt_id.begin()),
thrust::make_permutation_iterator((*it)->begin(), rgt_id.begin()) + rgt_count,
out_n_bfr.begin() + std::distance(distmem_n_vctrs.begin(), it) * rgt_count
);
it++;
}
}

template <typename real_t, backend_t device>
Expand All @@ -57,12 +69,11 @@ namespace libcloudphxx
assert(in_real_bfr.size() >= lft_count * distmem_real_vctrs.size());

auto it = distmem_real_vctrs.begin();

while (it != distmem_real_vctrs.end())
{
thrust::copy(
thrust::make_permutation_iterator((*it)->begin(), lft_id.begin()),
thrust::make_permutation_iterator((*it)->begin(), lft_id.begin()) + lft_count,
thrust::make_permutation_iterator((*it).first->begin(), lft_id.begin()),
thrust::make_permutation_iterator((*it).first->begin(), lft_id.begin()) + lft_count,
out_real_bfr.begin() + std::distance(distmem_real_vctrs.begin(), it) * lft_count
);
it++;
Expand All @@ -76,12 +87,11 @@ namespace libcloudphxx
assert(in_real_bfr.size() >= rgt_count * distmem_real_vctrs.size());

auto it = distmem_real_vctrs.begin();

while (it != distmem_real_vctrs.end())
{
thrust::copy(
thrust::make_permutation_iterator((*it)->begin(), rgt_id.begin()),
thrust::make_permutation_iterator((*it)->begin(), rgt_id.begin()) + rgt_count,
thrust::make_permutation_iterator((*it).first->begin(), rgt_id.begin()),
thrust::make_permutation_iterator((*it).first->begin(), rgt_id.begin()) + rgt_count,
out_real_bfr.begin() + std::distance(distmem_real_vctrs.begin(), it) * rgt_count
);
it++;
Expand Down
2 changes: 1 addition & 1 deletion src/impl/particles_impl_rcyc.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ namespace libcloudphxx

// for each property...
for(auto vec: distmem_real_vctrs)
detail::copy_prop<real_t>(vec->begin(), sorted_id, n_flagged);
detail::copy_prop<real_t>(vec.first->begin(), sorted_id, n_flagged);

// ... chemical properties only if chem enabled
if (opts_init.chem_switch){
Expand Down
2 changes: 1 addition & 1 deletion src/impl/particles_impl_reserve_hskpng_npart.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ namespace libcloudphxx
// done using resize, because _bfr.end() is never used and we want to assert that buffer is large enough using the .size() function
if(distmem())
{
const int no_of_n_vctrs_copied(int(1));
const int no_of_n_vctrs_copied(distmem_n_vctrs.size());
const int no_of_real_vctrs_copied(distmem_real_vctrs.size());

in_n_bfr.resize(no_of_n_vctrs_copied * opts_init.n_sd_max / opts_init.nx / config.bfr_fraction); // for n
Expand Down
16 changes: 11 additions & 5 deletions src/impl/particles_impl_unpack.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,15 @@ namespace libcloudphxx
return;

assert(opts_init.n_sd_max >= n_part);
n.resize(n_part);
thrust::copy(in_n_bfr.begin(), in_n_bfr.begin() + n_copied, n.begin() + n_part_old);

auto it = distmem_n_vctrs.begin();
while (it != distmem_n_vctrs.end())
{
(*it)->resize(n_part);
auto distance = std::distance(distmem_n_vctrs.begin(), it);
thrust::copy( in_n_bfr.begin() + distance * n_copied, in_n_bfr.begin() + (distance+1) * n_copied, (*it)->begin() + n_part_old);
it++;
}
}

template <typename real_t, backend_t device>
Expand All @@ -67,12 +74,11 @@ namespace libcloudphxx
return;

auto it = distmem_real_vctrs.begin();

while (it != distmem_real_vctrs.end())
{
(*it)->resize(n_part);
(*it).first->resize(n_part);
auto distance = std::distance(distmem_real_vctrs.begin(), it);
thrust::copy( in_real_bfr.begin() + distance * n_copied, in_real_bfr.begin() + (distance+1) * n_copied, (*it)->begin() + n_part_old);
thrust::copy( in_real_bfr.begin() + distance * n_copied, in_real_bfr.begin() + (distance+1) * n_copied, (*it).first->begin() + n_part_old);
it++;
}

Expand Down
Loading
Loading