Skip to content

Commit

Permalink
Workaround for Bug in Recent NVCC Versions (at least 11.6, 11.7) (Gri…
Browse files Browse the repository at this point in the history
…dTools#1726)

Similar to GridTools#1710, but in GCL. Bug was triggered in GTBench.
  • Loading branch information
fthaler authored and havogt committed Aug 3, 2022
1 parent d5b3ac1 commit 8b74672
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 42 deletions.
64 changes: 34 additions & 30 deletions include/gridtools/gcl/high_level/descriptor_generic_manual.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ namespace gridtools {
namespace gcl {
template <typename HaloExch, typename proc_layout_abs>
class hndlr_generic<HaloExch, proc_layout_abs, cpu> : public descriptor_base<HaloExch> {
static const int DIMS = 3;
array<char *, static_pow3(DIMS)> send_buffer; // One entry will not be used...
array<char *, static_pow3(DIMS)> recv_buffer;
array<int, static_pow3(DIMS)> send_buffer_size; // One entry will not be used...
array<int, static_pow3(DIMS)> recv_buffer_size;
using DIMS_t = std::integral_constant<int, 3>;
array<char *, static_pow3(DIMS_t::value)> send_buffer; // One entry will not be used...
array<char *, static_pow3(DIMS_t::value)> recv_buffer;
array<int, static_pow3(DIMS_t::value)> send_buffer_size; // One entry will not be used...
array<int, static_pow3(DIMS_t::value)> recv_buffer_size;

public:
typedef descriptor_base<HaloExch> base_type;
Expand All @@ -85,7 +85,7 @@ namespace gridtools {
/**
Type of the translation used to map dimensions to buffer addresses
*/
typedef translate_t<DIMS> translate;
typedef translate_t<DIMS_t::value> translate;

hndlr_generic(grid_type const &g)
: base_type(g), send_buffer{nullptr}, recv_buffer{nullptr}, send_buffer_size{0}, recv_buffer_size{0} {}
Expand Down Expand Up @@ -119,7 +119,7 @@ namespace gridtools {
int typesize = sizeof(DataType)) {

typedef typename field_on_the_fly<DataType, f_layoutmap, traits>::inner_layoutmap t_layoutmap;
array<int, DIMS> eta;
array<int, DIMS_t::value> eta;
for (int i = -1; i <= 1; ++i) {
for (int j = -1; j <= 1; ++j) {
for (int k = -1; k <= 1; ++k) {
Expand Down Expand Up @@ -174,7 +174,7 @@ namespace gridtools {
halos.
*/
template <typename DataType, typename t_layoutmap>
void setup(array<size_t, static_pow3(DIMS)> const &buffer_size_list) {
void setup(array<size_t, static_pow3(DIMS_t::value)> const &buffer_size_list) {
for (int i = -1; i <= 1; ++i) {
for (int j = -1; j <= 1; ++j) {
for (int k = -1; k <= 1; ++k) {
Expand Down Expand Up @@ -210,24 +210,24 @@ namespace gridtools {
}

template <typename... FIELDS>
void pack(const FIELDS &... _fields) const {
void pack(const FIELDS &..._fields) const {
for (int ii = -1; ii <= 1; ++ii) {
for (int jj = -1; jj <= 1; ++jj) {
for (int kk = -1; kk <= 1; ++kk) {
char *it = reinterpret_cast<char *>(&(send_buffer[translate()(ii, jj, kk)][0]));
pack_dims<DIMS, 0>()(*this, ii, jj, kk, it, _fields...);
pack_dims<DIMS_t::value, 0>()(*this, ii, jj, kk, it, _fields...);
}
}
}
}

template <typename... FIELDS>
void unpack(const FIELDS &... _fields) const {
void unpack(const FIELDS &..._fields) const {
for (int ii = -1; ii <= 1; ++ii) {
for (int jj = -1; jj <= 1; ++jj) {
for (int kk = -1; kk <= 1; ++kk) {
char *it = reinterpret_cast<char *>(&(recv_buffer[translate()(ii, jj, kk)][0]));
unpack_dims<DIMS, 0>()(*this, ii, jj, kk, it, _fields...);
unpack_dims<DIMS_t::value, 0>()(*this, ii, jj, kk, it, _fields...);
}
}
}
Expand All @@ -247,7 +247,7 @@ namespace gridtools {
typename field_on_the_fly<T1, T2, T3>::value_type *it =
reinterpret_cast<typename field_on_the_fly<T1, T2, T3>::value_type *>(
&(send_buffer[translate()(ii, jj, kk)][0]));
pack_vector_dims<DIMS, 0>()(*this, ii, jj, kk, it, fields);
pack_vector_dims<DIMS_t::value, 0>()(*this, ii, jj, kk, it, fields);
}
}
}
Expand All @@ -267,7 +267,7 @@ namespace gridtools {
typename field_on_the_fly<T1, T2, T3>::value_type *it =
reinterpret_cast<typename field_on_the_fly<T1, T2, T3>::value_type *>(
&(recv_buffer[translate()(ii, jj, kk)][0]));
unpack_vector_dims<DIMS, 0>()(*this, ii, jj, kk, it, fields);
unpack_vector_dims<DIMS_t::value, 0>()(*this, ii, jj, kk, it, fields);
}
}
}
Expand All @@ -285,7 +285,7 @@ namespace gridtools {

template <typename T, typename iterator, typename FIRST, typename... FIELDS>
void operator()(
const T &hm, int ii, int jj, int kk, iterator &it, FIRST const &first, const FIELDS &... _fields)
const T &hm, int ii, int jj, int kk, iterator &it, FIRST const &first, const FIELDS &..._fields)
const {
using proc_layout = layout_transform<typename FIRST::inner_layoutmap, proc_layout_abs>;
const int ii_P = nth<proc_layout, 0>(ii, jj, kk);
Expand All @@ -309,7 +309,7 @@ namespace gridtools {

template <typename T, typename iterator, typename FIRST, typename... FIELDS>
void operator()(
const T &hm, int ii, int jj, int kk, iterator &it, FIRST const &first, const FIELDS &... _fields)
const T &hm, int ii, int jj, int kk, iterator &it, FIRST const &first, const FIELDS &..._fields)
const {
using proc_layout = layout_transform<typename FIRST::inner_layoutmap, proc_layout_abs>;
const int ii_P = nth<proc_layout, 0>(ii, jj, kk);
Expand Down Expand Up @@ -370,18 +370,18 @@ namespace gridtools {
class hndlr_generic<HaloExch, proc_layout_abs, gpu> : public descriptor_base<HaloExch> {
typedef gpu arch_type;

static const int DIMS = 3;
array<char *, static_pow3(DIMS)> send_buffer; // One entry will not be used...
array<char *, static_pow3(DIMS)> recv_buffer;
array<int, static_pow3(DIMS)> send_buffer_size; // One entry will not be used...
array<int, static_pow3(DIMS)> recv_buffer_size;
using DIMS_t = std::integral_constant<int, 3>;
array<char *, static_pow3(DIMS_t::value)> send_buffer; // One entry will not be used...
array<char *, static_pow3(DIMS_t::value)> recv_buffer;
array<int, static_pow3(DIMS_t::value)> send_buffer_size; // One entry will not be used...
array<int, static_pow3(DIMS_t::value)> recv_buffer_size;
char **d_send_buffer;
char **d_recv_buffer;

int *prefix_send_size;
int *prefix_recv_size;
array<int, static_pow3(DIMS)> send_size;
array<int, static_pow3(DIMS)> recv_size;
array<int, static_pow3(DIMS_t::value)> send_size;
array<int, static_pow3(DIMS_t::value)> recv_size;

int *d_send_size;
int *d_recv_size;
Expand All @@ -401,7 +401,7 @@ namespace gridtools {
/**
Type of the translation used to map dimensions to buffer addresses
*/
typedef translate_t<DIMS> translate;
typedef translate_t<DIMS_t::value> translate;

hndlr_generic(grid_type const &g)
: base_type(g), send_buffer{nullptr}, recv_buffer{nullptr}, send_buffer_size{0}, recv_buffer_size{0} {}
Expand Down Expand Up @@ -502,15 +502,19 @@ namespace gridtools {
}
}

GT_CUDA_CHECK(cudaMalloc(&d_send_buffer, static_pow3(DIMS) * sizeof(DataType *)));
GT_CUDA_CHECK(cudaMalloc(&d_send_buffer, static_pow3(DIMS_t::value) * sizeof(DataType *)));

GT_CUDA_CHECK(cudaMemcpy(
d_send_buffer, &send_buffer[0], static_pow3(DIMS) * sizeof(DataType *), cudaMemcpyHostToDevice));
GT_CUDA_CHECK(cudaMemcpy(d_send_buffer,
&send_buffer[0],
static_pow3(DIMS_t::value) * sizeof(DataType *),
cudaMemcpyHostToDevice));

GT_CUDA_CHECK(cudaMalloc(&d_recv_buffer, static_pow3(DIMS) * sizeof(DataType *)));
GT_CUDA_CHECK(cudaMalloc(&d_recv_buffer, static_pow3(DIMS_t::value) * sizeof(DataType *)));

GT_CUDA_CHECK(cudaMemcpy(
d_recv_buffer, &recv_buffer[0], static_pow3(DIMS) * sizeof(DataType *), cudaMemcpyHostToDevice));
GT_CUDA_CHECK(cudaMemcpy(d_recv_buffer,
&recv_buffer[0],
static_pow3(DIMS_t::value) * sizeof(DataType *),
cudaMemcpyHostToDevice));
}

/**
Expand Down
24 changes: 12 additions & 12 deletions include/gridtools/gcl/high_level/empty_field_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,13 @@ namespace gridtools {

template <typename DataType>
class empty_field_base {
static constexpr int DIMS = 3;
using DIMS_t = std::integral_constant<int, 3>;

typedef array<halo_descriptor, DIMS> HALO_t;
typedef array<halo_descriptor, DIMS_t::value> HALO_t;

public:
array<halo_descriptor, DIMS> halos;
typedef array<std::pair<MPI_Datatype, bool>, static_pow3(DIMS)> MPDT_t;
array<halo_descriptor, DIMS_t::value> halos;
typedef array<std::pair<MPI_Datatype, bool>, static_pow3(DIMS_t::value)> MPDT_t;
MPDT_t MPDT_OUTSIDE;
MPDT_t MPDT_INSIDE;

Expand All @@ -142,8 +142,8 @@ namespace gridtools {
void add_halo(int D, halo_descriptor const &halo) { halos[D] = halo; }

void setup() {
array<int, DIMS> tuple;
_impl::neigh_loop<DIMS>()(
array<int, DIMS_t::value> tuple;
_impl::neigh_loop<DIMS_t::value>()(
[&](auto const &tuple) {
int idx = _impl::neigh_idx(tuple);
MPDT_OUTSIDE[idx] = _impl::make_datatype_outin<DataType>::outside(halos, tuple);
Expand All @@ -152,11 +152,11 @@ namespace gridtools {
tuple);
}

std::pair<MPI_Datatype, bool> mpdt_inside(array<int, DIMS> const &eta) const {
std::pair<MPI_Datatype, bool> mpdt_inside(array<int, DIMS_t::value> const &eta) const {
return MPDT_INSIDE[_impl::neigh_idx(eta)];
}

std::pair<MPI_Datatype, bool> mpdt_outside(array<int, DIMS> const &eta) const {
std::pair<MPI_Datatype, bool> mpdt_outside(array<int, DIMS_t::value> const &eta) const {
return MPDT_OUTSIDE[_impl::neigh_idx(eta)];
}

Expand All @@ -167,9 +167,9 @@ namespace gridtools {
\param[in] eta the eta parameter as indicated in \link MULTI_DIM_ACCESS \endlink
*/
int send_buffer_size(array<int, DIMS> const &eta) const {
int send_buffer_size(array<int, DIMS_t::value> const &eta) const {
int S = 1;
for (int i = 0; i < DIMS; ++i) {
for (int i = 0; i < DIMS_t::value; ++i) {
S *= halos[i].s_length(eta[i]);
}
return S;
Expand All @@ -182,9 +182,9 @@ namespace gridtools {
\param[in] eta the eta parameter as indicated in \link MULTI_DIM_ACCESS \endlink
*/
int recv_buffer_size(array<int, DIMS> const &eta) const {
int recv_buffer_size(array<int, DIMS_t::value> const &eta) const {
int S = 1;
for (int i = 0; i < DIMS; ++i) {
for (int i = 0; i < DIMS_t::value; ++i) {
S *= halos[i].r_length(eta[i]);
}
return S;
Expand Down

0 comments on commit 8b74672

Please sign in to comment.