From 8b74672d3ad2d5957892953a8d09dcec515f6d4d Mon Sep 17 00:00:00 2001 From: Felix Thaler Date: Wed, 3 Aug 2022 06:24:34 +0000 Subject: [PATCH] Workaround for Bug in Recent NVCC Versions (at least 11.6, 11.7) (#1726) Similar to #1710, but in GCL. Bug was triggered in GTBench. --- .../high_level/descriptor_generic_manual.hpp | 64 ++++++++++--------- .../gcl/high_level/empty_field_base.hpp | 24 +++---- 2 files changed, 46 insertions(+), 42 deletions(-) diff --git a/include/gridtools/gcl/high_level/descriptor_generic_manual.hpp b/include/gridtools/gcl/high_level/descriptor_generic_manual.hpp index f6d1c5d521..a9b9ed92ae 100644 --- a/include/gridtools/gcl/high_level/descriptor_generic_manual.hpp +++ b/include/gridtools/gcl/high_level/descriptor_generic_manual.hpp @@ -64,11 +64,11 @@ namespace gridtools { namespace gcl { template class hndlr_generic : public descriptor_base { - static const int DIMS = 3; - array send_buffer; // One entry will not be used... - array recv_buffer; - array send_buffer_size; // One entry will not be used... - array recv_buffer_size; + using DIMS_t = std::integral_constant; + array send_buffer; // One entry will not be used... + array recv_buffer; + array send_buffer_size; // One entry will not be used... + array recv_buffer_size; public: typedef descriptor_base base_type; @@ -85,7 +85,7 @@ namespace gridtools { /** Type of the translation used to map dimensions to buffer addresses */ - typedef translate_t translate; + typedef translate_t translate; hndlr_generic(grid_type const &g) : base_type(g), send_buffer{nullptr}, recv_buffer{nullptr}, send_buffer_size{0}, recv_buffer_size{0} {} @@ -119,7 +119,7 @@ namespace gridtools { int typesize = sizeof(DataType)) { typedef typename field_on_the_fly::inner_layoutmap t_layoutmap; - array eta; + array eta; for (int i = -1; i <= 1; ++i) { for (int j = -1; j <= 1; ++j) { for (int k = -1; k <= 1; ++k) { @@ -174,7 +174,7 @@ namespace gridtools { halos. */ template - void setup(array const &buffer_size_list) { + void setup(array const &buffer_size_list) { for (int i = -1; i <= 1; ++i) { for (int j = -1; j <= 1; ++j) { for (int k = -1; k <= 1; ++k) { @@ -210,24 +210,24 @@ namespace gridtools { } template - void pack(const FIELDS &... _fields) const { + void pack(const FIELDS &..._fields) const { for (int ii = -1; ii <= 1; ++ii) { for (int jj = -1; jj <= 1; ++jj) { for (int kk = -1; kk <= 1; ++kk) { char *it = reinterpret_cast(&(send_buffer[translate()(ii, jj, kk)][0])); - pack_dims()(*this, ii, jj, kk, it, _fields...); + pack_dims()(*this, ii, jj, kk, it, _fields...); } } } } template - void unpack(const FIELDS &... _fields) const { + void unpack(const FIELDS &..._fields) const { for (int ii = -1; ii <= 1; ++ii) { for (int jj = -1; jj <= 1; ++jj) { for (int kk = -1; kk <= 1; ++kk) { char *it = reinterpret_cast(&(recv_buffer[translate()(ii, jj, kk)][0])); - unpack_dims()(*this, ii, jj, kk, it, _fields...); + unpack_dims()(*this, ii, jj, kk, it, _fields...); } } } @@ -247,7 +247,7 @@ namespace gridtools { typename field_on_the_fly::value_type *it = reinterpret_cast::value_type *>( &(send_buffer[translate()(ii, jj, kk)][0])); - pack_vector_dims()(*this, ii, jj, kk, it, fields); + pack_vector_dims()(*this, ii, jj, kk, it, fields); } } } @@ -267,7 +267,7 @@ namespace gridtools { typename field_on_the_fly::value_type *it = reinterpret_cast::value_type *>( &(recv_buffer[translate()(ii, jj, kk)][0])); - unpack_vector_dims()(*this, ii, jj, kk, it, fields); + unpack_vector_dims()(*this, ii, jj, kk, it, fields); } } } @@ -285,7 +285,7 @@ namespace gridtools { template void operator()( - const T &hm, int ii, int jj, int kk, iterator &it, FIRST const &first, const FIELDS &... _fields) + const T &hm, int ii, int jj, int kk, iterator &it, FIRST const &first, const FIELDS &..._fields) const { using proc_layout = layout_transform; const int ii_P = nth(ii, jj, kk); @@ -309,7 +309,7 @@ namespace gridtools { template void operator()( - const T &hm, int ii, int jj, int kk, iterator &it, FIRST const &first, const FIELDS &... _fields) + const T &hm, int ii, int jj, int kk, iterator &it, FIRST const &first, const FIELDS &..._fields) const { using proc_layout = layout_transform; const int ii_P = nth(ii, jj, kk); @@ -370,18 +370,18 @@ namespace gridtools { class hndlr_generic : public descriptor_base { typedef gpu arch_type; - static const int DIMS = 3; - array send_buffer; // One entry will not be used... - array recv_buffer; - array send_buffer_size; // One entry will not be used... - array recv_buffer_size; + using DIMS_t = std::integral_constant; + array send_buffer; // One entry will not be used... + array recv_buffer; + array send_buffer_size; // One entry will not be used... + array recv_buffer_size; char **d_send_buffer; char **d_recv_buffer; int *prefix_send_size; int *prefix_recv_size; - array send_size; - array recv_size; + array send_size; + array recv_size; int *d_send_size; int *d_recv_size; @@ -401,7 +401,7 @@ namespace gridtools { /** Type of the translation used to map dimensions to buffer addresses */ - typedef translate_t translate; + typedef translate_t translate; hndlr_generic(grid_type const &g) : base_type(g), send_buffer{nullptr}, recv_buffer{nullptr}, send_buffer_size{0}, recv_buffer_size{0} {} @@ -502,15 +502,19 @@ namespace gridtools { } } - GT_CUDA_CHECK(cudaMalloc(&d_send_buffer, static_pow3(DIMS) * sizeof(DataType *))); + GT_CUDA_CHECK(cudaMalloc(&d_send_buffer, static_pow3(DIMS_t::value) * sizeof(DataType *))); - GT_CUDA_CHECK(cudaMemcpy( - d_send_buffer, &send_buffer[0], static_pow3(DIMS) * sizeof(DataType *), cudaMemcpyHostToDevice)); + GT_CUDA_CHECK(cudaMemcpy(d_send_buffer, + &send_buffer[0], + static_pow3(DIMS_t::value) * sizeof(DataType *), + cudaMemcpyHostToDevice)); - GT_CUDA_CHECK(cudaMalloc(&d_recv_buffer, static_pow3(DIMS) * sizeof(DataType *))); + GT_CUDA_CHECK(cudaMalloc(&d_recv_buffer, static_pow3(DIMS_t::value) * sizeof(DataType *))); - GT_CUDA_CHECK(cudaMemcpy( - d_recv_buffer, &recv_buffer[0], static_pow3(DIMS) * sizeof(DataType *), cudaMemcpyHostToDevice)); + GT_CUDA_CHECK(cudaMemcpy(d_recv_buffer, + &recv_buffer[0], + static_pow3(DIMS_t::value) * sizeof(DataType *), + cudaMemcpyHostToDevice)); } /** diff --git a/include/gridtools/gcl/high_level/empty_field_base.hpp b/include/gridtools/gcl/high_level/empty_field_base.hpp index 4cb6fea8ec..b749dc25d2 100644 --- a/include/gridtools/gcl/high_level/empty_field_base.hpp +++ b/include/gridtools/gcl/high_level/empty_field_base.hpp @@ -114,13 +114,13 @@ namespace gridtools { template class empty_field_base { - static constexpr int DIMS = 3; + using DIMS_t = std::integral_constant; - typedef array HALO_t; + typedef array HALO_t; public: - array halos; - typedef array, static_pow3(DIMS)> MPDT_t; + array halos; + typedef array, static_pow3(DIMS_t::value)> MPDT_t; MPDT_t MPDT_OUTSIDE; MPDT_t MPDT_INSIDE; @@ -142,8 +142,8 @@ namespace gridtools { void add_halo(int D, halo_descriptor const &halo) { halos[D] = halo; } void setup() { - array tuple; - _impl::neigh_loop()( + array tuple; + _impl::neigh_loop()( [&](auto const &tuple) { int idx = _impl::neigh_idx(tuple); MPDT_OUTSIDE[idx] = _impl::make_datatype_outin::outside(halos, tuple); @@ -152,11 +152,11 @@ namespace gridtools { tuple); } - std::pair mpdt_inside(array const &eta) const { + std::pair mpdt_inside(array const &eta) const { return MPDT_INSIDE[_impl::neigh_idx(eta)]; } - std::pair mpdt_outside(array const &eta) const { + std::pair mpdt_outside(array const &eta) const { return MPDT_OUTSIDE[_impl::neigh_idx(eta)]; } @@ -167,9 +167,9 @@ namespace gridtools { \param[in] eta the eta parameter as indicated in \link MULTI_DIM_ACCESS \endlink */ - int send_buffer_size(array const &eta) const { + int send_buffer_size(array const &eta) const { int S = 1; - for (int i = 0; i < DIMS; ++i) { + for (int i = 0; i < DIMS_t::value; ++i) { S *= halos[i].s_length(eta[i]); } return S; @@ -182,9 +182,9 @@ namespace gridtools { \param[in] eta the eta parameter as indicated in \link MULTI_DIM_ACCESS \endlink */ - int recv_buffer_size(array const &eta) const { + int recv_buffer_size(array const &eta) const { int S = 1; - for (int i = 0; i < DIMS; ++i) { + for (int i = 0; i < DIMS_t::value; ++i) { S *= halos[i].r_length(eta[i]); } return S;