Skip to content

Commit

Permalink
Workarounds for CUDA 12.1 and 12.2 (#1764)
Browse files Browse the repository at this point in the history
CUDA 12.1 and 12.2 have a problem with constexpr, e.g. in the context of CTAD, see #1766. The workaround is to do pre-C++17 `make_tuple`-construction or construct from a (possibly moved-from) lvalue.

CI: add GCC + CUDA 12.1/12.2 and NVHPC 23.7

Co-authored-by: Péter Kardos <kardospeter1994@hotmail.com>
  • Loading branch information
havogt and petiaccja authored Aug 16, 2023
1 parent 4fb793a commit 5e1011a
Show file tree
Hide file tree
Showing 10 changed files with 66 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
container: ghcr.io/gridtools/gridtools-base:${{ matrix.compiler }}
strategy:
matrix:
compiler: [gcc-8, gcc-9, gcc-10, gcc-11, gcc-12, gcc-13, clang-11, clang-12, clang-13, clang-14, clang-15, clang-16, clang-14-cuda-11, gcc-10-cuda-11.8, gcc-11-cuda-12.0, base-hip, gcc-10-hpx, nvhpc-23.3]
compiler: [gcc-8, gcc-9, gcc-10, gcc-11, gcc-12, gcc-13, clang-11, clang-12, clang-13, clang-14, clang-15, clang-16, clang-14-cuda-11, gcc-10-cuda-11.8, gcc-11-cuda-12.0, gcc-12-cuda-12.1, gcc-12-cuda-12.2, base-hip, gcc-10-hpx, nvhpc-23.3, nvhpc-23.7]
build_type: [debug, release]
exclude:
- compiler: gcc-8
Expand Down
7 changes: 7 additions & 0 deletions include/gridtools/common/defs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,10 @@ namespace gridtools {
#define GT_NVCC_DIAG_PUSH_SUPPRESS(x)
#define GT_NVCC_DIAG_POP_SUPPRESS(x)
#endif

// Switch for the CTAD/constexpr workarounds required by nvcc 12.1 and 12.2
// (https://github.com/GridTools/gridtools/issues/1766).
// Expands to 1 only for exactly those two nvcc releases and to 0 for every other compiler/version.
#if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 12 && (__CUDACC_VER_MINOR__ == 1 || __CUDACC_VER_MINOR__ == 2)
#define GT_NVCC_WORKAROUND_1766 1
#else
#define GT_NVCC_WORKAROUND_1766 0
#endif
9 changes: 9 additions & 0 deletions include/gridtools/fn/column_stage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <type_traits>
#include <utility>

#include "../common/defs.hpp"
#include "../common/functional.hpp"
#include "../common/integral_constant.hpp"
#include "../common/tuple.hpp"
Expand Down Expand Up @@ -101,5 +102,13 @@ namespace gridtools::fn {
using column_stage_impl_::column_stage;
using column_stage_impl_::fwd;
using column_stage_impl_::merged_column_stage;

#if GT_NVCC_WORKAROUND_1766
// nvcc 12.1/12.2 workaround (https://github.com/GridTools/gridtools/issues/1766):
// instead of re-exporting column_stage_impl_::scan_pass, expose a function template of the
// same name that perfectly forwards both arguments to column_stage_impl_::scan_pass.
// When the projector argument is omitted, `Projector` is host_device::identity and `p` is
// value-initialized from `{}`.
// NOTE(review): in this branch `scan_pass` names a function, so callers can only invoke it;
// presumably nobody spells `scan_pass<...>` as a type — confirm.
template <class F, class Projector = host_device::identity>
GT_FUNCTION constexpr auto scan_pass(F &&f, Projector &&p = {}) {
return column_stage_impl_::scan_pass(std::forward<F>(f), std::forward<Projector>(p));
}
#else
// No workaround required: re-export the implementation's scan_pass unchanged.
using column_stage_impl_::scan_pass;
#endif
} // namespace gridtools::fn
6 changes: 6 additions & 0 deletions include/gridtools/storage/builder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,14 @@ namespace gridtools {

auto operator()() const { return build(); }
};
#if GT_NVCC_WORKAROUND_1766
// nvcc 12.1/12.2 workaround: same variable template as in the #else branch, but declared
// without `constexpr`.
// not sure if the same bug as https://github.com/GridTools/gridtools/issues/1766
template <class Traits>
builder_type<Traits, keys<>::values<>> builder = {};
#else
// Regular case: `builder<Traits>` is a constexpr, empty-brace-initialized
// builder_type<Traits, keys<>::values<>>.
template <class Traits>
constexpr builder_type<Traits, keys<>::values<>> builder = {};
#endif
} // namespace builder_impl_
using builder_impl_::builder;
} // namespace storage
Expand Down
22 changes: 22 additions & 0 deletions tests/include/nvcc_workarounds.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* GridTools
*
* Copyright (c) 2014-2021, ETH Zurich
* All rights reserved.
*
* Please, refer to the LICENSE file in the root directory.
* SPDX-License-Identifier: BSD-3-Clause
*/

#pragma once

#include <utility>

#include <gridtools/common/tuple.hpp>

namespace gridtools {
    namespace nvcc_workarounds {

        /**
         * Builds a one-element `tuple` from `t`.
         *
         * Test code calls this instead of writing `tuple(<temporary>)` at the call site,
         * a form that nvcc 12.1/12.2 mishandles in constexpr context,
         * see https://github.com/GridTools/gridtools/issues/1766.
         * Inside this helper the tuple is constructed from the named parameter `t`,
         * forwarded with its original value category.
         *
         * @param t the value to wrap; perfectly forwarded into the tuple
         * @return the result of `tuple(std::forward<T>(t))`
         */
        template <class T>
        constexpr auto make_1_tuple(T &&t) {
            return tuple(std::forward<T>(t));
        }
    } // namespace nvcc_workarounds
} // namespace gridtools
7 changes: 4 additions & 3 deletions tests/regression/fn/fn_cartesian_vertical_advection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <gridtools/stencil/global_parameter.hpp>

#include <fn_select.hpp>
#include <nvcc_workarounds.hpp>
#include <test_environment.hpp>

#include "../vertical_advection_repository.hpp"
Expand All @@ -27,7 +28,7 @@ namespace {

struct u_forward_scan : fwd {
static GT_FUNCTION constexpr auto prologue() {
return tuple(scan_pass(
return nvcc_workarounds::make_1_tuple(scan_pass(
[](auto /*acc*/,
auto const &utens_stage,
auto const &utens,
Expand Down Expand Up @@ -80,7 +81,7 @@ namespace {
}

static GT_FUNCTION constexpr auto epilogue() {
return tuple(scan_pass(
return nvcc_workarounds::make_1_tuple(scan_pass(
[](auto acc,
auto const &utens_stage,
auto const &utens,
Expand All @@ -107,7 +108,7 @@ namespace {

struct u_backward_scan : bwd {
static GT_FUNCTION constexpr auto prologue() {
return tuple(scan_pass(
return nvcc_workarounds::make_1_tuple(scan_pass(
[](auto /*acc*/, auto const &cd, auto const &u_pos, auto const &dtr_stage) {
auto d = tuple_get(1_c, deref(cd));
return make_tuple(deref(dtr_stage) * (d - deref(u_pos)), d);
Expand Down
3 changes: 2 additions & 1 deletion tests/regression/fn/fn_domain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <gridtools/fn/unstructured.hpp>

#include <fn_select.hpp>
#include <nvcc_workarounds.hpp>
#include <test_environment.hpp>

namespace {
Expand Down Expand Up @@ -51,7 +52,7 @@ namespace {

struct empty_column : fwd {
static GT_FUNCTION constexpr auto prologue() {
return tuple(scan_pass([](auto acc) { return acc; }, host_device::identity()));
return nvcc_workarounds::make_1_tuple(scan_pass([](auto acc) { return acc; }, host_device::identity()));
}

static GT_FUNCTION constexpr auto body() {
Expand Down
5 changes: 3 additions & 2 deletions tests/regression/fn/fn_tridiagonal_solve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <gridtools/fn/unstructured.hpp>

#include <fn_select.hpp>
#include <nvcc_workarounds.hpp>
#include <test_environment.hpp>

namespace {
Expand All @@ -23,7 +24,7 @@ namespace {

struct forward_scan : fwd {
static GT_FUNCTION constexpr auto prologue() {
return tuple(scan_pass(
return nvcc_workarounds::make_1_tuple(scan_pass(
[](auto /*acc*/, auto const & /*a*/, auto const &b, auto const &c, auto const &d) {
return tuple(deref(c) / deref(b), deref(d) / deref(b));
},
Expand All @@ -43,7 +44,7 @@ namespace {

struct backward_scan : bwd {
static GT_FUNCTION constexpr auto prologue() {
return tuple(scan_pass(
return nvcc_workarounds::make_1_tuple(scan_pass(
[](auto /*xp*/, auto const &cpdp) {
auto [cp, dp] = deref(cpdp);
return dp;
Expand Down
8 changes: 8 additions & 0 deletions tests/unit_tests/common/test_tuple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include <gtest/gtest.h>

#include <gridtools/common/defs.hpp>
#include <gridtools/common/tuple_util.hpp>
#include <gridtools/meta/macros.hpp>

Expand Down Expand Up @@ -331,5 +332,12 @@ namespace gridtools {
EXPECT_EQ(a, 42);
EXPECT_EQ(b, 2.5);
}

#if !GT_NVCC_WORKAROUND_1766
// Checks that `tuple` can be constructed via class template argument deduction from a lambda
// and that the stored callable is reachable through get<0>.
// Compiled out when GT_NVCC_WORKAROUND_1766 is set; presumably this direct CTAD form is what
// nvcc 12.1/12.2 fails on (https://github.com/GridTools/gridtools/issues/1766) — confirm.
TEST(ctad, lambda) {
auto testee = tuple([](int i) { return i; });
EXPECT_EQ(1, get<0>(testee)(1));
}
#endif
} // namespace
} // namespace gridtools
6 changes: 4 additions & 2 deletions tests/unit_tests/fn/test_fn_column_stage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#include <gridtools/sid/composite.hpp>
#include <gridtools/sid/synthetic.hpp>

#include <nvcc_workarounds.hpp>

namespace gridtools::fn {
namespace {
using namespace literals;
Expand All @@ -35,10 +37,10 @@ namespace gridtools::fn {

struct sum_fold_with_logues : sum_fold {
static GT_FUNCTION constexpr auto prologue() {
return tuple([](auto acc, auto const &iter) { return acc + 2 * *iter; });
return nvcc_workarounds::make_1_tuple([](auto acc, auto const &iter) { return acc + 2 * *iter; });
}
static GT_FUNCTION constexpr auto epilogue() {
return tuple([](auto acc, auto const &iter) { return acc + 3 * *iter; });
return nvcc_workarounds::make_1_tuple([](auto acc, auto const &iter) { return acc + 3 * *iter; });
}
};

Expand Down

0 comments on commit 5e1011a

Please sign in to comment.