From 2fe5571f8efea4fd91c5c0d67e39fd551281490c Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Thu, 3 Aug 2023 09:22:45 +0200 Subject: [PATCH 1/6] CI: Add NVHPC 23.7 to compilation test --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d96066963..925be2f8f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,7 +12,7 @@ jobs: container: ghcr.io/gridtools/gridtools-base:${{ matrix.compiler }} strategy: matrix: - compiler: [gcc-8, gcc-9, gcc-10, gcc-11, gcc-12, gcc-13, clang-11, clang-12, clang-13, clang-14, clang-15, clang-16, clang-14-cuda-11, gcc-10-cuda-11.8, gcc-11-cuda-12.0, base-hip, gcc-10-hpx, nvhpc-23.3] + compiler: [gcc-8, gcc-9, gcc-10, gcc-11, gcc-12, gcc-13, clang-11, clang-12, clang-13, clang-14, clang-15, clang-16, clang-14-cuda-11, gcc-10-cuda-11.8, gcc-11-cuda-12.0, base-hip, gcc-10-hpx, nvhpc-23.3, nvhpc-23.7] build_type: [debug, release] exclude: - compiler: gcc-8 From ee9598947685ec8dbd676ca7759487b3f602bd46 Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Wed, 9 Aug 2023 15:36:22 +0200 Subject: [PATCH 2/6] CI: test CUDA 12.1 and 12.2 --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d96066963..1747b66fb 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,7 +12,7 @@ jobs: container: ghcr.io/gridtools/gridtools-base:${{ matrix.compiler }} strategy: matrix: - compiler: [gcc-8, gcc-9, gcc-10, gcc-11, gcc-12, gcc-13, clang-11, clang-12, clang-13, clang-14, clang-15, clang-16, clang-14-cuda-11, gcc-10-cuda-11.8, gcc-11-cuda-12.0, base-hip, gcc-10-hpx, nvhpc-23.3] + compiler: [gcc-8, gcc-9, gcc-10, gcc-11, gcc-12, gcc-13, clang-11, clang-12, clang-13, clang-14, clang-15, clang-16, clang-14-cuda-11, gcc-10-cuda-11.8, gcc-11-cuda-12.0, gcc-12-cuda-12.1, gcc-12-cuda-12.2, base-hip, gcc-10-hpx, nvhpc-23.3] build_type: [debug, release] exclude: - compiler: gcc-8 From 52ec54786d8f6c5c3400bf725d3293d1fec119a3 Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Wed, 9 Aug 2023 22:30:44 +0200 Subject: [PATCH 3/6] cuda 12.1/12.2 partial workarounds --- include/gridtools/fn/column_stage.hpp | 8 +++++++ include/gridtools/storage/builder.hpp | 7 ++++++ tests/include/nvcc_workarounds.hpp | 22 +++++++++++++++++++ .../fn/fn_cartesian_vertical_advection.cpp | 7 +++--- tests/regression/fn/fn_domain.cpp | 3 ++- tests/regression/fn/fn_tridiagonal_solve.cpp | 5 +++-- tests/unit_tests/common/test_tuple.cpp | 8 +++++++ tests/unit_tests/fn/test_fn_column_stage.cpp | 6 +++-- 8 files changed, 58 insertions(+), 8 deletions(-) create mode 100644 tests/include/nvcc_workarounds.hpp diff --git a/include/gridtools/fn/column_stage.hpp b/include/gridtools/fn/column_stage.hpp index e0a868b37..c0ce3f96d 100644 --- a/include/gridtools/fn/column_stage.hpp +++ b/include/gridtools/fn/column_stage.hpp @@ -101,5 +101,13 @@ namespace gridtools::fn { using column_stage_impl_::column_stage; using column_stage_impl_::fwd; using column_stage_impl_::merged_column_stage; +#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 1 && __CUDACC_VER_MINOR__ <= 2) + // workaround CTAD issue in CUDA 12.1, 12.2 (https://github.com/GridTools/gridtools/issues/1766) + template + GT_FUNCTION constexpr auto scan_pass(F &&f, Projector &&p = {}) { + return column_stage_impl_::scan_pass(std::forward(f), std::forward(p)); + } +#else using column_stage_impl_::scan_pass; +#endif } // namespace gridtools::fn diff --git a/include/gridtools/storage/builder.hpp b/include/gridtools/storage/builder.hpp index 5854f9e96..d5d8c5092 100644 --- a/include/gridtools/storage/builder.hpp +++ b/include/gridtools/storage/builder.hpp @@ -316,8 +316,15 @@ namespace gridtools { auto operator()() const { return build(); } }; +#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 1 && __CUDACC_VER_MINOR__ <= 2) + // workaround constexpr issue in CUDA 12.1, 12.2 (maybe related + // tohttps://github.com/GridTools/gridtools/issues/1766) + template + builder_type::values<>> builder = {}; +#else template constexpr builder_type::values<>> builder = {}; +#endif } // namespace builder_impl_ using builder_impl_::builder; } // namespace storage diff --git a/tests/include/nvcc_workarounds.hpp b/tests/include/nvcc_workarounds.hpp new file mode 100644 index 000000000..40c78063a --- /dev/null +++ b/tests/include/nvcc_workarounds.hpp @@ -0,0 +1,22 @@ +/* + * GridTools + * + * Copyright (c) 2014-2021, ETH Zurich + * All rights reserved. + * + * Please, refer to the LICENSE file in the root directory. + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include + +namespace gridtools { + namespace nvcc_workarounds { + + // see https://github.com/GridTools/gridtools/issues/1766 + template + constexpr auto make_1_tuple(T &&t) { + return tuple(t); + } + } // namespace nvcc_workarounds +} // namespace gridtools diff --git a/tests/regression/fn/fn_cartesian_vertical_advection.cpp b/tests/regression/fn/fn_cartesian_vertical_advection.cpp index 82871652f..d00aaf7bd 100644 --- a/tests/regression/fn/fn_cartesian_vertical_advection.cpp +++ b/tests/regression/fn/fn_cartesian_vertical_advection.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include "../vertical_advection_repository.hpp" @@ -27,7 +28,7 @@ namespace { struct u_forward_scan : fwd { static GT_FUNCTION constexpr auto prologue() { - return tuple(scan_pass( + return nvcc_workarounds::make_1_tuple(scan_pass( [](auto /*acc*/, auto const &utens_stage, auto const &utens, @@ -80,7 +81,7 @@ namespace { } static GT_FUNCTION constexpr auto epilogue() { - return tuple(scan_pass( + return nvcc_workarounds::make_1_tuple(scan_pass( [](auto acc, auto const &utens_stage, auto const &utens, @@ -107,7 +108,7 @@ namespace { struct u_backward_scan : bwd { static GT_FUNCTION constexpr auto prologue() { - return tuple(scan_pass( + return nvcc_workarounds::make_1_tuple(scan_pass( [](auto /*acc*/, auto const &cd, auto const &u_pos, auto const &dtr_stage) { auto d = tuple_get(1_c, deref(cd)); return make_tuple(deref(dtr_stage) * (d - deref(u_pos)), d); diff --git a/tests/regression/fn/fn_domain.cpp b/tests/regression/fn/fn_domain.cpp index a5b1f74c9..48d5f9585 100644 --- a/tests/regression/fn/fn_domain.cpp +++ b/tests/regression/fn/fn_domain.cpp @@ -13,6 +13,7 @@ #include #include +#include #include namespace { @@ -51,7 +52,7 @@ namespace { struct empty_column : fwd { static GT_FUNCTION constexpr auto prologue() { - return tuple(scan_pass([](auto acc) { return acc; }, host_device::identity())); + return nvcc_workarounds::make_1_tuple(scan_pass([](auto acc) { return acc; }, host_device::identity())); } static GT_FUNCTION constexpr auto body() { diff --git a/tests/regression/fn/fn_tridiagonal_solve.cpp b/tests/regression/fn/fn_tridiagonal_solve.cpp index 283dc22f3..a812342a5 100644 --- a/tests/regression/fn/fn_tridiagonal_solve.cpp +++ b/tests/regression/fn/fn_tridiagonal_solve.cpp @@ -14,6 +14,7 @@ #include #include +#include #include namespace { @@ -23,7 +24,7 @@ namespace { struct forward_scan : fwd { static GT_FUNCTION constexpr auto prologue() { - return tuple(scan_pass( + return nvcc_workarounds::make_1_tuple(scan_pass( [](auto /*acc*/, auto const & /*a*/, auto const &b, auto const &c, auto const &d) { return tuple(deref(c) / deref(b), deref(d) / deref(b)); }, @@ -43,7 +44,7 @@ namespace { struct backward_scan : bwd { static GT_FUNCTION constexpr auto prologue() { - return tuple(scan_pass( + return nvcc_workarounds::make_1_tuple(scan_pass( [](auto /*xp*/, auto const &cpdp) { auto [cp, dp] = deref(cpdp); return dp; diff --git a/tests/unit_tests/common/test_tuple.cpp b/tests/unit_tests/common/test_tuple.cpp index 0b445cb4c..99e299d64 100644 --- a/tests/unit_tests/common/test_tuple.cpp +++ b/tests/unit_tests/common/test_tuple.cpp @@ -331,5 +331,13 @@ namespace gridtools { EXPECT_EQ(a, 42); EXPECT_EQ(b, 2.5); } + +#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 1 && __CUDACC_VER_MINOR <= 2 +#else + TEST(ctad, lambda) { + auto testee = tuple([](int i) { return i; }); + EXPECT_EQ(1, get<0>(testee)(1)); + } +#endif } // namespace } // namespace gridtools diff --git a/tests/unit_tests/fn/test_fn_column_stage.cpp b/tests/unit_tests/fn/test_fn_column_stage.cpp index 747b99bd0..5b4704cd7 100644 --- a/tests/unit_tests/fn/test_fn_column_stage.cpp +++ b/tests/unit_tests/fn/test_fn_column_stage.cpp @@ -14,6 +14,8 @@ #include #include +#include + namespace gridtools::fn { namespace { using namespace literals; @@ -35,10 +37,10 @@ namespace gridtools::fn { struct sum_fold_with_logues : sum_fold { static GT_FUNCTION constexpr auto prologue() { - return tuple([](auto acc, auto const &iter) { return acc + 2 * *iter; }); + return nvcc_workarounds::make_1_tuple([](auto acc, auto const &iter) { return acc + 2 * *iter; }); } static GT_FUNCTION constexpr auto epilogue() { - return tuple([](auto acc, auto const &iter) { return acc + 3 * *iter; }); + return nvcc_workarounds::make_1_tuple([](auto acc, auto const &iter) { return acc + 3 * *iter; }); } }; From 7c97e9abd672c89ddc19840a08b74aae19ca32b0 Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Thu, 10 Aug 2023 07:18:54 +0200 Subject: [PATCH 4/6] Update tests/include/nvcc_workarounds.hpp --- tests/include/nvcc_workarounds.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/include/nvcc_workarounds.hpp b/tests/include/nvcc_workarounds.hpp index 40c78063a..cb825b611 100644 --- a/tests/include/nvcc_workarounds.hpp +++ b/tests/include/nvcc_workarounds.hpp @@ -16,7 +16,7 @@ namespace gridtools { // see https://github.com/GridTools/gridtools/issues/1766 template constexpr auto make_1_tuple(T &&t) { - return tuple(t); + return tuple(std::forward(t)); } } // namespace nvcc_workarounds } // namespace gridtools From 7eb98b35e40afd946a80d43a632c73dc9ac5fdd5 Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Tue, 15 Aug 2023 08:41:13 +0200 Subject: [PATCH 5/6] introduce workaround macro --- include/gridtools/common/defs.hpp | 7 +++++++ include/gridtools/fn/column_stage.hpp | 5 +++-- include/gridtools/storage/builder.hpp | 5 ++--- tests/unit_tests/common/test_tuple.cpp | 4 ++-- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/gridtools/common/defs.hpp b/include/gridtools/common/defs.hpp index 1996cbc5f..6ccc52369 100644 --- a/include/gridtools/common/defs.hpp +++ b/include/gridtools/common/defs.hpp @@ -51,3 +51,10 @@ namespace gridtools { #define GT_NVCC_DIAG_PUSH_SUPPRESS(x) #define GT_NVCC_DIAG_POP_SUPPRESS(x) #endif + +#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 1 && __CUDACC_VER_MINOR__ <= 2) +// enables workaround for CTAD/constexpr issues in CUDA 12.1, 12.2 (https://github.com/GridTools/gridtools/issues/1766) +#define GT_NVCC_WORKAROUND_1766 1 +#else +#define GT_NVCC_WORKAROUND_1766 0 +#endif diff --git a/include/gridtools/fn/column_stage.hpp b/include/gridtools/fn/column_stage.hpp index c0ce3f96d..e5d02e913 100644 --- a/include/gridtools/fn/column_stage.hpp +++ b/include/gridtools/fn/column_stage.hpp @@ -13,6 +13,7 @@ #include #include +#include "../common/defs.hpp" #include "../common/functional.hpp" #include "../common/integral_constant.hpp" #include "../common/tuple.hpp" @@ -101,8 +102,8 @@ namespace gridtools::fn { using column_stage_impl_::column_stage; using column_stage_impl_::fwd; using column_stage_impl_::merged_column_stage; -#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 1 && __CUDACC_VER_MINOR__ <= 2) - // workaround CTAD issue in CUDA 12.1, 12.2 (https://github.com/GridTools/gridtools/issues/1766) + +#if GT_NVCC_WORKAROUND_1766 template GT_FUNCTION constexpr auto scan_pass(F &&f, Projector &&p = {}) { return column_stage_impl_::scan_pass(std::forward(f), std::forward(p)); diff --git a/include/gridtools/storage/builder.hpp b/include/gridtools/storage/builder.hpp index d5d8c5092..57b08e061 100644 --- a/include/gridtools/storage/builder.hpp +++ b/include/gridtools/storage/builder.hpp @@ -316,9 +316,8 @@ namespace gridtools { auto operator()() const { return build(); } }; -#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 1 && __CUDACC_VER_MINOR__ <= 2) - // workaround constexpr issue in CUDA 12.1, 12.2 (maybe related - // tohttps://github.com/GridTools/gridtools/issues/1766) +#if GT_NVCC_WORKAROUND_1766 + // not sure if the same bug as https://github.com/GridTools/gridtools/issues/1766 template builder_type::values<>> builder = {}; #else diff --git a/tests/unit_tests/common/test_tuple.cpp b/tests/unit_tests/common/test_tuple.cpp index 99e299d64..aa356aa5c 100644 --- a/tests/unit_tests/common/test_tuple.cpp +++ b/tests/unit_tests/common/test_tuple.cpp @@ -14,6 +14,7 @@ #include +#include #include #include @@ -332,8 +333,7 @@ namespace gridtools { EXPECT_EQ(b, 2.5); } -#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 1 && __CUDACC_VER_MINOR <= 2 -#else +#if not GT_NVCC_WORKAROUND_1766 TEST(ctad, lambda) { auto testee = tuple([](int i) { return i; }); EXPECT_EQ(1, get<0>(testee)(1)); From 6dbab722f5468ce4db935ce280a3882368339640 Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Wed, 16 Aug 2023 11:29:43 +0200 Subject: [PATCH 6/6] Update tests/unit_tests/common/test_tuple.cpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Péter Kardos --- tests/unit_tests/common/test_tuple.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit_tests/common/test_tuple.cpp b/tests/unit_tests/common/test_tuple.cpp index aa356aa5c..0c1e7ae88 100644 --- a/tests/unit_tests/common/test_tuple.cpp +++ b/tests/unit_tests/common/test_tuple.cpp @@ -333,7 +333,7 @@ namespace gridtools { EXPECT_EQ(b, 2.5); } -#if not GT_NVCC_WORKAROUND_1766 +#if !GT_NVCC_WORKAROUND_1766 TEST(ctad, lambda) { auto testee = tuple([](int i) { return i; }); EXPECT_EQ(1, get<0>(testee)(1));