Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Workarounds for CUDA 12.1 and 12.2 #1764

Merged
merged 7 commits into from
Aug 16, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
container: ghcr.io/gridtools/gridtools-base:${{ matrix.compiler }}
strategy:
matrix:
compiler: [gcc-8, gcc-9, gcc-10, gcc-11, gcc-12, gcc-13, clang-11, clang-12, clang-13, clang-14, clang-15, clang-16, clang-14-cuda-11, gcc-10-cuda-11.8, gcc-11-cuda-12.0, base-hip, gcc-10-hpx, nvhpc-23.3]
compiler: [gcc-8, gcc-9, gcc-10, gcc-11, gcc-12, gcc-13, clang-11, clang-12, clang-13, clang-14, clang-15, clang-16, clang-14-cuda-11, gcc-10-cuda-11.8, gcc-11-cuda-12.0, gcc-12-cuda-12.1, gcc-12-cuda-12.2, base-hip, gcc-10-hpx, nvhpc-23.3, nvhpc-23.7]
build_type: [debug, release]
exclude:
- compiler: gcc-8
Expand Down
8 changes: 8 additions & 0 deletions include/gridtools/fn/column_stage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,5 +101,13 @@ namespace gridtools::fn {
using column_stage_impl_::column_stage;
using column_stage_impl_::fwd;
using column_stage_impl_::merged_column_stage;
#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 1 && __CUDACC_VER_MINOR__ <= 2)
petiaccja marked this conversation as resolved.
Show resolved Hide resolved
// workaround CTAD issue in CUDA 12.1, 12.2 (https://github.com/GridTools/gridtools/issues/1766)
havogt marked this conversation as resolved.
Show resolved Hide resolved
// Function-template replacement for the `using column_stage_impl_::scan_pass;` declaration,
// compiled only when the surrounding NVCC 12.1/12.2 version check holds.
// Perfect-forwards `f` and `p` to column_stage_impl_::scan_pass and returns the result by value.
// `Projector` defaults to host_device::identity; when `p` is omitted it is initialized from `{}`.
// NOTE(review): presumably column_stage_impl_::scan_pass is a class template relying on CTAD,
// which is what this wrapper hides from the affected compilers -- confirm against its definition.
template <class F, class Projector = host_device::identity>
GT_FUNCTION constexpr auto scan_pass(F &&f, Projector &&p = {}) {
return column_stage_impl_::scan_pass(std::forward<F>(f), std::forward<Projector>(p));
}
#else
using column_stage_impl_::scan_pass;
#endif
} // namespace gridtools::fn
7 changes: 7 additions & 0 deletions include/gridtools/storage/builder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,15 @@ namespace gridtools {

auto operator()() const { return build(); }
};
#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 1 && __CUDACC_VER_MINOR__ <= 2)
// workaround constexpr issue in CUDA 12.1, 12.2 (maybe related to
// https://github.com/GridTools/gridtools/issues/1766):
// same variable template as in the #else branch, but declared without `constexpr`
template <class Traits>
builder_type<Traits, keys<>::values<>> builder = {};
#else
// regular declaration: constexpr variable template of type builder_type<Traits, keys<>::values<>>,
// initialized from `{}`
template <class Traits>
constexpr builder_type<Traits, keys<>::values<>> builder = {};
#endif
} // namespace builder_impl_
using builder_impl_::builder;
} // namespace storage
Expand Down
22 changes: 22 additions & 0 deletions tests/include/nvcc_workarounds.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* GridTools
*
* Copyright (c) 2014-2021, ETH Zurich
petiaccja marked this conversation as resolved.
Show resolved Hide resolved
* All rights reserved.
*
* Please, refer to the LICENSE file in the root directory.
* SPDX-License-Identifier: BSD-3-Clause
*/

#pragma once

#include <utility>

#include <gridtools/common/tuple.hpp>

namespace gridtools {
    namespace nvcc_workarounds {

        /**
         * Wraps a single value into a one-element `tuple`.
         *
         * Test code calls this helper instead of writing `tuple(x)` directly at the call site,
         * see https://github.com/GridTools/gridtools/issues/1766
         *
         * @param t value to store; it is perfect-forwarded, so rvalues are moved instead of copied
         *          and move-only arguments are accepted
         * @return the object produced by `tuple(std::forward<T>(t))`
         */
        template <class T>
        constexpr auto make_1_tuple(T &&t) {
            return tuple(std::forward<T>(t));
        }
    } // namespace nvcc_workarounds
} // namespace gridtools
7 changes: 4 additions & 3 deletions tests/regression/fn/fn_cartesian_vertical_advection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <gridtools/stencil/global_parameter.hpp>

#include <fn_select.hpp>
#include <nvcc_workarounds.hpp>
#include <test_environment.hpp>

#include "../vertical_advection_repository.hpp"
Expand All @@ -27,7 +28,7 @@ namespace {

struct u_forward_scan : fwd {
static GT_FUNCTION constexpr auto prologue() {
return tuple(scan_pass(
return nvcc_workarounds::make_1_tuple(scan_pass(
petiaccja marked this conversation as resolved.
Show resolved Hide resolved
[](auto /*acc*/,
auto const &utens_stage,
auto const &utens,
Expand Down Expand Up @@ -80,7 +81,7 @@ namespace {
}

static GT_FUNCTION constexpr auto epilogue() {
return tuple(scan_pass(
return nvcc_workarounds::make_1_tuple(scan_pass(
[](auto acc,
auto const &utens_stage,
auto const &utens,
Expand All @@ -107,7 +108,7 @@ namespace {

struct u_backward_scan : bwd {
static GT_FUNCTION constexpr auto prologue() {
return tuple(scan_pass(
return nvcc_workarounds::make_1_tuple(scan_pass(
[](auto /*acc*/, auto const &cd, auto const &u_pos, auto const &dtr_stage) {
auto d = tuple_get(1_c, deref(cd));
return make_tuple(deref(dtr_stage) * (d - deref(u_pos)), d);
Expand Down
3 changes: 2 additions & 1 deletion tests/regression/fn/fn_domain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <gridtools/fn/unstructured.hpp>

#include <fn_select.hpp>
#include <nvcc_workarounds.hpp>
#include <test_environment.hpp>

namespace {
Expand Down Expand Up @@ -51,7 +52,7 @@ namespace {

struct empty_column : fwd {
static GT_FUNCTION constexpr auto prologue() {
return tuple(scan_pass([](auto acc) { return acc; }, host_device::identity()));
return nvcc_workarounds::make_1_tuple(scan_pass([](auto acc) { return acc; }, host_device::identity()));
}

static GT_FUNCTION constexpr auto body() {
Expand Down
5 changes: 3 additions & 2 deletions tests/regression/fn/fn_tridiagonal_solve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <gridtools/fn/unstructured.hpp>

#include <fn_select.hpp>
#include <nvcc_workarounds.hpp>
#include <test_environment.hpp>

namespace {
Expand All @@ -23,7 +24,7 @@ namespace {

struct forward_scan : fwd {
static GT_FUNCTION constexpr auto prologue() {
return tuple(scan_pass(
return nvcc_workarounds::make_1_tuple(scan_pass(
[](auto /*acc*/, auto const & /*a*/, auto const &b, auto const &c, auto const &d) {
return tuple(deref(c) / deref(b), deref(d) / deref(b));
},
Expand All @@ -43,7 +44,7 @@ namespace {

struct backward_scan : bwd {
static GT_FUNCTION constexpr auto prologue() {
return tuple(scan_pass(
return nvcc_workarounds::make_1_tuple(scan_pass(
[](auto /*xp*/, auto const &cpdp) {
auto [cp, dp] = deref(cpdp);
return dp;
Expand Down
8 changes: 8 additions & 0 deletions tests/unit_tests/common/test_tuple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,5 +331,13 @@ namespace gridtools {
EXPECT_EQ(a, 42);
EXPECT_EQ(b, 2.5);
}

#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ >= 1 && __CUDACC_VER_MINOR <= 2
#else
havogt marked this conversation as resolved.
Show resolved Hide resolved
TEST(ctad, lambda) {
auto testee = tuple([](int i) { return i; });
EXPECT_EQ(1, get<0>(testee)(1));
}
#endif
} // namespace
} // namespace gridtools
6 changes: 4 additions & 2 deletions tests/unit_tests/fn/test_fn_column_stage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#include <gridtools/sid/composite.hpp>
#include <gridtools/sid/synthetic.hpp>

#include <nvcc_workarounds.hpp>

namespace gridtools::fn {
namespace {
using namespace literals;
Expand All @@ -35,10 +37,10 @@ namespace gridtools::fn {

struct sum_fold_with_logues : sum_fold {
static GT_FUNCTION constexpr auto prologue() {
return tuple([](auto acc, auto const &iter) { return acc + 2 * *iter; });
return nvcc_workarounds::make_1_tuple([](auto acc, auto const &iter) { return acc + 2 * *iter; });
}
static GT_FUNCTION constexpr auto epilogue() {
return tuple([](auto acc, auto const &iter) { return acc + 3 * *iter; });
return nvcc_workarounds::make_1_tuple([](auto acc, auto const &iter) { return acc + 3 * *iter; });
}
};

Expand Down
Loading