diff --git a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_1d.pass.cpp b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_1d.pass.cpp index 3ac94a0567..6d890edc9b 100644 --- a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_1d.pass.cpp +++ b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_1d.pass.cpp @@ -8,7 +8,8 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads, c++98, c++03, c++11 +// UNSUPPORTED: c++11 +// UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: pre-sm-90 // UNSUPPORTED: nvrtc // NVRTC_SKIP_KERNEL_RUN // This will have effect once PR 433 is merged (line above should be removed.) diff --git a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_2d.pass.cpp b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_2d.pass.cpp index 9eacd1b8bb..e394515666 100644 --- a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_2d.pass.cpp +++ b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_2d.pass.cpp @@ -8,7 +8,8 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads, c++98, c++03, c++11 +// UNSUPPORTED: c++11 +// UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: pre-sm-90 // UNSUPPORTED: nvrtc // NVRTC_SKIP_KERNEL_RUN // This will have effect once PR 433 is merged (line above should be removed.) diff --git a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_3d.pass.cpp b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_3d.pass.cpp index 5e355900f2..b10c2bfc26 100644 --- a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_3d.pass.cpp +++ b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_3d.pass.cpp @@ -8,7 +8,8 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads, c++98, c++03, c++11 +// UNSUPPORTED: c++11 +// UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: pre-sm-90 // UNSUPPORTED: nvrtc // NVRTC_SKIP_KERNEL_RUN // This will have effect once PR 433 is merged (line above should be removed.) diff --git a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_4d.pass.cpp b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_4d.pass.cpp index 3f12718d27..8e7886db6a 100644 --- a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_4d.pass.cpp +++ b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_4d.pass.cpp @@ -8,7 +8,8 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads, c++98, c++03, c++11 +// UNSUPPORTED: c++11 +// UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: pre-sm-90 // UNSUPPORTED: nvrtc // NVRTC_SKIP_KERNEL_RUN // This will have effect once PR 433 is merged (line above should be removed.) diff --git a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_5d.pass.cpp b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_5d.pass.cpp index faf6eb72f0..8be14c21c1 100644 --- a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_5d.pass.cpp +++ b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_5d.pass.cpp @@ -8,7 +8,8 @@ // //===----------------------------------------------------------------------===// // -// UNSUPPORTED: libcpp-has-no-threads, c++98, c++03, c++11 +// UNSUPPORTED: c++11 +// UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: pre-sm-90 // UNSUPPORTED: nvrtc // NVRTC_SKIP_KERNEL_RUN // This will have effect once PR 433 is merged (line above should be removed.) diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__cuda/barrier.h b/libcudacxx/include/cuda/std/detail/libcxx/include/__cuda/barrier.h index 6ac02f200e..9fd883659b 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/__cuda/barrier.h +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/__cuda/barrier.h @@ -958,9 +958,9 @@ void __cp_async_shared_global(char * __dest, const char * __src) { // If `if constexpr` is not available, this function gets instantiated even // if is not called. Do not static_assert in that case. -#if _LIBCUDACXX_STD_VER > 14 && !defined(_LIBCUDACXX_HAS_NO_CXX14_CONSTEXPR) +#if _LIBCUDACXX_STD_VER >= 17 static_assert(_Copy_size == 4 || _Copy_size == 8 || _Copy_size == 16, "cp.async.shared.global requires a copy size of 4, 8, or 16."); -#endif +#endif // _LIBCUDACXX_STD_VER >= 17 asm volatile( "cp.async.ca.shared.global [%0], [%1], %2, %2;" @@ -990,9 +990,9 @@ inline __device__ void __cp_async_shared_global_mechanism(_Group __g, char * __dest, const char * __src, _CUDA_VSTD::size_t __size) { // If `if constexpr` is not available, this function gets instantiated even // if is not called. Do not static_assert in that case. -#if _LIBCUDACXX_STD_VER > 14 && !defined(_LIBCUDACXX_HAS_NO_CXX14_CONSTEXPR) +#if _LIBCUDACXX_STD_VER >= 17 static_assert(4 <= _Alignment, "cp.async requires at least 4-byte alignment"); -#endif +#endif // _LIBCUDACXX_STD_VER >= 17 // Maximal copy size is 16. constexpr int __copy_size = (_Alignment > 16) ? 16 : _Alignment;