NVIDIA · miscco · Sep 12, 2023 · Aug 18, 2023 · Aug 18, 2023 · Aug 18, 2023
@@ -0,0 +1,83 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TEST_ARRIVE_TX_H_
+#define TEST_ARRIVE_TX_H_
+
+#include <cuda/barrier>
+
+#include <cuda/std/utility>
+
+#include "concurrent_agents.h"
+#include "cuda_space_selector.h"
+#include "test_macros.h"
+
+// Suppress warning about barrier in shared memory
+TEST_NV_DIAG_SUPPRESS(static_var_with_dynamic_init)
+
+// Test helper: issues a raw `mbarrier.complete_tx` PTX instruction on the
+// native handle of `b`, passing `transaction_count` as the second operand.
+// Traps if the handle is not a shared-memory address, and traps
+// unconditionally on targets that do not provide SM90.
+template<typename Barrier>
+inline __device__
+void mbarrier_complete_tx(
+  Barrier &b, int transaction_count)
+{
+  NV_DISPATCH_TARGET(
+    NV_PROVIDES_SM_90, (
+        auto handle = cuda::device::barrier_native_handle(b);
+        if (!__isShared(handle)) {
+          // The instruction below addresses the shared::cta window only.
+          __trap();
+        } else {
+          // The instruction takes a 32-bit shared-window address operand.
+          const unsigned int smem_addr = (unsigned int) __cvta_generic_to_shared(handle);
+          asm volatile(
+            "mbarrier.complete_tx.relaxed.cta.shared::cta.b64 [%0], %1;"
+            :
+            : "r"(smem_addr),
+              "r"(transaction_count)
+            : "memory");
+        }
+    ), NV_ANY_TARGET, (
+      // On architectures pre-SM90 (and on host) the barriers do not keep
+      // track of transaction counts, so there is nothing this helper could
+      // update; calling it there is treated as an error and traps.
+      __trap();
+    )
+  );
+}
+
+// Per-thread test body: arrives on `b` with `arrives_per_thread` arrivals and
+// an expected transaction count of one, then completes that one transaction
+// by hand and waits on the returned token.
+template<typename Barrier>
+__device__
+void thread(Barrier& b, int arrives_per_thread)
+{
+  constexpr int expected_tx = 1;
+
+  auto arrival_token =
+    cuda::device::barrier_arrive_tx(b, arrives_per_thread, expected_tx);
+
+  // Nothing else performs the transaction, so signal its completion manually.
+  mbarrier_complete_tx(b, expected_tx);
+
+  b.wait(cuda::std::move(arrival_token));
+}
+
+// Device-side driver shared by the arrive_tx tests. Runs two rounds on two
+// separate block-scoped shared-memory barriers: first with every thread
+// arriving once, then with every thread arriving twice. The expected arrival
+// counts are derived from blockDim.x, so a one-dimensional block is assumed.
+__device__
+void test()
+{
+  NV_DISPATCH_TARGET(
+    NV_IS_DEVICE, (
+      constexpr auto block = cuda::thread_scope_block;
+
+      // Round 1: all threads, each arriving with arrival count 1.
+      __shared__ cuda::barrier<block> bar_1;
+      // Initialize from a single thread so the threads of the block do not
+      // race on the barrier state; __syncthreads() publishes the result.
+      if (threadIdx.x == 0) {
+        init(&bar_1, (int) blockDim.x);
+      }
+      __syncthreads();
+      thread(bar_1, 1);
+
+      // Round 2: all threads, each arriving with arrival count 2.
+      __shared__ cuda::barrier<block> bar_2;
+      if (threadIdx.x == 0) {
+        // Cast the whole product, not just the literal 2.
+        init(&bar_2, (int) (2 * blockDim.x));
+      }
+      __syncthreads();
+      thread(bar_2, 2);
+    )
+  );
+}
+
+#endif // TEST_ARRIVE_TX_H_
@@ -0,0 +1,50 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: libcpp-has-no-threads
+// UNSUPPORTED: pre-sm-90
+
+// <cuda/barrier>
+
+#include <cooperative_groups.h>
+#include <cuda/barrier>
+#include "test_macros.h"
+
+// Suppress warning about barrier in shared memory
+TEST_NV_DIAG_SUPPRESS(static_var_with_dynamic_init)
+
+// Expected-failure test: calls barrier_arrive_tx on a barrier obtained from
+// another block of the cluster via map_shared_rank.
+int main(int, char**)
+{
+    NV_DISPATCH_TARGET(
+        NV_IS_HOST, (
+            // When PR #416 is merged, uncomment this line:
+            // cuda_cluster_size = 2;
+        ),
+        NV_IS_DEVICE, (
+            namespace cg = cooperative_groups;
+            using barrier_t = cuda::barrier<cuda::thread_scope_block>;
+
+            __shared__ barrier_t bar;
+
+            // One thread sets up the barrier for the whole block.
+            if (threadIdx.x == 0) {
+                init(&bar, blockDim.x);
+            }
+
+            auto cluster = cg::this_cluster();
+            cluster.sync();
+
+            // This test currently fails at this point because support for
+            // clusters has not yet been added.
+            barrier_t *remote_bar = cluster.map_shared_rank(&bar, cluster.block_rank() ^ 1);
+
+            // When PR #416 is merged, the failure is expected to move to the
+            // call below, because the barrier is then reached through the
+            // pointer mapped from the neighbouring block.
+            // NOTE(review): the original comment describes this barrier as
+            // being "in device memory" - confirm the intended wording.
+            auto token = cuda::device::barrier_arrive_tx(*remote_bar, 1, 0);
+        )
+    );
+    return 0;
+}
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: libcpp-has-no-threads
+// UNSUPPORTED: pre-sm-90
+
+// <cuda/barrier>
+
+#include "arrive_tx.h"
+
+// Entry point: the host side records how many threads to launch, the device
+// side runs the shared arrive_tx test body.
+int main(int, char**)
+{
+    NV_DISPATCH_TARGET(
+        NV_IS_HOST, (
+            // concurrent_agents_launch reads this value to learn the number
+            // of threads to launch. Keep it a plain integer literal: the
+            // nvrtc tests find the thread count by grepping this file.
+            cuda_thread_count = 256;
+        ),
+        NV_IS_DEVICE, (
+            test();
+        )
+    );
+
+    return 0;
+}
@@ -0,0 +1,39 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: libcpp-has-no-threads
+// UNSUPPORTED: pre-sm-90
+
+// <cuda/barrier>
+
+#include <cuda/barrier>
+#include "test_macros.h"
+
+// Suppress warning about barrier in shared memory
+TEST_NV_DIAG_SUPPRESS(static_var_with_dynamic_init)
+
+// Backing storage for a barrier that deliberately lives in global (device)
+// memory instead of shared memory.
+__device__ uint64_t bar_storage;
+
+// Expected-failure test: barrier_arrive_tx is called on a barrier that is
+// placed in device memory rather than in shared memory.
+int main(int, char**){
+    NV_IF_TARGET(
+        NV_IS_DEVICE, (
+            cuda::barrier<cuda::thread_scope_block> *bar_ptr;
+            // Take the ADDRESS of the storage. Casting the stored value would
+            // produce a null pointer and make the test fail for an unrelated
+            // reason before barrier_arrive_tx is ever reached.
+            bar_ptr = reinterpret_cast<cuda::barrier<cuda::thread_scope_block> *>(&bar_storage);
+
+            // One thread sets up the barrier for the whole block.
+            if (threadIdx.x == 0) {
+                init(bar_ptr, blockDim.x);
+            }
+            __syncthreads();
+
+            // Should fail because the barrier is in device memory.
+            auto token = cuda::device::barrier_arrive_tx(*bar_ptr, 1, 0);
+    ));
+    return 0;
+}
@@ -0,0 +1,24 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: libcpp-has-no-threads
+// UNSUPPORTED: pre-sm-90
+
+// <cuda/barrier>
+
+#include <cuda/barrier>
+
+// Compile-time check: including <cuda/barrier> must define the feature-test
+// macro __cccl_lib_local_barrier_arrive_tx.
+#ifndef  __cccl_lib_local_barrier_arrive_tx
+static_assert(false, "should define __cccl_lib_local_barrier_arrive_tx");
+#endif // __cccl_lib_local_barrier_arrive_tx
+
+// Nothing to do at run time; the check above happens during compilation.
+int main(int, char**)
+{
+    return 0;
+}
@@ -0,0 +1,36 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: libcpp-has-no-threads
+// UNSUPPORTED: nvrtc
+// UNSUPPORTED: pre-sm-70
+
+// <cuda/barrier>
+
+#include <cuda/barrier>
+
+// This test is expected to fail to compile on every architecture. On SM90 and
+// up barrier_arrive_tx is available, so the failure is forced by hand. The
+// check lives at namespace scope because preprocessing directives are not
+// allowed inside the argument list of a function-like macro such as
+// NV_IF_TARGET.
+#if defined(__CUDA_MINIMUM_ARCH__) && 900 <= __CUDA_MINIMUM_ARCH__
+static_assert(false, "Fail manually for SM90 and up.");
+#endif // __CUDA_MINIMUM_ARCH__
+
+int main(int, char**){
+    NV_IF_TARGET(
+        NV_IS_DEVICE, (
+            __shared__ cuda::barrier<cuda::thread_scope_block> bar;
+            // One thread sets up the barrier for the whole block.
+            if (threadIdx.x == 0) {
+                init(&bar, blockDim.x);
+            }
+            __syncthreads();
+
+            // barrier_arrive_tx should fail on SM70 and SM80, because it is hidden.
+            auto token = cuda::device::barrier_arrive_tx(bar, 1, 0);
+    ));
+    return 0;
+}
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: libcpp-has-no-threads
+// UNSUPPORTED: pre-sm-90
+
+// <cuda/barrier>
+
+#include "arrive_tx.h"
+
+// Entry point: the host side records how many threads to launch, the device
+// side runs the shared arrive_tx test body.
+int main(int, char**)
+{
+    NV_DISPATCH_TARGET(
+        NV_IS_HOST, (
+            // concurrent_agents_launch reads this value to learn the number
+            // of threads to launch. Keep it a plain integer literal: the
+            // nvrtc tests find the thread count by grepping this file.
+            cuda_thread_count = 2;
+        ),
+        NV_IS_DEVICE, (
+            test();
+        )
+    );
+
+    return 0;
+}
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: libcpp-has-no-threads
+// UNSUPPORTED: pre-sm-90
+
+// <cuda/barrier>
+
+#include "arrive_tx.h"
+
+// Entry point: the host side records how many threads to launch, the device
+// side runs the shared arrive_tx test body.
+int main(int, char**)
+{
+    NV_DISPATCH_TARGET(
+        NV_IS_HOST, (
+            // concurrent_agents_launch reads this value to learn the number
+            // of threads to launch. Keep it a plain integer literal: the
+            // nvrtc tests find the thread count by grepping this file.
+            cuda_thread_count = 32;
+        ),
+        NV_IS_DEVICE, (
+            test();
+        )
+    );
+
+    return 0;
+}