Skip to content

Commit

Permalink
Update to CCCL 2.7.0-rc2. (#17233)
Browse the repository at this point in the history
This PR updates to CCCL 2.7.0-rc2. Do not merge until all of RAPIDS is
ready to update.

Depends on rapidsai/rapids-cmake#710 and should
be admin-merged immediately after that PR is merged.

Part of rapidsai/build-planning#115.

---------

Co-authored-by: Michael Schellenberger Costa <miscco@nvidia.com>
  • Loading branch information
bdice and miscco authored Dec 4, 2024
1 parent 6440207 commit 38820ff
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 54 deletions.
5 changes: 0 additions & 5 deletions cpp/cmake/thirdparty/patches/cccl_override.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,6 @@
"packages" : {
"CCCL" : {
"patches" : [
{
"file" : "${current_json_dir}/cccl_symbol_visibility.diff",
"issue" : "Correct symbol visibility issues in libcudacxx [https://github.com/NVIDIA/cccl/pull/1832/]",
"fixed_in" : "2.6"
},
{
"file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff",
"issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]",
Expand Down
27 changes: 0 additions & 27 deletions cpp/cmake/thirdparty/patches/cccl_symbol_visibility.diff

This file was deleted.

66 changes: 50 additions & 16 deletions cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff
Original file line number Diff line number Diff line change
@@ -1,25 +1,59 @@
diff --git a/thrust/thrust/system/cuda/detail/dispatch.h b/thrust/thrust/system/cuda/detail/dispatch.h
index 2a3cc4e33..8fb337b26 100644
index 971b93d62..0d6b25b07 100644
--- a/thrust/thrust/system/cuda/detail/dispatch.h
+++ b/thrust/thrust/system/cuda/detail/dispatch.h
@@ -44,8 +44,7 @@
} \
else \
{ \
- auto THRUST_PP_CAT2(count, _fixed) = static_cast<thrust::detail::int64_t>(count); \
- status = call arguments; \
@@ -36,16 +36,15 @@
* that callables for both branches consist of the same tokens, and is intended to be used with Thrust-style dispatch
* interfaces, that always deduce the size type from the arguments.
*/
-#define THRUST_INDEX_TYPE_DISPATCH(status, call, count, arguments) \
- if (count <= thrust::detail::integer_traits<std::int32_t>::const_max) \
- { \
- auto THRUST_PP_CAT2(count, _fixed) = static_cast<std::int32_t>(count); \
- status = call arguments; \
- } \
- else \
- { \
- auto THRUST_PP_CAT2(count, _fixed) = static_cast<std::int64_t>(count); \
- status = call arguments; \
+#define THRUST_INDEX_TYPE_DISPATCH(status, call, count, arguments) \
+ if (count <= thrust::detail::integer_traits<std::int32_t>::const_max) \
+ { \
+ auto THRUST_PP_CAT2(count, _fixed) = static_cast<std::int32_t>(count); \
+ status = call arguments; \
+ } \
+ else \
+ { \
+ throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
}

/**
@@ -66,9 +65,7 @@
} \
else \
{ \
- auto THRUST_PP_CAT2(count1, _fixed) = static_cast<thrust::detail::int64_t>(count1); \
- auto THRUST_PP_CAT2(count2, _fixed) = static_cast<thrust::detail::int64_t>(count2); \
- status = call arguments; \
@@ -55,18 +54,16 @@
*
* This version of the macro supports providing two count variables, which is necessary for set algorithms.
*/
-#define THRUST_DOUBLE_INDEX_TYPE_DISPATCH(status, call, count1, count2, arguments) \
- if (count1 + count2 <= thrust::detail::integer_traits<std::int32_t>::const_max) \
- { \
- auto THRUST_PP_CAT2(count1, _fixed) = static_cast<std::int32_t>(count1); \
- auto THRUST_PP_CAT2(count2, _fixed) = static_cast<std::int32_t>(count2); \
- status = call arguments; \
- } \
- else \
- { \
- auto THRUST_PP_CAT2(count1, _fixed) = static_cast<std::int64_t>(count1); \
- auto THRUST_PP_CAT2(count2, _fixed) = static_cast<std::int64_t>(count2); \
- status = call arguments; \
+#define THRUST_DOUBLE_INDEX_TYPE_DISPATCH(status, call, count1, count2, arguments) \
+ if (count1 + count2 <= thrust::detail::integer_traits<std::int32_t>::const_max) \
+ { \
+ auto THRUST_PP_CAT2(count1, _fixed) = static_cast<std::int32_t>(count1); \
+ auto THRUST_PP_CAT2(count2, _fixed) = static_cast<std::int32_t>(count2); \
+ status = call arguments; \
+ } \
+ else \
+ { \
+ throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
}

/**
* Dispatch between 32-bit and 64-bit index based versions of the same algorithm
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
diff --git a/cub/cub/block/block_merge_sort.cuh b/cub/cub/block/block_merge_sort.cuh
index eb76ebb0b..c6c529a50 100644
index 29510db5e..cf57e5786 100644
--- a/cub/cub/block/block_merge_sort.cuh
+++ b/cub/cub/block/block_merge_sort.cuh
@@ -95,7 +95,7 @@ _CCCL_DEVICE _CCCL_FORCEINLINE void SerialMerge(
KeyT key1 = keys_shared[keys1_beg];
KeyT key2 = keys_shared[keys2_beg];

-#pragma unroll
+#pragma unroll 1
for (int item = 0; item < ITEMS_PER_THREAD; ++item)
{
bool p = (keys2_beg < keys2_end) && ((keys1_beg >= keys1_end) || compare_op(key2, key1));
@@ -376,7 +376,7 @@ public:
const bool p = (keys2_beg < keys2_end) && ((keys1_beg >= keys1_end) || compare_op(key2, key1));
@@ -374,7 +374,7 @@ public:
//
KeyT max_key = oob_default;

-#pragma unroll
+#pragma unroll 1
for (int item = 1; item < ITEMS_PER_THREAD; ++item)
Expand All @@ -27,7 +27,7 @@ index 7d9e8622f..da5627306 100644
@@ -87,10 +87,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD], ValueT (&items)[ITEMS_PER_THRE
{
constexpr bool KEYS_ONLY = ::cuda::std::is_same<ValueT, NullType>::value;

-#pragma unroll
+#pragma unroll 1
for (int i = 0; i < ITEMS_PER_THREAD; ++i)
Expand Down

0 comments on commit 38820ff

Please sign in to comment.