Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Deprecate CTK<9 #553

Merged
merged 2 commits into from
Aug 19, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions cub/util_arch.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,6 @@ CUB_NAMESPACE_BEGIN

#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document

#if ((__CUDACC_VER_MAJOR__ >= 9) || defined(_NVHPC_CUDA) || \
CUDA_VERSION >= 9000) && \
!defined(CUB_USE_COOPERATIVE_GROUPS)
#define CUB_USE_COOPERATIVE_GROUPS
gevtushenko marked this conversation as resolved.
Show resolved Hide resolved
#endif

/// In device code, CUB_PTX_ARCH expands to the PTX version for which we are
/// compiling. In host code, CUB_PTX_ARCH's value is implementation defined.
#ifndef CUB_PTX_ARCH
Expand Down
39 changes: 1 addition & 38 deletions cub/util_ptx.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -253,9 +253,7 @@ __device__ __forceinline__ int CTA_SYNC_OR(int p)
*/
__device__ __forceinline__ void WARP_SYNC(unsigned int member_mask)
{
#ifdef CUB_USE_COOPERATIVE_GROUPS
__syncwarp(member_mask);
#endif
}


Expand All @@ -264,11 +262,7 @@ __device__ __forceinline__ void WARP_SYNC(unsigned int member_mask)
*/
__device__ __forceinline__ int WARP_ANY(int predicate, unsigned int member_mask)
{
#ifdef CUB_USE_COOPERATIVE_GROUPS
return __any_sync(member_mask, predicate);
#else
return ::__any(predicate);
#endif
}


Expand All @@ -277,11 +271,7 @@ __device__ __forceinline__ int WARP_ANY(int predicate, unsigned int member_mask
*/
__device__ __forceinline__ int WARP_ALL(int predicate, unsigned int member_mask)
{
#ifdef CUB_USE_COOPERATIVE_GROUPS
return __all_sync(member_mask, predicate);
#else
return ::__all(predicate);
#endif
}


Expand All @@ -290,11 +280,7 @@ __device__ __forceinline__ int WARP_ALL(int predicate, unsigned int member_mask
*/
__device__ __forceinline__ int WARP_BALLOT(int predicate, unsigned int member_mask)
{
#ifdef CUB_USE_COOPERATIVE_GROUPS
return __ballot_sync(member_mask, predicate);
#else
return __ballot(predicate);
#endif
}


Expand All @@ -304,13 +290,8 @@ __device__ __forceinline__ int WARP_BALLOT(int predicate, unsigned int member_m
__device__ __forceinline__
unsigned int SHFL_UP_SYNC(unsigned int word, int src_offset, int flags, unsigned int member_mask)
{
#ifdef CUB_USE_COOPERATIVE_GROUPS
asm volatile("shfl.sync.up.b32 %0, %1, %2, %3, %4;"
: "=r"(word) : "r"(word), "r"(src_offset), "r"(flags), "r"(member_mask));
#else
asm volatile("shfl.up.b32 %0, %1, %2, %3;"
: "=r"(word) : "r"(word), "r"(src_offset), "r"(flags));
#endif
return word;
}

Expand All @@ -320,13 +301,8 @@ unsigned int SHFL_UP_SYNC(unsigned int word, int src_offset, int flags, unsigned
__device__ __forceinline__
unsigned int SHFL_DOWN_SYNC(unsigned int word, int src_offset, int flags, unsigned int member_mask)
{
#ifdef CUB_USE_COOPERATIVE_GROUPS
asm volatile("shfl.sync.down.b32 %0, %1, %2, %3, %4;"
: "=r"(word) : "r"(word), "r"(src_offset), "r"(flags), "r"(member_mask));
#else
asm volatile("shfl.down.b32 %0, %1, %2, %3;"
: "=r"(word) : "r"(word), "r"(src_offset), "r"(flags));
#endif
return word;
}

Expand All @@ -336,13 +312,8 @@ unsigned int SHFL_DOWN_SYNC(unsigned int word, int src_offset, int flags, unsign
__device__ __forceinline__
unsigned int SHFL_IDX_SYNC(unsigned int word, int src_lane, int flags, unsigned int member_mask)
{
#ifdef CUB_USE_COOPERATIVE_GROUPS
asm volatile("shfl.sync.idx.b32 %0, %1, %2, %3, %4;"
: "=r"(word) : "r"(word), "r"(src_lane), "r"(flags), "r"(member_mask));
#else
asm volatile("shfl.idx.b32 %0, %1, %2, %3;"
: "=r"(word) : "r"(word), "r"(src_lane), "r"(flags));
#endif
return word;
}

Expand All @@ -352,11 +323,7 @@ unsigned int SHFL_IDX_SYNC(unsigned int word, int src_lane, int flags, unsigned
__device__ __forceinline__
unsigned int SHFL_IDX_SYNC(unsigned int word, int src_lane, unsigned int member_mask)
{
#ifdef CUB_USE_COOPERATIVE_GROUPS
return __shfl_sync(member_mask, word, src_lane);
#else
return __shfl(word, src_lane);
#endif
return __shfl_sync(member_mask, word, src_lane);
}

/**
Expand Down Expand Up @@ -739,11 +706,7 @@ inline __device__ unsigned int MatchAny(unsigned int label)
" .reg .pred p;\n"
" and.b32 %0, %1, %2;"
" setp.eq.u32 p, %0, %2;\n"
#ifdef CUB_USE_COOPERATIVE_GROUPS
" vote.ballot.sync.b32 %0, p, 0xffffffff;\n"
#else
" vote.ballot.b32 %0, p;\n"
#endif
" @!p not.b32 %0, %0;\n"
"}\n" : "=r"(mask) : "r"(label), "r"(current_bit));

Expand Down
71 changes: 0 additions & 71 deletions cub/warp/specializations/warp_reduce_shfl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ struct WarpReduceShfl
int shfl_c = last_lane | SHFL_C; // Shuffle control (mask and last_lane)

// Use predicate set from SHFL to guard against invalid peers
#ifdef CUB_USE_COOPERATIVE_GROUPS
asm volatile(
"{"
" .reg .u32 r0;"
Expand All @@ -151,17 +150,6 @@ struct WarpReduceShfl
" mov.u32 %0, r0;"
"}"
: "=r"(output) : "r"(input), "r"(offset), "r"(shfl_c), "r"(input), "r"(member_mask));
#else
asm volatile(
"{"
" .reg .u32 r0;"
" .reg .pred p;"
" shfl.down.b32 r0|p, %1, %2, %3;"
" @p add.u32 r0, r0, %4;"
" mov.u32 %0, r0;"
"}"
: "=r"(output) : "r"(input), "r"(offset), "r"(shfl_c), "r"(input));
#endif

return output;
}
Expand All @@ -178,7 +166,6 @@ struct WarpReduceShfl
int shfl_c = last_lane | SHFL_C; // Shuffle control (mask and last_lane)

// Use predicate set from SHFL to guard against invalid peers
#ifdef CUB_USE_COOPERATIVE_GROUPS
asm volatile(
"{"
" .reg .f32 r0;"
Expand All @@ -188,17 +175,6 @@ struct WarpReduceShfl
" mov.f32 %0, r0;"
"}"
: "=f"(output) : "f"(input), "r"(offset), "r"(shfl_c), "f"(input), "r"(member_mask));
#else
asm volatile(
"{"
" .reg .f32 r0;"
" .reg .pred p;"
" shfl.down.b32 r0|p, %1, %2, %3;"
" @p add.f32 r0, r0, %4;"
" mov.f32 %0, r0;"
"}"
: "=f"(output) : "f"(input), "r"(offset), "r"(shfl_c), "f"(input));
#endif

return output;
}
Expand All @@ -214,7 +190,6 @@ struct WarpReduceShfl
unsigned long long output;
int shfl_c = last_lane | SHFL_C; // Shuffle control (mask and last_lane)

#ifdef CUB_USE_COOPERATIVE_GROUPS
asm volatile(
"{"
" .reg .u32 lo;"
Expand All @@ -227,20 +202,6 @@ struct WarpReduceShfl
" @p add.u64 %0, %0, %1;"
"}"
: "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c), "r"(member_mask));
#else
asm volatile(
"{"
" .reg .u32 lo;"
" .reg .u32 hi;"
" .reg .pred p;"
" mov.b64 {lo, hi}, %1;"
" shfl.down.b32 lo|p, lo, %2, %3;"
" shfl.down.b32 hi|p, hi, %2, %3;"
" mov.b64 %0, {lo, hi};"
" @p add.u64 %0, %0, %1;"
"}"
: "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c));
#endif

return output;
}
Expand All @@ -257,7 +218,6 @@ struct WarpReduceShfl
int shfl_c = last_lane | SHFL_C; // Shuffle control (mask and last_lane)

// Use predicate set from SHFL to guard against invalid peers
#ifdef CUB_USE_COOPERATIVE_GROUPS
asm volatile(
"{"
" .reg .u32 lo;"
Expand All @@ -270,20 +230,6 @@ struct WarpReduceShfl
" @p add.s64 %0, %0, %1;"
"}"
: "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c), "r"(member_mask));
#else
asm volatile(
"{"
" .reg .u32 lo;"
" .reg .u32 hi;"
" .reg .pred p;"
" mov.b64 {lo, hi}, %1;"
" shfl.down.b32 lo|p, lo, %2, %3;"
" shfl.down.b32 hi|p, hi, %2, %3;"
" mov.b64 %0, {lo, hi};"
" @p add.s64 %0, %0, %1;"
"}"
: "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c));
#endif

return output;
}
Expand All @@ -300,7 +246,6 @@ struct WarpReduceShfl
int shfl_c = last_lane | SHFL_C; // Shuffle control (mask and last_lane)

// Use predicate set from SHFL to guard against invalid peers
#ifdef CUB_USE_COOPERATIVE_GROUPS
asm volatile(
"{"
" .reg .u32 lo;"
Expand All @@ -315,22 +260,6 @@ struct WarpReduceShfl
" @p add.f64 %0, %0, r0;"
"}"
: "=d"(output) : "d"(input), "r"(offset), "r"(shfl_c), "r"(member_mask));
#else
asm volatile(
"{"
" .reg .u32 lo;"
" .reg .u32 hi;"
" .reg .pred p;"
" .reg .f64 r0;"
" mov.b64 %0, %1;"
" mov.b64 {lo, hi}, %1;"
" shfl.down.b32 lo|p, lo, %2, %3;"
" shfl.down.b32 hi|p, hi, %2, %3;"
" mov.b64 r0, {lo, hi};"
" @p add.f64 %0, %0, r0;"
"}"
: "=d"(output) : "d"(input), "r"(offset), "r"(shfl_c));
#endif

return output;
}
Expand Down
Loading