Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport branch/2.8.x] Deprecate cub::FpLimits in favor of cuda::std::numeric_limits #3658

Open
wants to merge 1 commit into
base: branch/2.8.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 23 additions & 5 deletions c2h/include/c2h/bfloat16.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

#include <cub/util_type.cuh>

#include <cuda/std/limits>
#include <cuda/std/type_traits>

#include <cstdint>
Expand Down Expand Up @@ -232,19 +233,36 @@ inline std::ostream& operator<<(std::ostream& out, const __nv_bfloat16& x)
* Traits overloads
******************************************************************************/

_LIBCUDACXX_BEGIN_NAMESPACE_STD
template <>
struct CUB_NS_QUALIFIER::FpLimits<bfloat16_t>
struct __is_extended_floating_point<bfloat16_t> : true_type
{};

#ifndef _CCCL_NO_VARIABLE_TEMPLATES
template <>
_CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v<bfloat16_t> = true;
#endif // _CCCL_NO_VARIABLE_TEMPLATES

template <>
class __numeric_limits_impl<bfloat16_t, __numeric_limits_type::__floating_point>
{
static __host__ __device__ __forceinline__ bfloat16_t Max()
public:
static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bfloat16_t max()
{
return bfloat16_t(numeric_limits<__nv_bfloat16>::max());
}

static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bfloat16_t min()
{
return bfloat16_t::max();
return bfloat16_t(numeric_limits<__nv_bfloat16>::min());
}

static __host__ __device__ __forceinline__ bfloat16_t Lowest()
static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bfloat16_t lowest()
{
return bfloat16_t::lowest();
return bfloat16_t(numeric_limits<__nv_bfloat16>::lowest());
}
};
_LIBCUDACXX_END_NAMESPACE_STD

template <>
struct CUB_NS_QUALIFIER::NumericTraits<bfloat16_t>
Expand Down
28 changes: 23 additions & 5 deletions c2h/include/c2h/half.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

#include <cub/util_type.cuh>

#include <cuda/std/limits>
#include <cuda/std/type_traits>

#include <cstdint>
Expand Down Expand Up @@ -327,19 +328,36 @@ inline std::ostream& operator<<(std::ostream& out, const __half& x)
* Traits overloads
******************************************************************************/

_LIBCUDACXX_BEGIN_NAMESPACE_STD
template <>
struct CUB_NS_QUALIFIER::FpLimits<half_t>
struct __is_extended_floating_point<half_t> : true_type
{};

#ifndef _CCCL_NO_VARIABLE_TEMPLATES
template <>
_CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v<half_t> = true;
#endif // _CCCL_NO_VARIABLE_TEMPLATES

template <>
class __numeric_limits_impl<half_t, __numeric_limits_type::__floating_point>
{
static __host__ __device__ __forceinline__ half_t Max()
public:
static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE half_t max()
{
return half_t(numeric_limits<__half>::max());
}

static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE half_t min()
{
return (half_t::max)();
return half_t(numeric_limits<__half>::min());
}

static __host__ __device__ __forceinline__ half_t Lowest()
static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE half_t lowest()
{
return half_t::lowest();
return half_t(numeric_limits<__half>::lowest());
}
};
_LIBCUDACXX_END_NAMESPACE_STD

template <>
struct CUB_NS_QUALIFIER::NumericTraits<half_t>
Expand Down
112 changes: 10 additions & 102 deletions cub/cub/util_type.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -890,116 +890,20 @@ struct BaseTraits<SIGNED_INTEGER, true, false, _UnsignedBits, T>
}
};

template <typename _T>
struct FpLimits;

template <>
struct FpLimits<float>
{
static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE float Max()
{
return ::cuda::std::numeric_limits<float>::max();
}

static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE float Lowest()
{
return ::cuda::std::numeric_limits<float>::lowest();
}
};

template <>
struct FpLimits<double>
{
static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE double Max()
{
return ::cuda::std::numeric_limits<double>::max();
}

static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE double Lowest()
{
return ::cuda::std::numeric_limits<double>::lowest();
}
};

# if defined(_CCCL_HAS_NVFP16)
template <>
struct FpLimits<__half>
{
static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __half Max()
{
unsigned short max_word = 0x7BFF;
return reinterpret_cast<__half&>(max_word);
}

static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __half Lowest()
{
unsigned short lowest_word = 0xFBFF;
return reinterpret_cast<__half&>(lowest_word);
}
};
# endif // _CCCL_HAS_NVFP16

# if defined(_CCCL_HAS_NVBF16)
template <>
struct FpLimits<__nv_bfloat16>
{
static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_bfloat16 Max()
{
unsigned short max_word = 0x7F7F;
return reinterpret_cast<__nv_bfloat16&>(max_word);
}

static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_bfloat16 Lowest()
{
unsigned short lowest_word = 0xFF7F;
return reinterpret_cast<__nv_bfloat16&>(lowest_word);
}
};
# endif // _CCCL_HAS_NVBF16

# if defined(__CUDA_FP8_TYPES_EXIST__)
template <>
struct FpLimits<__nv_fp8_e4m3>
{
static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e4m3 Max()
{
unsigned char max_word = 0x7EU;
__nv_fp8_e4m3 ret_val;
memcpy(&ret_val, &max_word, sizeof(__nv_fp8_e4m3));
return ret_val;
}

static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e4m3 Lowest()
{
unsigned char lowest_word = 0xFEU;
__nv_fp8_e4m3 ret_val;
memcpy(&ret_val, &lowest_word, sizeof(__nv_fp8_e4m3));
return ret_val;
}
};

template <>
struct FpLimits<__nv_fp8_e5m2>
template <typename T>
struct CCCL_DEPRECATED_BECAUSE("Use cuda::std::numeric_limits instead") FpLimits
{
static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e5m2 Max()
static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Max()
{
unsigned char max_word = 0x7BU;
__nv_fp8_e5m2 ret_val;
memcpy(&ret_val, &max_word, sizeof(__nv_fp8_e5m2));
return ret_val;
return ::cuda::std::numeric_limits<T>::max();
}

static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e5m2 Lowest()
static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Lowest()
{
unsigned char lowest_word = 0xFBU;
__nv_fp8_e5m2 ret_val;
memcpy(&ret_val, &lowest_word, sizeof(__nv_fp8_e5m2));
return ret_val;
return ::cuda::std::numeric_limits<T>::lowest();
}
};

# endif // __CUDA_FP8_TYPES_EXIST__

/**
* Basic type traits (fp primitive specialization)
*/
Expand Down Expand Up @@ -1029,12 +933,16 @@ struct BaseTraits<FLOATING_POINT, true, false, _UnsignedBits, T>

static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Max()
{
_CCCL_SUPPRESS_DEPRECATED_PUSH
return FpLimits<T>::Max();
_CCCL_SUPPRESS_DEPRECATED_POP
}

static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Lowest()
{
_CCCL_SUPPRESS_DEPRECATED_PUSH
return FpLimits<T>::Lowest();
_CCCL_SUPPRESS_DEPRECATED_POP
}
};

Expand Down
42 changes: 42 additions & 0 deletions cub/test/catch2_test_util_type.cu
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <cuda/std/type_traits>

#include <c2h/catch2_test_helper.h>
#include <c2h/extended_types.h>

C2H_TEST("Tests non_void_value_t", "[util][type]")
{
Expand Down Expand Up @@ -80,3 +81,44 @@ C2H_TEST("Test CUB_DEFINE_DETECT_NESTED_TYPE", "[util][type]")
STATIC_REQUIRE(cat_detect<HasCat>::value);
STATIC_REQUIRE(!cat_detect<HasDog>::value);
}

using types = c2h::type_list<
char,
signed char,
unsigned char,
short,
unsigned short,
int,
unsigned int,
long,
unsigned long,
long long,
unsigned long long,
#if TEST_HALF_T()
__half,
half_t,
#endif // TEST_HALF_T()
#if TEST_BF_T()
__nv_bfloat16,
bfloat16_t,
#endif // TEST_BF_T()
float,
double
#ifndef _LIBCUDACXX_HAS_NO_LONG_DOUBLE
,
long double
#endif // _LIBCUDACXX_HAS_NO_LONG_DOUBLE
>;

C2H_TEST("Test FpLimits agrees with numeric_limits", "[util][type]", types)
{
using T = c2h::get<0, TestType>;
CAPTURE(c2h::type_name<T>());
_CCCL_SUPPRESS_DEPRECATED_PUSH
CHECK(cub::FpLimits<T>::Max() == cuda::std::numeric_limits<T>::max());
CHECK(cub::FpLimits<T>::Lowest() == cuda::std::numeric_limits<T>::lowest());

CHECK(cub::FpLimits<const T>::Max() == cuda::std::numeric_limits<const T>::max());
CHECK(cub::FpLimits<const T>::Lowest() == cuda::std::numeric_limits<const T>::lowest());
_CCCL_SUPPRESS_DEPRECATED_POP
}
Loading