NVIDIA · github-actions · Feb 3, 2025
@@ -36,6 +36,7 @@
 
 #include <cub/util_type.cuh>
 
+#include <cuda/std/limits>
 #include <cuda/std/type_traits>
 
 #include <cstdint>
@@ -232,19 +233,36 @@ inline std::ostream& operator<<(std::ostream& out, const __nv_bfloat16& x)
  * Traits overloads
  ******************************************************************************/
 
+_LIBCUDACXX_BEGIN_NAMESPACE_STD
 template <>
-struct CUB_NS_QUALIFIER::FpLimits<bfloat16_t>
+struct __is_extended_floating_point<bfloat16_t> : true_type
+{};
+
+#ifndef _CCCL_NO_VARIABLE_TEMPLATES
+template <>
+_CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v<bfloat16_t> = true;
+#endif // _CCCL_NO_VARIABLE_TEMPLATES
+
+template <>
+class __numeric_limits_impl<bfloat16_t, __numeric_limits_type::__floating_point>
 {
-  static __host__ __device__ __forceinline__ bfloat16_t Max()
+public:
+  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bfloat16_t max()
+  {
+    return bfloat16_t(numeric_limits<__nv_bfloat16>::max());
+  }
+
+  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bfloat16_t min()
   {
-    return bfloat16_t::max();
+    return bfloat16_t(numeric_limits<__nv_bfloat16>::min());
   }
 
-  static __host__ __device__ __forceinline__ bfloat16_t Lowest()
+  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bfloat16_t lowest()
   {
-    return bfloat16_t::lowest();
+    return bfloat16_t(numeric_limits<__nv_bfloat16>::lowest());
   }
 };
+_LIBCUDACXX_END_NAMESPACE_STD
 
 template <>
 struct CUB_NS_QUALIFIER::NumericTraits<bfloat16_t>

@@ -37,6 +37,7 @@
 
 #include <cub/util_type.cuh>
 
+#include <cuda/std/limits>
 #include <cuda/std/type_traits>
 
 #include <cstdint>
@@ -327,19 +328,36 @@ inline std::ostream& operator<<(std::ostream& out, const __half& x)
  * Traits overloads
  ******************************************************************************/
 
+_LIBCUDACXX_BEGIN_NAMESPACE_STD
 template <>
-struct CUB_NS_QUALIFIER::FpLimits<half_t>
+struct __is_extended_floating_point<half_t> : true_type
+{};
+
+#ifndef _CCCL_NO_VARIABLE_TEMPLATES
+template <>
+_CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v<half_t> = true;
+#endif // _CCCL_NO_VARIABLE_TEMPLATES
+
+template <>
+class __numeric_limits_impl<half_t, __numeric_limits_type::__floating_point>
 {
-  static __host__ __device__ __forceinline__ half_t Max()
+public:
+  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE half_t max()
+  {
+    return half_t(numeric_limits<__half>::max());
+  }
+
+  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE half_t min()
   {
-    return (half_t::max)();
+    return half_t(numeric_limits<__half>::min());
   }
 
-  static __host__ __device__ __forceinline__ half_t Lowest()
+  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE half_t lowest()
   {
-    return half_t::lowest();
+    return half_t(numeric_limits<__half>::lowest());
   }
 };
+_LIBCUDACXX_END_NAMESPACE_STD
 
 template <>
 struct CUB_NS_QUALIFIER::NumericTraits<half_t>

@@ -890,116 +890,20 @@ struct BaseTraits<SIGNED_INTEGER, true, false, _UnsignedBits, T>
   }
 };
 
-template <typename _T>
-struct FpLimits;
-
-template <>
-struct FpLimits<float>
-{
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE float Max()
-  {
-    return ::cuda::std::numeric_limits<float>::max();
-  }
-
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE float Lowest()
-  {
-    return ::cuda::std::numeric_limits<float>::lowest();
-  }
-};
-
-template <>
-struct FpLimits<double>
-{
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE double Max()
-  {
-    return ::cuda::std::numeric_limits<double>::max();
-  }
-
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE double Lowest()
-  {
-    return ::cuda::std::numeric_limits<double>::lowest();
-  }
-};
-
-#  if defined(_CCCL_HAS_NVFP16)
-template <>
-struct FpLimits<__half>
-{
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __half Max()
-  {
-    unsigned short max_word = 0x7BFF;
-    return reinterpret_cast<__half&>(max_word);
-  }
-
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __half Lowest()
-  {
-    unsigned short lowest_word = 0xFBFF;
-    return reinterpret_cast<__half&>(lowest_word);
-  }
-};
-#  endif // _CCCL_HAS_NVFP16
-
-#  if defined(_CCCL_HAS_NVBF16)
-template <>
-struct FpLimits<__nv_bfloat16>
-{
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_bfloat16 Max()
-  {
-    unsigned short max_word = 0x7F7F;
-    return reinterpret_cast<__nv_bfloat16&>(max_word);
-  }
-
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_bfloat16 Lowest()
-  {
-    unsigned short lowest_word = 0xFF7F;
-    return reinterpret_cast<__nv_bfloat16&>(lowest_word);
-  }
-};
-#  endif // _CCCL_HAS_NVBF16
-
-#  if defined(__CUDA_FP8_TYPES_EXIST__)
-template <>
-struct FpLimits<__nv_fp8_e4m3>
-{
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e4m3 Max()
-  {
-    unsigned char max_word = 0x7EU;
-    __nv_fp8_e4m3 ret_val;
-    memcpy(&ret_val, &max_word, sizeof(__nv_fp8_e4m3));
-    return ret_val;
-  }
-
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e4m3 Lowest()
-  {
-    unsigned char lowest_word = 0xFEU;
-    __nv_fp8_e4m3 ret_val;
-    memcpy(&ret_val, &lowest_word, sizeof(__nv_fp8_e4m3));
-    return ret_val;
-  }
-};
-
-template <>
-struct FpLimits<__nv_fp8_e5m2>
+template <typename T>
+struct CCCL_DEPRECATED_BECAUSE("Use cuda::std::numeric_limits instead") FpLimits
 {
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e5m2 Max()
+  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Max()
   {
-    unsigned char max_word = 0x7BU;
-    __nv_fp8_e5m2 ret_val;
-    memcpy(&ret_val, &max_word, sizeof(__nv_fp8_e5m2));
-    return ret_val;
+    return ::cuda::std::numeric_limits<T>::max();
   }
 
-  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE __nv_fp8_e5m2 Lowest()
+  static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Lowest()
   {
-    unsigned char lowest_word = 0xFBU;
-    __nv_fp8_e5m2 ret_val;
-    memcpy(&ret_val, &lowest_word, sizeof(__nv_fp8_e5m2));
-    return ret_val;
+    return ::cuda::std::numeric_limits<T>::lowest();
   }
 };
 
-#  endif // __CUDA_FP8_TYPES_EXIST__
-
 /**
  * Basic type traits (fp primitive specialization)
  */
@@ -1029,12 +933,16 @@ struct BaseTraits<FLOATING_POINT, true, false, _UnsignedBits, T>
 
   static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Max()
   {
+    _CCCL_SUPPRESS_DEPRECATED_PUSH
     return FpLimits<T>::Max();
+    _CCCL_SUPPRESS_DEPRECATED_POP
   }
 
   static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T Lowest()
   {
+    _CCCL_SUPPRESS_DEPRECATED_PUSH
     return FpLimits<T>::Lowest();
+    _CCCL_SUPPRESS_DEPRECATED_POP
   }
 };
 

@@ -32,6 +32,7 @@
 #include <cuda/std/type_traits>
 
 #include <c2h/catch2_test_helper.h>
+#include <c2h/extended_types.h>
 
 C2H_TEST("Tests non_void_value_t", "[util][type]")
 {
@@ -80,3 +81,44 @@ C2H_TEST("Test CUB_DEFINE_DETECT_NESTED_TYPE", "[util][type]")
   STATIC_REQUIRE(cat_detect<HasCat>::value);
   STATIC_REQUIRE(!cat_detect<HasDog>::value);
 }
+
+using types = c2h::type_list<
+  char,
+  signed char,
+  unsigned char,
+  short,
+  unsigned short,
+  int,
+  unsigned int,
+  long,
+  unsigned long,
+  long long,
+  unsigned long long,
+#if TEST_HALF_T()
+  __half,
+  half_t,
+#endif // TEST_HALF_T()
+#if TEST_BF_T()
+  __nv_bfloat16,
+  bfloat16_t,
+#endif // TEST_BF_T()
+  float,
+  double
+#ifndef _LIBCUDACXX_HAS_NO_LONG_DOUBLE
+  ,
+  long double
+#endif // _LIBCUDACXX_HAS_NO_LONG_DOUBLE
+  >;
+
+C2H_TEST("Test FpLimits agrees with numeric_limits", "[util][type]", types)
+{
+  using T = c2h::get<0, TestType>;
+  CAPTURE(c2h::type_name<T>());
+  _CCCL_SUPPRESS_DEPRECATED_PUSH
+  CHECK(cub::FpLimits<T>::Max() == cuda::std::numeric_limits<T>::max());
+  CHECK(cub::FpLimits<T>::Lowest() == cuda::std::numeric_limits<T>::lowest());
+
+  CHECK(cub::FpLimits<const T>::Max() == cuda::std::numeric_limits<const T>::max());
+  CHECK(cub::FpLimits<const T>::Lowest() == cuda::std::numeric_limits<const T>::lowest());
+  _CCCL_SUPPRESS_DEPRECATED_POP
+}