diff --git a/paddle/fluid/platform/bfloat16_test.cu b/paddle/fluid/platform/bfloat16_test.cu
index 1e1919bfca059..cec83cbd11fe9 100644
--- a/paddle/fluid/platform/bfloat16_test.cu
+++ b/paddle/fluid/platform/bfloat16_test.cu
@@ -39,7 +39,7 @@ TEST(bfloat16, convert_float32_to_bfloat16_on_gpu) {
 TEST(bfloat16, assignment_operator_on_gpu) {
   // Assignment operator
   bfloat16 v_assign;
-  v_assign = nv_bfloat16(bfloat16(1.0f));
+  v_assign = bfloat16(1.0f).to_nv_bfloat16();
   EXPECT_EQ(v_assign.x, 0x3f80);
   v_assign = 0.33333;
   EXPECT_EQ(v_assign.x, 0x3eab);
diff --git a/paddle/phi/backends/gpu/cuda/cuda_device_function.h b/paddle/phi/backends/gpu/cuda/cuda_device_function.h
index 10aee53c45cf9..4ff2e528a91cd 100644
--- a/paddle/phi/backends/gpu/cuda/cuda_device_function.h
+++ b/paddle/phi/backends/gpu/cuda/cuda_device_function.h
@@ -67,10 +67,8 @@ template <>
 __forceinline__ __device__ phi::dtype::bfloat16 CudaShuffleDownSync(
     unsigned mask, phi::dtype::bfloat16 val, int delta, int width) {
 #if defined(PADDLE_CUDA_BF16)
-  return phi::dtype::bfloat16(__shfl_down_sync(mask,
-                                               static_cast<nv_bfloat16>(val),
-                                               static_cast<unsigned>(delta),
-                                               width));
+  return phi::dtype::bfloat16(__shfl_down_sync(
+      mask, val.to_nv_bfloat16(), static_cast<unsigned>(delta), width));
 #else
   PADDLE_ENFORCE(
       false, "__shfl_down_sync with bfloat16 is not supported on cuda <= 11.");
@@ -114,7 +112,7 @@ __forceinline__ __device__ phi::dtype::bfloat16 CudaShuffleXorSync(
     unsigned mask, phi::dtype::bfloat16 val, int width) {
 #if defined(PADDLE_CUDA_BF16)
   return phi::dtype::bfloat16(
-      __shfl_xor_sync(mask, static_cast<nv_bfloat16>(val), width));
+      __shfl_xor_sync(mask, val.to_nv_bfloat16(), width));
 #else
   PADDLE_ENFORCE(
       false, "__shfl_xor_sync with bfloat16 is not supported on cuda <= 11.");
diff --git a/paddle/phi/common/bfloat16.h b/paddle/phi/common/bfloat16.h
index 6a11f0c0714c2..37e4b55fbbc07 100644
--- a/paddle/phi/common/bfloat16.h
+++ b/paddle/phi/common/bfloat16.h
@@ -145,7 +145,7 @@ struct PADDLE_ALIGN(2) bfloat16 {
   }
 
   // Conversion opertors
-  HOSTDEVICE inline explicit operator float() const {
+  HOSTDEVICE inline operator float() const {
 #ifdef PADDLE_WITH_HIP
     uint32_t res = 0;
     // We should be using memcpy in order to respect the strict aliasing rule
@@ -168,7 +168,7 @@ struct PADDLE_ALIGN(2) bfloat16 {
   }
 
 #ifdef PADDLE_CUDA_BF16
-  HOSTDEVICE inline explicit operator __nv_bfloat16() const {
+  HOSTDEVICE inline __nv_bfloat16 to_nv_bfloat16() const {
     return *reinterpret_cast<const __nv_bfloat16*>(&x);
   }
 #endif
@@ -207,7 +207,7 @@ struct PADDLE_ALIGN(2) bfloat16 {
     return static_cast<uint64_t>(static_cast<float>(*this));
   }
 
-  HOSTDEVICE inline explicit operator double() const {
+  HOSTDEVICE inline operator double() const {
     return static_cast<double>(static_cast<float>(*this));
   }
 };
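
The sketch below illustrates, outside the patched files, how device code would use the new named accessor `to_nv_bfloat16()` in place of the removed explicit `__nv_bfloat16` conversion, and how the now-implicit `operator float()` allows direct assignment to float. It is a minimal, hypothetical example: the kernel name `ShiftDownBf16`, the include lines, and the launch parameters are illustrative assumptions, not part of this patch; it presumes CUDA >= 11 with PADDLE_CUDA_BF16 defined, as the patched code paths do.

// Hypothetical warp-shuffle kernel, assuming CUDA >= 11 and PADDLE_CUDA_BF16.
#include <cuda_bf16.h>

#include "paddle/phi/common/bfloat16.h"

__global__ void ShiftDownBf16(phi::dtype::bfloat16* data, int delta) {
  phi::dtype::bfloat16 val = data[threadIdx.x];

  // New accessor from this patch: a named conversion to the native CUDA type,
  // replacing static_cast<nv_bfloat16>(val).
  __nv_bfloat16 raw = val.to_nv_bfloat16();
  raw = __shfl_down_sync(0xffffffffu, raw, static_cast<unsigned>(delta), 32);

  // Wrap the shuffled value back, the same way the patched CudaShuffleDownSync does.
  data[threadIdx.x] = phi::dtype::bfloat16(raw);

  // Since operator float() is no longer explicit, bfloat16 converts implicitly:
  float as_float = data[threadIdx.x];
  (void)as_float;
}

Replacing the conversion operator with a named method keeps the raw-bit reinterpretation to `__nv_bfloat16` visible at every call site, while dropping `explicit` from the float/double operators lets bfloat16 values participate in ordinary floating-point expressions without a cast.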