From 18a6fa5e9de9409c694e9fc202e8e7470f13dce8 Mon Sep 17 00:00:00 2001 From: Christopher Jones Date: Fri, 27 Aug 2021 13:48:09 -0500 Subject: [PATCH] Removed undefined use of union in libminifloat Testing showed use of memcpy produced identical assembly code. --- DataFormats/Math/interface/libminifloat.h | 115 ++++++++-------------- 1 file changed, 40 insertions(+), 75 deletions(-) diff --git a/DataFormats/Math/interface/libminifloat.h b/DataFormats/Math/interface/libminifloat.h index e811b89bca4e7..c4db7de47d3af 100644 --- a/DataFormats/Math/interface/libminifloat.h +++ b/DataFormats/Math/interface/libminifloat.h @@ -4,68 +4,49 @@ #include #include #include +#include // ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf class MiniFloatConverter { public: MiniFloatConverter(); inline static float float16to32(uint16_t h) { - union { - float flt; - uint32_t i32; - } conv; - conv.i32 = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] + exponenttable[h >> 10]; - return conv.flt; + uint32_t i32 = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] + exponenttable[h >> 10]; + return bit_cast(i32); } inline static uint16_t float32to16(float x) { return float32to16round(x); } /// Fast implementation, but it crops the number so it biases low inline static uint16_t float32to16crop(float x) { - union { - float flt; - uint32_t i32; - } conv; - conv.flt = x; - return basetable[(conv.i32 >> 23) & 0x1ff] + ((conv.i32 & 0x007fffff) >> shifttable[(conv.i32 >> 23) & 0x1ff]); + uint32_t i32 = bit_cast(x); + return basetable[(i32 >> 23) & 0x1ff] + ((i32 & 0x007fffff) >> shifttable[(i32 >> 23) & 0x1ff]); } /// Slower implementation, but it rounds to avoid biases inline static uint16_t float32to16round(float x) { - union { - float flt; - uint32_t i32; - } conv; - conv.flt = x; - uint8_t shift = shifttable[(conv.i32 >> 23) & 0x1ff]; + uint32_t i32 = bit_cast(x); + uint8_t shift = shifttable[(i32 >> 23) & 0x1ff]; if (shift == 13) { - uint16_t base2 = (conv.i32 & 0x007fffff) >> 12; + uint16_t base2 = (i32 & 0x007fffff) >> 12; uint16_t base = base2 >> 1; if (((base2 & 1) != 0) && (base < 1023)) base++; - return basetable[(conv.i32 >> 23) & 0x1ff] + base; + return basetable[(i32 >> 23) & 0x1ff] + base; } else { - return basetable[(conv.i32 >> 23) & 0x1ff] + ((conv.i32 & 0x007fffff) >> shifttable[(conv.i32 >> 23) & 0x1ff]); + return basetable[(i32 >> 23) & 0x1ff] + ((i32 & 0x007fffff) >> shifttable[(i32 >> 23) & 0x1ff]); } } template inline static float reduceMantissaToNbits(const float &f) { static_assert(bits <= 23, "max mantissa size is 23 bits"); constexpr uint32_t mask = (0xFFFFFFFF >> (23 - bits)) << (23 - bits); - union { - float flt; - uint32_t i32; - } conv; - conv.flt = f; - conv.i32 &= mask; - return conv.flt; + uint32_t i32 = bit_cast(f); + i32 &= mask; + return bit_cast(i32); } inline static float reduceMantissaToNbits(const float &f, int bits) { uint32_t mask = (0xFFFFFFFF >> (23 - bits)) << (23 - bits); - union { - float flt; - uint32_t i32; - } conv; - conv.flt = f; - conv.i32 &= mask; - return conv.flt; + uint32_t i32 = bit_cast(f); + i32 &= mask; + return bit_cast(i32); } class ReduceMantissaToNbitsRounding { @@ -77,20 +58,16 @@ class MiniFloatConverter { float operator()(float f) const { constexpr uint32_t low23 = (0x007FFFFF); // mask to keep lowest 23 bits = mantissa constexpr uint32_t hi9 = (0xFF800000); // mask to keep highest 9 bits = the rest - union { - float flt; - uint32_t i32; - } conv; - conv.flt = f; - if (conv.i32 & test) { // need to round - uint32_t mantissa = (conv.i32 & low23) >> shift; + uint32_t i32 = bit_cast(f); + if (i32 & test) { // need to round + uint32_t mantissa = (i32 & low23) >> shift; if (mantissa < maxn) mantissa++; - conv.i32 = (conv.i32 & hi9) | (mantissa << shift); + i32 = (i32 & hi9) | (mantissa << shift); } else { - conv.i32 &= mask; + i32 &= mask; } - return conv.flt; + return bit_cast(i32); } private: @@ -114,54 +91,34 @@ class MiniFloatConverter { } inline static float max() { - union { - float flt; - uint32_t i32; - } conv; - conv.i32 = 0x477fe000; // = mantissatable[offsettable[0x1e]+0x3ff]+exponenttable[0x1e] - return conv.flt; + constexpr uint32_t i32 = 0x477fe000; // = mantissatable[offsettable[0x1e]+0x3ff]+exponenttable[0x1e] + return bit_cast(i32); } // Maximum float32 value that gets rounded to max() inline static float max32RoundedToMax16() { - union { - float flt; - uint32_t i32; - } conv; // 2^16 in float32 is the first to result inf in float16, so // 2^16-1 is the last float32 to result max() in float16 - conv.i32 = (0x8f << 23) - 1; - return conv.flt; + constexpr uint32_t i32 = (0x8f << 23) - 1; + return bit_cast(i32); } inline static float min() { - union { - float flt; - uint32_t i32; - } conv; - conv.i32 = 0x38800000; // = mantissatable[offsettable[1]+0]+exponenttable[1] - return conv.flt; + constexpr uint32_t i32 = 0x38800000; // = mantissatable[offsettable[1]+0]+exponenttable[1] + return bit_cast(i32); } // Minimum float32 value that gets rounded to min() inline static float min32RoundedToMin16() { - union { - float flt; - uint32_t i32; - } conv; // 2^-14-1 in float32 is the first to result denormalized in float16, so // 2^-14 is the first float32 to result min() in float16 - conv.i32 = (0x71 << 23); - return conv.flt; + constexpr uint32_t i32 = (0x71 << 23); + return bit_cast(i32); } inline static float denorm_min() { - union { - float flt; - uint32_t i32; - } conv; - conv.i32 = 0x33800000; // mantissatable[offsettable[0]+1]+exponenttable[0] - return conv.flt; + constexpr uint32_t i32 = 0x33800000; // mantissatable[offsettable[0]+1]+exponenttable[0] + return bit_cast(i32); } inline static bool isdenorm(uint16_t h) { @@ -170,6 +127,14 @@ class MiniFloatConverter { } private: + //in C++20 we can use std::bit_cast which is constexpr + template + inline static To bit_cast(const From &src) noexcept { + static_assert(sizeof(To) == sizeof(From), "incompatible types"); + To dst; + std::memcpy(&dst, &src, sizeof(To)); + return dst; + } CMS_THREAD_SAFE static uint32_t mantissatable[2048]; CMS_THREAD_SAFE static uint32_t exponenttable[64]; CMS_THREAD_SAFE static uint16_t offsettable[64];