diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java index 203df627f4..f0fb0c0d9d 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java @@ -142,6 +142,9 @@ public Statistics build() { // Builder for FLOAT16 type to handle special cases of min/max values like NaN, -0.0, and 0.0 private static class Float16Builder extends Builder { + private final static Binary POSITIVE_ZERO_LITTLE_ENDIAN = Binary.fromConstantByteArray(new byte[] {0x00, 0x00}); + private final static Binary NEGATIVE_ZERO_LITTLE_ENDIAN = Binary.fromConstantByteArray(new byte[] {0x00, (byte) 0x80}); + public Float16Builder(PrimitiveType type) { super(type); assert type.getPrimitiveTypeName() == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY; @@ -158,19 +161,15 @@ public Statistics build() { short max = bMax.get2BytesLittleEndian(); // Drop min/max values in case of NaN as the sorting order of values is undefined for this case if (Float16.isNaN(min) || Float16.isNaN(max)) { - bMin = Binary.fromConstantByteArray(new byte[] {0x00, 0x00}); - bMax = Binary.fromConstantByteArray(new byte[] {0x00, (byte) 0x80}); - stats.setMinMax(bMin, bMax); + stats.setMinMax(POSITIVE_ZERO_LITTLE_ENDIAN, NEGATIVE_ZERO_LITTLE_ENDIAN); ((Statistics) stats).hasNonNullValue = false; } else { // Updating min to -0.0 and max to +0.0 to ensure that no 0.0 values would be skipped if (min == (short) 0x0000) { - bMin = Binary.fromConstantByteArray(new byte[] {0x00, (byte) 0x80}); - stats.setMinMax(bMin, bMax); + stats.setMinMax(NEGATIVE_ZERO_LITTLE_ENDIAN, bMax); } if (max == (short) 0x8000) { - bMax = Binary.fromConstantByteArray(new byte[] {0x00, 0x00}); - stats.setMinMax(bMin, bMax); + stats.setMinMax(bMin, POSITIVE_ZERO_LITTLE_ENDIAN); } } } diff --git 
a/parquet-column/src/main/java/org/apache/parquet/schema/Float16.java b/parquet-column/src/main/java/org/apache/parquet/schema/Float16.java index 784f1aa3d9..427f69d4ff 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Float16.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Float16.java @@ -74,6 +74,65 @@ public class Float16 { private static final int FP32_DENORMAL_MAGIC = 126 << 23; private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC); + /** + * Returns true if the specified half-precision float value represents + * a Not-a-Number, false otherwise. + * + * @param h A half-precision float value + * @return True if the value is a NaN, false otherwise + * + */ + public static boolean isNaN(short h) { + return (h & EXPONENT_SIGNIFICAND_MASK) > POSITIVE_INFINITY; + } + + /** + *

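<p>Compares the two specified half-precision float values. The following + * conditions apply during the comparison:</p> + * + * <ul> + * <li>{@code NaN} is considered by this method to be equal to itself and greater + * than all other half-precision float values (including {@code POSITIVE_INFINITY})</li> + * <li>{@code +0.0} is considered by this method to be greater than {@code -0.0}</li> + * </ul>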

+ * + * + * + * @param x The first half-precision float value to compare. + * @param y The second half-precision float value to compare + * + * @return The value {@code 0} if {@code x} is numerically equal to {@code y}, a + * value less than {@code 0} if {@code x} is numerically less than {@code y}, + * and a value greater than {@code 0} if {@code x} is numerically greater + * than {@code y} + * + */ + public static int compare(short x, short y) { + boolean xIsNaN = isNaN(x); + boolean yIsNaN = isNaN(y); + + if (!xIsNaN && !yIsNaN) { + int first = ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff); + int second = ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); + // Returns true if the first half-precision float value is less + // (smaller toward negative infinity) than the second half-precision float value. + if (first < second) { + return -1; + } + + // Returns true if the first half-precision float value is greater + // (larger toward positive infinity) than the second half-precision float value. + if (first > second) { + return 1; + } + } + + // Collapse NaNs, akin to halfToIntBits(), but we want to keep + // (signed) short value types to preserve the ordering of -0.0 + // and +0.0 + short xBits = xIsNaN ? NaN : x; + short yBits = yIsNaN ? NaN : y; + return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1)); + } + /** * Converts the specified half-precision float value in Binary little endian into a * single-precision float value. 
The following special cases are handled: @@ -86,7 +145,7 @@ public class Float16 { * @param b The half-precision float value in Binary little endian to convert to single-precision * @return A normalized single-precision float value */ - public static float toFloat(Binary b) { + static float toFloat(Binary b) { short h = b.get2BytesLittleEndian(); int bits = h & 0xffff; int s = bits & SIGN_MASK; @@ -135,7 +194,7 @@ public static float toFloat(Binary b) { * @param f The single-precision float value to convert to half-precision * @return A half-precision float value */ - public static short toFloat16(float f) { + static short toFloat16(float f) { int bits = Float.floatToRawIntBits(f); int s = (bits >>> FP32_SIGN_SHIFT ); int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_SHIFTED_EXPONENT_MASK; @@ -185,65 +244,6 @@ public static short toFloat16(float f) { return (short) ((s << SIGN_SHIFT) | (outE << EXPONENT_SHIFT) + outM); } - /** - * Returns true if the specified half-precision float value represents - * a Not-a-Number, false otherwise. - * - * @param h A half-precision float value - * @return True if the value is a NaN, false otherwise - * - */ - public static boolean isNaN(short h) { - return (h & EXPONENT_SIGNIFICAND_MASK) > POSITIVE_INFINITY; - } - - /** - *

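<p>Compares the two specified half-precision float values. The following - * conditions apply during the comparison:</p> - * - * <ul> - * <li>{@code NaN} is considered by this method to be equal to itself and greater - * than all other half-precision float values (including {@code POSITIVE_INFINITY})</li> - * <li>{@code +0.0} is considered by this method to be greater than {@code -0.0}</li> - * </ul>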

- * - * - * - * @param x The first half-precision float value to compare. - * @param y The second half-precision float value to compare - * - * @return The value {@code 0} if {@code x} is numerically equal to {@code y}, a - * value less than {@code 0} if {@code x} is numerically less than {@code y}, - * and a value greater than {@code 0} if {@code x} is numerically greater - * than {@code y} - * - */ - public static int compare(short x, short y) { - boolean xIsNaN = isNaN(x); - boolean yIsNaN = isNaN(y); - - if (!xIsNaN && !yIsNaN) { - int first = ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff); - int second = ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); - // Returns true if the first half-precision float value is less - // (smaller toward negative infinity) than the second half-precision float value. - if (first < second) { - return -1; - } - - // Returns true if the first half-precision float value is greater - // (larger toward positive infinity) than the second half-precision float value. - if (first > second) { - return 1; - } - } - - // Collapse NaNs, akin to halfToIntBits(), but we want to keep - // (signed) short value types to preserve the ordering of -0.0 - // and +0.0 - short xBits = xIsNaN ? NaN : x; - short yBits = yIsNaN ? NaN : y; - return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1)); - } - /** * Returns a string representation of the specified half-precision * float value. Calling this method is equivalent to calling @@ -253,7 +253,7 @@ public static int compare(short x, short y) { * @param h A half-precision float value in binary little-endian format * @return A string representation of the specified value */ - public static String toFloatString(Binary h) { + static String toFloatString(Binary h) { return Float.toString(Float16.toFloat(h)); } }