Skip to content

Commit

Permalink
Add logical type FLOAT16
Browse files Browse the repository at this point in the history
  • Loading branch information
jiashenz authored and zhangjiashen committed Sep 17, 2023
1 parent 9b5a962 commit 45a1ae2
Show file tree
Hide file tree
Showing 12 changed files with 486 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,12 @@ protected LogicalTypeAnnotation fromString(List<String> params) {
protected LogicalTypeAnnotation fromString(List<String> params) {
return IntervalLogicalTypeAnnotation.getInstance();
}
},
FLOAT16 {
  /**
   * Resolves the FLOAT16 token to its logical type annotation.
   *
   * @param params parameters supplied with the token; not read for FLOAT16
   * @return the annotation returned by {@link LogicalTypeAnnotation#float16Type()}
   */
  @Override
  protected LogicalTypeAnnotation fromString(List<String> params) {
    return LogicalTypeAnnotation.float16Type();
  }
};

/**
 * Creates the logical type annotation that corresponds to this token.
 *
 * @param params parameters of the annotation in string form; the FLOAT16 and interval
 *               constants do not read them
 * @return the logical type annotation for this token
 */
protected abstract LogicalTypeAnnotation fromString(List<String> params);
Expand Down Expand Up @@ -296,6 +302,10 @@ public static UUIDLogicalTypeAnnotation uuidType() {
return UUIDLogicalTypeAnnotation.INSTANCE;
}

/**
 * Returns the FLOAT16 logical type annotation.
 *
 * @return the shared {@link Float16LogicalTypeAnnotation} instance; every call returns the same object
 */
public static Float16LogicalTypeAnnotation float16Type() {
return Float16LogicalTypeAnnotation.INSTANCE;
}

public static class StringLogicalTypeAnnotation extends LogicalTypeAnnotation {
private static final StringLogicalTypeAnnotation INSTANCE = new StringLogicalTypeAnnotation();

Expand Down Expand Up @@ -901,6 +911,36 @@ PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
}
}

/**
 * Logical type annotation for FLOAT16 values. The class keeps one shared instance,
 * which is handed out by {@code LogicalTypeAnnotation.float16Type()}.
 */
public static class Float16LogicalTypeAnnotation extends LogicalTypeAnnotation {
  /** Number of bytes occupied by a FLOAT16 value. */
  public static final int BYTES = 2;

  private static final Float16LogicalTypeAnnotation INSTANCE = new Float16LogicalTypeAnnotation();

  // Private: INSTANCE above is the only object this class creates.
  private Float16LogicalTypeAnnotation() {
  }

  /**
   * @return the token identifying this annotation, {@code LogicalTypeToken.FLOAT16}
   */
  @Override
  LogicalTypeToken getType() {
    return LogicalTypeToken.FLOAT16;
  }

  /**
   * @return always {@code null}: there is no OriginalType for Float16
   */
  @Override
  @InterfaceAudience.Private
  public OriginalType toOriginalType() {
    return null;
  }

  /**
   * Dispatches to the visitor's overload for {@code Float16LogicalTypeAnnotation}.
   *
   * @param visitor the visitor to call
   * @param <T> the visitor's result type
   * @return whatever the visitor returns for this annotation
   */
  @Override
  public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T> visitor) {
    return visitor.visit(this);
  }

  /**
   * @param primitiveType the annotated primitive type; not read here
   * @return {@code PrimitiveStringifier.FLOAT16_STRINGIFIER}
   */
  @Override
  PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
    return PrimitiveStringifier.FLOAT16_STRINGIFIER;
  }
}

// This logical type annotation is implemented to support backward compatibility with ConvertedType.
// The new logical type representation in parquet-format doesn't have any interval type,
// thus this annotation is mapped to UNKNOWN.
Expand Down Expand Up @@ -1060,5 +1100,9 @@ default Optional<T> visit(IntervalLogicalTypeAnnotation intervalLogicalType) {
default Optional<T> visit(MapKeyValueTypeAnnotation mapKeyValueLogicalType) {
  // Default: no result; implementations override this to handle the annotation.
  return Optional.empty();
}

/**
 * Visits a FLOAT16 logical type annotation.
 *
 * @param float16LogicalType the annotation being visited; not read by this default
 * @return an empty {@link Optional} unless the implementation overrides this method
 */
default Optional<T> visit(Float16LogicalTypeAnnotation float16LogicalType) {
  return Optional.empty();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@

import java.io.Serializable;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Comparator;

import static org.apache.parquet.util.Float16.toFloat;

/**
* {@link Comparator} implementation that also supports the comparison of the related primitive type to avoid the
* performance penalty of boxing/unboxing. The {@code compare} methods for the not supported primitive types throw
Expand Down Expand Up @@ -276,4 +279,24 @@ public String toString() {
return "BINARY_AS_SIGNED_INTEGER_COMPARATOR";
}
};

/**
 * Comparator for float16 values stored as binaries.
 * <p>
 * Each operand is read as a little-endian {@code short} at the current position of its
 * byte buffer, converted with {@code Float16.toFloat} and then ordered with
 * {@link Float#compare(float, float)}, so NaN and signed zeros are ordered the way that
 * method orders them. The length of the binaries is not checked here.
 */
static final PrimitiveComparator<Binary> BINARY_AS_FLOAT16_COMPARATOR = new BinaryComparator() {

  @Override
  int compareBinary(Binary b1, Binary b2) {
    // Obtain both views first, then decode left before right.
    final ByteBuffer left = b1.toByteBuffer().order(ByteOrder.LITTLE_ENDIAN);
    final ByteBuffer right = b2.toByteBuffer().order(ByteOrder.LITTLE_ENDIAN);

    // Absolute reads: the buffers' positions are not advanced.
    final float leftValue = toFloat(left.getShort(left.position()));
    final float rightValue = toFloat(right.getShort(right.position()));
    return Float.compare(leftValue, rightValue);
  }

  @Override
  public String toString() {
    return "BINARY_AS_FLOAT16_COMPARATOR";
  }
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import static java.util.concurrent.TimeUnit.MINUTES;
import static java.util.concurrent.TimeUnit.NANOSECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.apache.parquet.util.Float16.toFloat;

import java.math.BigDecimal;
import java.math.BigInteger;
Expand Down Expand Up @@ -448,4 +449,16 @@ private void appendHex(byte[] array, int offset, int length, StringBuilder build
}
}
};

/**
 * Stringifier for float16 values stored as binaries.
 * <p>
 * A binary of exactly 2 bytes is read as a little-endian {@code short}, converted with
 * {@code Float16.toFloat} and rendered by {@code DEFAULT_STRINGIFIER}. Any other length
 * yields {@code BINARY_INVALID}.
 */
static final PrimitiveStringifier FLOAT16_STRINGIFIER = new BinaryStringifierBase("FLOAT16_STRINGIFIER") {

  @Override
  String stringifyNotNull(Binary value) {
    if (value.length() == 2) {
      final ByteBuffer littleEndian = value.toByteBuffer().order(ByteOrder.LITTLE_ENDIAN);
      // Absolute read at the current position; the buffer is not advanced.
      final short halfBits = littleEndian.getShort(littleEndian.position());
      return DEFAULT_STRINGIFIER.stringify(toFloat(halfBits));
    }
    return BINARY_INVALID;
  }
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,11 @@ public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.JsonLogicalType
public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) {
return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR);
}

@Override
public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.Float16LogicalTypeAnnotation float16LogicalType) {
  // FLOAT16 binaries are ordered by their decoded value, not byte-wise.
  return Optional.of(PrimitiveComparator.BINARY_AS_FLOAT16_COMPARATOR);
}
}).orElseThrow(() -> new ShouldNeverHappenException("No comparator logic implemented for BINARY logical type: " + logicalType));
}
},
Expand Down Expand Up @@ -564,6 +569,14 @@ public PrimitiveType withId(int id) {
columnOrder);
}

/**
 * Creates a copy of this type that carries the given logical type annotation.
 *
 * @param logicalType the logical type annotation for the returned type
 * @return a new PrimitiveType with this type's repetition, primitive type name, length, name
 *         and id, and with {@code logicalType} as its logical type annotation
 */
public PrimitiveType withLogicalTypeAnnotation(LogicalTypeAnnotation logicalType) {
// NOTE(review): no column order is passed to this constructor, while the constructor call
// just above in this file does pass columnOrder -- confirm this constructor derives it as intended.
return new PrimitiveType(getRepetition(), primitive, length, getName(), logicalType, getId());
}

/**
* @return the primitive type
*/
Expand Down
12 changes: 12 additions & 0 deletions parquet-column/src/main/java/org/apache/parquet/schema/Types.java
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,11 @@ public Optional<Boolean> visit(LogicalTypeAnnotation.UUIDLogicalTypeAnnotation u
return checkFixedPrimitiveType(LogicalTypeAnnotation.UUIDLogicalTypeAnnotation.BYTES, uuidLogicalType);
}

@Override
public Optional<Boolean> visit(LogicalTypeAnnotation.Float16LogicalTypeAnnotation float16LogicalType) {
  // The annotated column must have exactly as many bytes as a FLOAT16 value.
  final int expectedLength = LogicalTypeAnnotation.Float16LogicalTypeAnnotation.BYTES;
  return checkFloat16BinaryPrimitiveType(expectedLength, float16LogicalType);
}

@Override
public Optional<Boolean> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
Preconditions.checkState(
Expand Down Expand Up @@ -566,6 +571,13 @@ private Optional<Boolean> checkBinaryPrimitiveType(LogicalTypeAnnotation logical
return Optional.of(true);
}

/**
 * Verifies that the type being built is BINARY with the given length.
 * <p>
 * NOTE(review): the Parquet format specification describes FLOAT16 as annotating a 2-byte
 * FIXED_LEN_BYTE_ARRAY -- confirm that BINARY is the intended primitive type here.
 *
 * @param l the required length in bytes
 * @param logicalTypeAnnotation the annotation being validated; only used in the error message
 * @return an Optional holding {@code true} when the check passes
 * @throws IllegalStateException presumably, via {@code Preconditions.checkState}, when the
 *         primitive type is not BINARY or the length differs from {@code l}
 */
private Optional<Boolean> checkFloat16BinaryPrimitiveType(int l, LogicalTypeAnnotation logicalTypeAnnotation) {
  final boolean isBinaryOfRequiredLength = primitiveType == PrimitiveTypeName.BINARY && length == l;
  Preconditions.checkState(
      isBinaryOfRequiredLength,
      "%s can only annotate BINARY(%s bytes)", logicalTypeAnnotation, l);
  return Optional.of(Boolean.TRUE);
}

private Optional<Boolean> checkInt32PrimitiveType(LogicalTypeAnnotation logicalTypeAnnotation) {
Preconditions.checkState(primitiveType == PrimitiveTypeName.INT32,
"%s can only annotate INT32", logicalTypeAnnotation);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,42 @@ public void testDecimalStringifier() {
checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE, Binary.class);
}

/**
 * Checks the text produced by FLOAT16_STRINGIFIER for a table of 2-byte encodings, and that
 * the stringifier rejects the unsupported value types.
 */
@Test
public void testFloat16Stringifier() {
  final PrimitiveStringifier stringifier = PrimitiveStringifier.FLOAT16_STRINGIFIER;

  // Expected text for each entry of encodedBytes below, index for index.
  final String[] expectedText = {
      // Zeroes, NaN and infinities
      "0.0",
      "-0.0",
      Float.toString(Float.NaN),
      Float.toString(Float.POSITIVE_INFINITY),
      Float.toString(Float.NEGATIVE_INFINITY),
      // Known values
      "1.0009766",
      "-2.0",
      "6.1035156E-5",
      "65504.0",
      "0.33325195",
      // Subnormals
      "6.097555E-5",
      "5.9604645E-8",
      "-6.097555E-5",
      "-5.9604645E-8",
      // Same encoding as the "65504.0" entry above, checked a second time
      "65504.0",
      // Powers of two
      "2048.0",
      "4096.0",
  };

  // The two arguments handed to toBinary for each case; the stringifier reads the
  // resulting value as little-endian, so the first byte is the low-order one.
  final int[][] encodedBytes = {
      {0x00, 0x00}, {0x00, 0x80}, {0x00, 0x7e}, {0x00, 0x7c}, {0x00, 0xfc},
      {0x01, 0x3c}, {0x00, 0xc0}, {0x00, 0x04}, {0xff, 0x7b}, {0x55, 0x35},
      {0xff, 0x03}, {0x01, 0x00}, {0xff, 0x83}, {0x01, 0x80},
      {0xff, 0x7b},
      {0x00, 0x68}, {0x00, 0x6c},
  };

  for (int i = 0; i < expectedText.length; i++) {
    assertEquals(expectedText[i], stringifier.stringify(toBinary(encodedBytes[i][0], encodedBytes[i][1])));
  }

  checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE, Binary.class);
}

@Test
public void testUUIDStringifier() {
PrimitiveStringifier stringifier = PrimitiveStringifier.UUID_STRINGIFIER;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import static org.apache.parquet.schema.LogicalTypeAnnotation.bsonType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.dateType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.decimalType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.float16Type;
import static org.apache.parquet.schema.LogicalTypeAnnotation.intType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.jsonType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
Expand Down Expand Up @@ -205,10 +206,20 @@ public void testBinaryAnnotations() {
}
}

/**
 * A BINARY column of length 2 annotated with FLOAT16 must come out of the builder equal to
 * the same type created directly through the PrimitiveType constructor.
 */
@Test
public void testBinaryFloat16Annotations() {
  final LogicalTypeAnnotation float16 = float16Type();

  final PrimitiveType viaConstructor = new PrimitiveType(REQUIRED, BINARY, 2, "col", float16, null);
  final PrimitiveType viaBuilder = Types.required(BINARY).as(float16).length(2).named("col");

  Assert.assertEquals(viaConstructor, viaBuilder);
}

@Test
public void testBinaryAnnotationsRejectsNonBinary() {
LogicalTypeAnnotation[] types = new LogicalTypeAnnotation[] {
stringType(), jsonType(), bsonType()};
stringType(), jsonType(), bsonType(), float16Type()};
for (final LogicalTypeAnnotation logicalType : types) {
PrimitiveTypeName[] nonBinary = new PrimitiveTypeName[]{
BOOLEAN, INT32, INT64, INT96, DOUBLE, FLOAT
Expand Down Expand Up @@ -403,6 +414,18 @@ public void testUUIDLogicalType() {
() -> Types.required(BINARY).as(uuidType()).named("uuid_field").toString());
}

/**
 * FLOAT16 is accepted on a BINARY column of length 2 and rejected on any other primitive
 * type or length.
 */
@Test
public void testFloat16LogicalType() {
  // Accepted: BINARY with length 2; the annotation shows up in the string form.
  assertEquals(
      "required binary float16_field (FLOAT16)",
      Types.required(BINARY).length(2).as(float16Type()).named("float16_field").toString());

  // Rejected: FIXED_LEN_BYTE_ARRAY is not the BINARY primitive type the FLOAT16 check requires.
  assertThrows("Should fail with invalid type", IllegalStateException.class,
      () -> Types.required(FIXED_LEN_BYTE_ARRAY).length(10).as(float16Type()).named("float16_field").toString());
  // Rejected: right primitive type, wrong length.
  assertThrows("Should fail with invalid length", IllegalStateException.class,
      () -> Types.required(BINARY).length(10).as(float16Type()).named("float16_field").toString());
  // Rejected: right primitive type, but no length was set.
  assertThrows("Should fail with missing length", IllegalStateException.class,
      () -> Types.required(BINARY).as(float16Type()).named("float16_field").toString());
}

/**
* A convenience method to avoid a large number of @Test(expected=...) tests
* @param message A String message to describe this assertion
Expand Down
Loading

0 comments on commit 45a1ae2

Please sign in to comment.