Skip to content

Commit

Permalink
feat: support for user-defined type literals (#232)
Browse files Browse the repository at this point in the history
substrait-java and isthmus can now handle user-defined type literals
  • Loading branch information
cheikhachraf authored Feb 17, 2024
1 parent 2a98e3c commit ca8187f
Show file tree
Hide file tree
Showing 10 changed files with 146 additions and 4 deletions.
21 changes: 21 additions & 0 deletions core/src/main/java/io/substrait/expression/Expression.java
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,27 @@ public <R, E extends Throwable> R accept(ExpressionVisitor<R, E> visitor) throws
}
}

/**
 * Literal whose type is a user-defined type identified by a {@link #uri()} / {@link #name()}
 * pair.
 *
 * <p>The payload is kept as raw bytes in {@link #value()}. NOTE(review): the creator and proto
 * converter in this change treat those bytes as a serialized {@code com.google.protobuf.Any};
 * this class itself does not enforce that.
 */
@Value.Immutable
abstract static class UserDefinedLiteral implements Literal {
  /** Raw bytes carried by this literal. */
  public abstract ByteString value();

  /** First component of the user-defined type's identity, passed to the type as its URI. */
  public abstract String uri();

  /** Second component of the user-defined type's identity, passed to the type as its name. */
  public abstract String name();

  /**
   * Derives the literal's type from its nullability and its {@code uri}/{@code name} pair.
   *
   * @return the user-defined type for this literal
   */
  public Type getType() {
    final boolean isNullable = nullable();
    return Type.withNullability(isNullable).userDefined(uri(), name());
  }

  /**
   * Entry point for building instances.
   *
   * @return a fresh builder of the generated immutable implementation
   */
  public static ImmutableExpression.UserDefinedLiteral.Builder builder() {
    return ImmutableExpression.UserDefinedLiteral.builder();
  }

  /**
   * Dispatches to the visitor overload for user-defined literals.
   *
   * @param visitor the visitor to dispatch to
   * @return whatever the visitor returns for this literal
   * @throws E if the visitor throws
   */
  public <R, E extends Throwable> R accept(ExpressionVisitor<R, E> visitor) throws E {
    final R visited = visitor.visit(this);
    return visited;
  }
}

@Value.Immutable
abstract static class Switch implements Expression {
public abstract Expression match();
Expand Down
11 changes: 11 additions & 0 deletions core/src/main/java/io/substrait/expression/ExpressionCreator.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.substrait.expression;

import com.google.protobuf.Any;
import com.google.protobuf.ByteString;
import io.substrait.extension.SimpleExtension;
import io.substrait.relation.ConsistentPartitionWindow;
Expand Down Expand Up @@ -217,6 +218,16 @@ public static Expression.StructLiteral struct(
return Expression.StructLiteral.builder().nullable(nullable).addAllFields(values).build();
}

/**
 * Creates a literal of a user-defined type from a protobuf {@link Any} payload.
 *
 * <p>The payload is stored in its serialized form ({@code value.toByteString()}).
 *
 * @param nullable whether the literal is nullable
 * @param uri the uri identifying the user-defined type
 * @param name the name identifying the user-defined type
 * @param value the payload of the literal
 * @return the resulting user-defined literal
 */
public static Expression.UserDefinedLiteral userDefinedLiteral(
    boolean nullable, String uri, String name, Any value) {
  var literalBuilder = Expression.UserDefinedLiteral.builder();
  literalBuilder.nullable(nullable);
  literalBuilder.uri(uri);
  literalBuilder.name(name);
  // Serialized last, matching the order in which the attributes are supplied.
  literalBuilder.value(value.toByteString());
  return literalBuilder.build();
}

public static Expression.Switch switchStatement(
Expression match, Expression defaultExpression, Expression.SwitchClause... conditionClauses) {
return Expression.Switch.builder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ public interface ExpressionVisitor<R, E extends Throwable> {

R visit(Expression.StructLiteral expr) throws E;

/**
 * Visits a literal of a user-defined type.
 *
 * @param expr the user-defined literal being visited
 * @return the visitor's result for this literal
 * @throws E if the visitor fails
 */
R visit(Expression.UserDefinedLiteral expr) throws E;

R visit(Expression.Switch expr) throws E;

R visit(Expression.IfThen expr) throws E;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
package io.substrait.expression.proto;

import com.google.protobuf.Any;
import com.google.protobuf.InvalidProtocolBufferException;
import io.substrait.expression.ExpressionVisitor;
import io.substrait.expression.FieldReference;
import io.substrait.expression.FunctionArg;
import io.substrait.expression.WindowBound;
import io.substrait.extension.ExtensionCollector;
import io.substrait.extension.SimpleExtension;
import io.substrait.proto.Expression;
import io.substrait.proto.FunctionArgument;
import io.substrait.proto.Rel;
Expand Down Expand Up @@ -234,6 +237,25 @@ public Expression visit(io.substrait.expression.Expression.StructLiteral expr) {
});
}

/**
 * Converts a user-defined literal into its protobuf literal form.
 *
 * <p>The literal's {@code uri}/{@code name} pair is registered with the extension collector to
 * obtain a type reference, and the literal's stored bytes are parsed back into a protobuf {@link
 * Any} that becomes the value of the {@code UserDefined} message.
 *
 * @param expr the user-defined literal to convert
 * @return the protobuf expression wrapping the converted literal
 * @throws RuntimeException if the stored bytes cannot be parsed as an {@link Any}; the {@link
 *     InvalidProtocolBufferException} is preserved as the cause
 */
@Override
public Expression visit(io.substrait.expression.Expression.UserDefinedLiteral expr) {
  var typeReference =
      extensionCollector.getTypeReference(SimpleExtension.TypeAnchor.of(expr.uri(), expr.name()));

  // Parse before entering the builder lambda so the checked exception is handled in one place
  // instead of inside the lambda body.
  final Any value;
  try {
    value = Any.parseFrom(expr.value());
  } catch (InvalidProtocolBufferException e) {
    throw new RuntimeException(
        "Unable to parse the value of user-defined literal "
            + expr.uri()
            + "#"
            + expr.name()
            + " as a protobuf Any",
        e);
  }

  // Only populate the supplied builder; building it here would produce a message nobody uses.
  return lit(
      bldr ->
          bldr.setNullable(expr.nullable())
              .setUserDefined(
                  Expression.Literal.UserDefined.newBuilder()
                      .setTypeReference(typeReference)
                      .setValue(value)));
}

private Expression.Literal toLiteral(io.substrait.expression.Expression expression) {
var e = expression.accept(this);
assert e.getRexTypeCase() == Expression.RexTypeCase.LITERAL;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,12 @@ public Expression.Literal from(io.substrait.proto.Expression.Literal literal) {
var listType = protoTypeConverter.fromList(literal.getEmptyList());
yield ExpressionCreator.emptyList(listType.nullable(), listType.elementType());
}
case USER_DEFINED -> {
var userDefinedLiteral = literal.getUserDefined();
var type = lookup.getType(userDefinedLiteral.getTypeReference(), extensions);
yield ExpressionCreator.userDefinedLiteral(
literal.getNullable(), type.uri(), type.name(), userDefinedLiteral.getValue());
}
default -> throw new IllegalStateException(
"Unexpected value: " + literal.getLiteralTypeCase());
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@ public Optional<Expression> visit(Expression.StructLiteral expr) throws EXCEPTIO
return visitLiteral(expr);
}

/**
 * Handles a user-defined literal by delegating to {@code visitLiteral}, the same way the
 * struct-literal overload of this visitor does.
 *
 * <p>NOTE(review): the meaning of the returned {@link Optional} is defined by {@code
 * visitLiteral}, which is not shown here.
 *
 * @param expr the user-defined literal being visited
 * @return the result of {@code visitLiteral(expr)}
 * @throws EXCEPTION if {@code visitLiteral} throws
 */
@Override
public Optional<Expression> visit(Expression.UserDefinedLiteral expr) throws EXCEPTION {
return visitLiteral(expr);
}

@Override
public Optional<Expression> visit(Expression.Switch expr) throws EXCEPTION {
var match = expr.match().accept(this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import io.substrait.expression.ExpressionCreator;
import io.substrait.expression.ImmutableExpression;
import io.substrait.isthmus.*;
import io.substrait.type.Type;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
Expand Down Expand Up @@ -32,6 +33,41 @@ public class CallConverters {
visitor.apply(call.getOperands().get(0)));
};

/**
 * {@link SqlKind#REINTERPRET} is utilized by Isthmus to represent and store {@link
 * Expression.UserDefinedLiteral}s within Calcite.
 *
 * <p>When converting from Substrait to Calcite, the {@link Expression.UserDefinedLiteral#value()}
 * is stored within a {@link org.apache.calcite.sql.type.SqlTypeName#BINARY} {@link
 * org.apache.calcite.rex.RexLiteral} and then re-interpreted to have the correct type.
 *
 * <p>See {@link ExpressionRexConverter#visit(Expression.UserDefinedLiteral)} for this conversion.
 *
 * <p>When converting from Calcite to Substrait, this call converter extracts the {@link
 * Expression.UserDefinedLiteral} that was stored. The literal's nullability, uri and name are
 * taken from the re-interpreted (user-defined) type of the call, and its value from the binary
 * operand.
 *
 * <p>The converter returns {@code null} for calls that are not {@link SqlKind#REINTERPRET}, and
 * for re-interpret calls whose operand is not a fixed binary literal or whose type is not
 * user-defined.
 */
public static Function<TypeConverter, SimpleCallConverter> REINTERPRET =
    typeConverter ->
        (call, visitor) -> {
          if (call.getKind() != SqlKind.REINTERPRET) {
            return null;
          }
          var operand = visitor.apply(call.getOperands().get(0));
          var type = typeConverter.toSubstrait(call.getType());

          // For now, we only support handling of SqlKind.REINTERPRET for the case of stored
          // user-defined literals
          if (operand instanceof Expression.FixedBinaryLiteral literal
              && type instanceof Type.UserDefined t) {
            return Expression.UserDefinedLiteral.builder()
                // Carry the nullability over from the type; leaving it unset would fall back to
                // the builder's default and drop it for nullable literals on a roundtrip.
                .nullable(t.nullable())
                .uri(t.uri())
                .name(t.name())
                .value(literal.value())
                .build();
          }
          return null;
        };

// public static SimpleCallConverter OrAnd(FunctionConverter c) {
// return (call, visitor) -> {
// if (call.getKind() != SqlKind.AND && call.getKind() != SqlKind.OR) {
Expand Down Expand Up @@ -93,6 +129,7 @@ public static List<CallConverter> defaults(TypeConverter typeConverter) {
new FieldSelectionConverter(typeConverter),
CallConverters.CASE,
CallConverters.CAST.apply(typeConverter),
CallConverters.REINTERPRET.apply(typeConverter),
new LiteralConstructorConverter(typeConverter));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,13 @@ public RexNode visit(Expression.NullLiteral expr) throws RuntimeException {
return rexBuilder.makeLiteral(null, typeConverter.toCalcite(typeFactory, expr.getType()));
}

/**
 * Represents a user-defined literal in Calcite as a binary literal holding the literal's bytes,
 * re-interpreted to the Calcite counterpart of the literal's type.
 *
 * @param expr the user-defined literal to convert
 * @return the re-interpret cast wrapping the binary literal
 */
@Override
public RexNode visit(Expression.UserDefinedLiteral expr) throws RuntimeException {
  byte[] payload = expr.value().toByteArray();
  var storedBytes = rexBuilder.makeBinaryLiteral(new ByteString(payload));
  var reinterpretedType = typeConverter.toCalcite(typeFactory, expr.getType());
  // No overflow check is supplied for the re-interpretation.
  return rexBuilder.makeReinterpretCast(reinterpretedType, storedBytes, null);
}

@Override
public RexNode visit(Expression.BoolLiteral expr) throws RuntimeException {
return rexBuilder.makeLiteral(expr.value());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.util.List;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.util.*;
Expand Down Expand Up @@ -186,14 +187,12 @@ public Expression.Literal convert(RexLiteral literal) {

case ROW -> {
List<RexLiteral> literals = (List<RexLiteral>) literal.getValue();
yield struct(
n, literals.stream().map(this::convert).collect(java.util.stream.Collectors.toList()));
yield struct(n, literals.stream().map(this::convert).collect(Collectors.toList()));
}

case ARRAY -> {
List<RexLiteral> literals = (List<RexLiteral>) literal.getValue();
yield list(
n, literals.stream().map(this::convert).collect(java.util.stream.Collectors.toList()));
yield list(n, literals.stream().map(this::convert).collect(Collectors.toList()));
}

default -> throw new UnsupportedOperationException(
Expand Down
32 changes: 32 additions & 0 deletions isthmus/src/test/java/io/substrait/isthmus/CustomFunctionTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,20 @@

import static org.junit.jupiter.api.Assertions.assertEquals;

import com.google.protobuf.Any;
import io.substrait.dsl.SubstraitBuilder;
import io.substrait.expression.ExpressionCreator;
import io.substrait.extension.ExtensionCollector;
import io.substrait.extension.SimpleExtension;
import io.substrait.isthmus.expression.AggregateFunctionConverter;
import io.substrait.isthmus.expression.FunctionMappings;
import io.substrait.isthmus.expression.ScalarFunctionConverter;
import io.substrait.isthmus.expression.WindowFunctionConverter;
import io.substrait.isthmus.utils.UserTypeFactory;
import io.substrait.proto.Expression;
import io.substrait.relation.ProtoRelConverter;
import io.substrait.relation.Rel;
import io.substrait.relation.RelProtoConverter;
import io.substrait.type.Type;
import io.substrait.type.TypeCreator;
import java.io.IOException;
Expand Down Expand Up @@ -232,4 +238,30 @@ void customTypesInFunctionsRoundtrip() {
var relReturned = calciteToSubstrait.apply(calciteRel);
assertEquals(rel, relReturned);
}

/**
 * Roundtrips a plan that passes a user-defined literal to a custom function, first through
 * Calcite and then through the protobuf representation, expecting the original plan back each
 * time.
 */
@Test
void customTypesLiteralInFunctionsRoundtrip() {
  // User-defined literal of type a_type whose payload is an i32 literal packed into an Any.
  var payload = Any.pack(Expression.Literal.newBuilder().setI32(10).build());
  var aTypeLiteral = ExpressionCreator.userDefinedLiteral(false, NAMESPACE, "a_type", payload);

  var remap = b.remap(1);
  var scan =
      b.namedScan(
          List.of("example"), List.of("a"), List.of(N.userDefined(NAMESPACE, "a_type")));
  Rel original =
      b.project(
          input ->
              List.of(
                  b.scalarFn(
                      NAMESPACE,
                      "to_b_type:u!a_type",
                      R.userDefined(NAMESPACE, "b_type"),
                      aTypeLiteral)),
          remap,
          scan);

  // Substrait -> Calcite -> Substrait
  RelNode viaCalcite = substraitToCalcite.convert(original);
  Rel fromCalcite = calciteToSubstrait.apply(viaCalcite);
  assertEquals(original, fromCalcite);

  // Substrait -> proto -> Substrait, sharing one extension collector for both directions
  var collector = new ExtensionCollector();
  io.substrait.proto.Rel asProto = new RelProtoConverter(collector).toProto(original);
  Rel fromProto = new ProtoRelConverter(collector, extensionCollection).from(asProto);
  assertEquals(original, fromProto);
}
}

0 comments on commit ca8187f

Please sign in to comment.