From e119dabf8eaa6ad2924ddec696623fb17d8dee24 Mon Sep 17 00:00:00 2001 From: Michael Nedokushev Date: Sun, 14 Jan 2024 18:50:47 +0000 Subject: [PATCH] Support for all primitive types --- .../parquet/core/filter/OperatorSupport.scala | 79 +++++++++- .../apache/parquet/core/filter/TypeTag.scala | 149 +++++++++++++++++- .../parquet/core/filter/TypeTagDeriver.scala | 37 ++++- .../apache/parquet/core/filter/ExprSpec.scala | 2 +- 4 files changed, 245 insertions(+), 22 deletions(-) diff --git a/modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/OperatorSupport.scala b/modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/OperatorSupport.scala index 595b093..12d727b 100644 --- a/modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/OperatorSupport.scala +++ b/modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/OperatorSupport.scala @@ -1,5 +1,25 @@ package me.mnedokushev.zio.apache.parquet.core.filter +import zio.{ Chunk, Duration } + +import java.time.{ + DayOfWeek, + Instant, + LocalDate, + LocalDateTime, + LocalTime, + Month, + MonthDay, + OffsetDateTime, + OffsetTime, + Period, + Year, + YearMonth, + ZoneId, + ZoneOffset, + ZonedDateTime +} +import java.util.UUID import scala.annotation.implicitNotFound sealed trait OperatorSupport[A] @@ -12,9 +32,28 @@ object OperatorSupport { } object LtGt { - implicit case object SByte extends LtGt[Byte] - implicit case object SShort extends LtGt[Short] - implicit case object SInt extends LtGt[Int] + implicit case object byte extends LtGt[Byte] + implicit case object short extends LtGt[Short] + implicit case object int extends LtGt[Int] + implicit case object long extends LtGt[Long] + implicit case object float extends LtGt[Float] + implicit case object double extends LtGt[Double] + implicit case object bigDecimal extends LtGt[java.math.BigDecimal] + implicit case object bigInteger extends LtGt[java.math.BigInteger] + implicit case object dayOfWeek 
extends LtGt[DayOfWeek] + implicit case object month extends LtGt[Month] + implicit case object monthDay extends LtGt[MonthDay] + implicit case object period extends LtGt[Period] + implicit case object year extends LtGt[Year] + implicit case object yearMonth extends LtGt[YearMonth] + implicit case object duration extends LtGt[Duration] + implicit case object instant extends LtGt[Instant] + implicit case object localDate extends LtGt[LocalDate] + implicit case object localTime extends LtGt[LocalTime] + implicit case object localDateTime extends LtGt[LocalDateTime] + implicit case object offsetTime extends LtGt[OffsetTime] + implicit case object offsetDateTime extends LtGt[OffsetDateTime] + implicit case object zonedDateTime extends LtGt[ZonedDateTime] } @implicitNotFound("You can't use this operator for the type ${A}") @@ -23,11 +62,35 @@ object OperatorSupport { } object EqNotEq { - implicit case object SString extends EqNotEq[String] - implicit case object SBoolean extends EqNotEq[Boolean] - implicit case object SByte extends EqNotEq[Byte] - implicit case object SShort extends EqNotEq[Short] - implicit case object SInt extends EqNotEq[Int] + implicit case object string extends EqNotEq[String] + implicit case object boolean extends EqNotEq[Boolean] + implicit case object byte extends EqNotEq[Byte] + implicit case object short extends EqNotEq[Short] + implicit case object int extends EqNotEq[Int] + implicit case object long extends EqNotEq[Long] + implicit case object float extends EqNotEq[Float] + implicit case object double extends EqNotEq[Double] + implicit case object binary extends EqNotEq[Chunk[Byte]] + implicit case object char extends EqNotEq[Char] + implicit case object uuid extends EqNotEq[UUID] + implicit case object bigDecimal extends EqNotEq[java.math.BigDecimal] + implicit case object bigInteger extends EqNotEq[java.math.BigInteger] + implicit case object dayOfWeek extends EqNotEq[DayOfWeek] + implicit case object month extends EqNotEq[Month] + 
implicit case object monthDay extends EqNotEq[MonthDay] + implicit case object period extends EqNotEq[Period] + implicit case object year extends EqNotEq[Year] + implicit case object yearMonth extends EqNotEq[YearMonth] + implicit case object zoneId extends EqNotEq[ZoneId] + implicit case object zoneOffset extends EqNotEq[ZoneOffset] + implicit case object duration extends EqNotEq[Duration] + implicit case object instant extends EqNotEq[Instant] + implicit case object localDate extends EqNotEq[LocalDate] + implicit case object localTime extends EqNotEq[LocalTime] + implicit case object localDateTime extends EqNotEq[LocalDateTime] + implicit case object offsetTime extends EqNotEq[OffsetTime] + implicit case object offsetDateTime extends EqNotEq[OffsetDateTime] + implicit case object zonedDateTime extends EqNotEq[ZonedDateTime] } } diff --git a/modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/TypeTag.scala b/modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/TypeTag.scala index a8c2014..66968aa 100644 --- a/modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/TypeTag.scala +++ b/modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/TypeTag.scala @@ -1,18 +1,39 @@ package me.mnedokushev.zio.apache.parquet.core.filter +import _root_.java.time.Instant import me.mnedokushev.zio.apache.parquet.core.Value import org.apache.parquet.filter2.predicate.FilterApi import org.apache.parquet.filter2.predicate.Operators.{ BinaryColumn, BooleanColumn, Column, + DoubleColumn, + FloatColumn, IntColumn, LongColumn, SupportsEqNotEq, SupportsLtGt } import org.apache.parquet.io.api.Binary +import zio.{ Chunk, Duration } +import java.time.{ + DayOfWeek, + LocalDate, + LocalDateTime, + LocalTime, + Month, + MonthDay, + OffsetDateTime, + OffsetTime, + Period, + Year, + YearMonth, + ZoneId, + ZoneOffset, + ZonedDateTime +} +import java.util.UUID import scala.jdk.CollectionConverters._ trait TypeTag[+A] @@ 
-93,35 +114,151 @@ object TypeTag { } - implicit val string: TypeTag.EqNotEq[String] = + implicit val string: TypeTag.EqNotEq[String] = eqnoteq[String, Binary, BinaryColumn]( FilterApi.binaryColumn, Value.string(_).value ) - implicit val boolean: TypeTag.EqNotEq[Boolean] = + implicit val boolean: TypeTag.EqNotEq[Boolean] = eqnoteq[Boolean, java.lang.Boolean, BooleanColumn]( FilterApi.booleanColumn, Value.boolean(_).value ) - implicit val byte: TypeTag.LtGt[Byte] = + implicit val byte: TypeTag.LtGt[Byte] = ltgt[Byte, java.lang.Integer, IntColumn]( FilterApi.intColumn, Value.byte(_).value ) - implicit val short: TypeTag.LtGt[Short] = + implicit val short: TypeTag.LtGt[Short] = ltgt[Short, java.lang.Integer, IntColumn]( FilterApi.intColumn, Value.short(_).value ) - implicit val int: TypeTag.LtGt[Int] = + implicit val int: TypeTag.LtGt[Int] = ltgt[Int, java.lang.Integer, IntColumn]( FilterApi.intColumn, Value.int(_).value ) - implicit val long: TypeTag.LtGt[Long] = + implicit val long: TypeTag.LtGt[Long] = ltgt[Long, java.lang.Long, LongColumn]( FilterApi.longColumn, Value.long(_).value ) + implicit val float: TypeTag.LtGt[Float] = + ltgt[Float, java.lang.Float, FloatColumn]( + FilterApi.floatColumn, + Value.float(_).value + ) + implicit val double: TypeTag.LtGt[Double] = + ltgt[Double, java.lang.Double, DoubleColumn]( + FilterApi.doubleColumn, + Value.double(_).value + ) + implicit val binary: TypeTag.EqNotEq[Chunk[Byte]] = + eqnoteq[Chunk[Byte], Binary, BinaryColumn]( + FilterApi.binaryColumn, + Value.binary(_).value + ) + implicit val char: TypeTag.EqNotEq[Char] = + eqnoteq[Char, java.lang.Integer, IntColumn]( + FilterApi.intColumn, + Value.char(_).value + ) + implicit val uuid: TypeTag.EqNotEq[UUID] = + eqnoteq[UUID, Binary, BinaryColumn]( + FilterApi.binaryColumn, + Value.uuid(_).value + ) + implicit val bigDecimal: TypeTag.LtGt[java.math.BigDecimal] = + ltgt[java.math.BigDecimal, java.lang.Long, LongColumn]( + FilterApi.longColumn, + Value.bigDecimal(_).value + ) 
+ implicit val bigInteger: TypeTag.LtGt[java.math.BigInteger] = + ltgt[java.math.BigInteger, Binary, BinaryColumn]( + FilterApi.binaryColumn, + Value.bigInteger(_).value + ) + implicit val dayOfWeek: TypeTag.LtGt[DayOfWeek] = + ltgt[DayOfWeek, java.lang.Integer, IntColumn]( + FilterApi.intColumn, + Value.dayOfWeek(_).value + ) + implicit val month: TypeTag.LtGt[Month] = + ltgt[Month, java.lang.Integer, IntColumn]( + FilterApi.intColumn, + Value.month(_).value + ) + implicit val monthDay: TypeTag.LtGt[MonthDay] = + ltgt[MonthDay, Binary, BinaryColumn]( + FilterApi.binaryColumn, + Value.monthDay(_).value + ) + implicit val period: TypeTag.LtGt[Period] = + ltgt[Period, Binary, BinaryColumn]( + FilterApi.binaryColumn, + Value.period(_).value + ) + implicit val year: TypeTag.LtGt[Year] = + ltgt[Year, java.lang.Integer, IntColumn]( + FilterApi.intColumn, + Value.year(_).value + ) + implicit val yearMonth: TypeTag.LtGt[YearMonth] = + ltgt[YearMonth, Binary, BinaryColumn]( + FilterApi.binaryColumn, + Value.yearMonth(_).value + ) + // NOTE: intentionally not implicit, to keep scalac happy: ZoneOffset is a subtype of ZoneId + val zoneId: TypeTag.EqNotEq[ZoneId] = + eqnoteq[ZoneId, Binary, BinaryColumn]( + FilterApi.binaryColumn, + Value.zoneId(_).value + ) + implicit val zoneOffset: TypeTag.EqNotEq[ZoneOffset] = + eqnoteq[ZoneOffset, Binary, BinaryColumn]( + FilterApi.binaryColumn, + Value.zoneOffset(_).value + ) + implicit val duration: TypeTag.LtGt[Duration] = + ltgt[Duration, java.lang.Long, LongColumn]( + FilterApi.longColumn, + Value.duration(_).value + ) + implicit val instant: TypeTag.LtGt[Instant] = + ltgt[Instant, java.lang.Long, LongColumn]( + FilterApi.longColumn, + Value.instant(_).value + ) + implicit val localDate: TypeTag.LtGt[LocalDate] = + ltgt[LocalDate, java.lang.Integer, IntColumn]( + FilterApi.intColumn, + Value.localDate(_).value + ) + implicit val localTime: TypeTag.LtGt[LocalTime] = + ltgt[LocalTime, java.lang.Integer, IntColumn]( + FilterApi.intColumn, + 
Value.localTime(_).value + ) + implicit val localDateTime: TypeTag.LtGt[LocalDateTime] = + ltgt[LocalDateTime, java.lang.Long, LongColumn]( + FilterApi.longColumn, + Value.localDateTime(_).value + ) + implicit val offsetTime: TypeTag.LtGt[OffsetTime] = + ltgt[OffsetTime, java.lang.Integer, IntColumn]( + FilterApi.intColumn, + Value.offsetTime(_).value + ) + implicit val offsetDateTime: TypeTag.LtGt[OffsetDateTime] = + ltgt[OffsetDateTime, java.lang.Long, LongColumn]( + FilterApi.longColumn, + Value.offsetDateTime(_).value + ) + implicit val zonedDateTime: TypeTag.LtGt[ZonedDateTime] = + ltgt[ZonedDateTime, java.lang.Long, LongColumn]( + FilterApi.longColumn, + Value.zonedDateTime(_).value + ) } diff --git a/modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/TypeTagDeriver.scala b/modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/TypeTagDeriver.scala index 59e9eb8..9e578cb 100644 --- a/modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/TypeTagDeriver.scala +++ b/modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/TypeTagDeriver.scala @@ -31,13 +31,36 @@ object TypeTagDeriver { summoned: => Option[TypeTag[A]] ): TypeTag[A] = st match { - case StandardType.StringType => TypeTag.string - case StandardType.BoolType => TypeTag.boolean - case StandardType.ByteType => TypeTag.byte - case StandardType.ShortType => TypeTag.short - case StandardType.IntType => TypeTag.int - case StandardType.LongType => TypeTag.long - case _ => TypeTag.dummy[A] + case StandardType.StringType => TypeTag.string + case StandardType.BoolType => TypeTag.boolean + case StandardType.ByteType => TypeTag.byte + case StandardType.ShortType => TypeTag.short + case StandardType.IntType => TypeTag.int + case StandardType.LongType => TypeTag.long + case StandardType.FloatType => TypeTag.float + case StandardType.DoubleType => TypeTag.double + case StandardType.BinaryType => TypeTag.binary + case 
StandardType.CharType => TypeTag.char + case StandardType.UUIDType => TypeTag.uuid + case StandardType.BigDecimalType => TypeTag.bigDecimal + case StandardType.BigIntegerType => TypeTag.bigInteger + case StandardType.DayOfWeekType => TypeTag.dayOfWeek + case StandardType.MonthType => TypeTag.month + case StandardType.MonthDayType => TypeTag.monthDay + case StandardType.PeriodType => TypeTag.period + case StandardType.YearType => TypeTag.year + case StandardType.YearMonthType => TypeTag.yearMonth + case StandardType.ZoneIdType => TypeTag.zoneId + case StandardType.ZoneOffsetType => TypeTag.zoneOffset + case StandardType.DurationType => TypeTag.duration + case StandardType.InstantType => TypeTag.instant + case StandardType.LocalDateType => TypeTag.localDate + case StandardType.LocalTimeType => TypeTag.localTime + case StandardType.LocalDateTimeType => TypeTag.localDateTime + case StandardType.OffsetTimeType => TypeTag.offsetTime + case StandardType.OffsetDateTimeType => TypeTag.offsetDateTime + case StandardType.ZonedDateTimeType => TypeTag.zonedDateTime + case _ => TypeTag.dummy[A] } override def deriveOption[A]( diff --git a/modules/core/src/test/scala/me/mnedokushev/zio/apache/parquet/core/filter/ExprSpec.scala b/modules/core/src/test/scala/me/mnedokushev/zio/apache/parquet/core/filter/ExprSpec.scala index 8642ed7..df1780e 100644 --- a/modules/core/src/test/scala/me/mnedokushev/zio/apache/parquet/core/filter/ExprSpec.scala +++ b/modules/core/src/test/scala/me/mnedokushev/zio/apache/parquet/core/filter/ExprSpec.scala @@ -15,7 +15,7 @@ object ExprSpec extends ZIOSpecDefault { override def spec: Spec[TestEnvironment with Scope, Any] = suite("ExprSpec")( - test("compile") { + test("compile all operators") { val (a, b, _) = Filter.columns[MyRecord] val result = Expr.compile(