Skip to content

Commit

Permalink
revise according to Cheng Hao
Browse files Browse the repository at this point in the history
  • Loading branch information
adrian-wang committed Oct 10, 2014
1 parent 0e0a4f5 commit f8f219f
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -220,36 +220,52 @@ trait HiveTypeCoercion {
case a: BinaryArithmetic if a.right.dataType == StringType =>
a.makeCopy(Array(a.left, Cast(a.right, DoubleType)))

// we should cast all timestamp/date/string compare into string compare,
// even if both sides are of same type, as Hive use xxxwritable to compare.
case p: BinaryPredicate if p.left.dataType == StringType
&& p.right.dataType == DateType =>
p.makeCopy(Array(Cast(p.left, DateType), p.right))
p.makeCopy(Array(p.left, Cast(p.right, StringType)))
case p: BinaryPredicate if p.left.dataType == DateType
&& p.right.dataType == StringType =>
p.makeCopy(Array(p.left, Cast(p.right, DateType)))
p.makeCopy(Array(Cast(p.left, StringType), p.right))
case p: BinaryPredicate if p.left.dataType == StringType
&& p.right.dataType == TimestampType =>
p.makeCopy(Array(Cast(p.left, TimestampType), p.right))
p.makeCopy(Array(p.left, Cast(p.right, StringType)))
case p: BinaryPredicate if p.left.dataType == TimestampType
&& p.right.dataType == StringType =>
p.makeCopy(Array(p.left, Cast(p.right, TimestampType)))
p.makeCopy(Array(Cast(p.left, StringType), p.right))
case p: BinaryPredicate if p.left.dataType == TimestampType
&& p.right.dataType == DateType =>
p.makeCopy(Array(Cast(p.left, DateType), p.right))
p.makeCopy(Array(Cast(p.left, StringType), Cast(p.right, StringType)))
case p: BinaryPredicate if p.left.dataType == DateType
&& p.right.dataType == TimestampType =>
p.makeCopy(Array(p.left, Cast(p.right, DateType)))
p.makeCopy(Array(Cast(p.left, StringType), Cast(p.right, StringType)))
// same type
case p: BinaryPredicate if p.left.dataType == DateType
&& p.right.dataType == DateType =>
p.makeCopy(Array(Cast(p.left, StringType), Cast(p.right, StringType)))
case p: BinaryPredicate if p.left.dataType == TimestampType
&& p.right.dataType == TimestampType =>
p.makeCopy(Array(Cast(p.left, StringType), Cast(p.right, StringType)))

case p: BinaryPredicate if p.left.dataType == StringType && p.right.dataType != StringType =>
p.makeCopy(Array(Cast(p.left, DoubleType), p.right))
case p: BinaryPredicate if p.left.dataType != StringType && p.right.dataType == StringType =>
p.makeCopy(Array(p.left, Cast(p.right, DoubleType)))

case i @ In(a, b) if a.dataType == DateType && b.forall(_.dataType == StringType) =>
i.makeCopy(Array(a, b.map(Cast(_, DateType))))
i.makeCopy(Array(Cast(a, StringType), b))
case i @ In(a, b) if a.dataType == TimestampType && b.forall(_.dataType == StringType) =>
i.makeCopy(Array(a, b.map(Cast(_, TimestampType))))
i.makeCopy(Array(Cast(a, StringType), b))
case i @ In(a, b) if a.dataType == DateType && b.forall(_.dataType == TimestampType) =>
i.makeCopy(Array(a, b.map(Cast(_, DateType))))
i.makeCopy(Array(Cast(a, StringType), b.map(Cast(_, StringType))))
case i @ In(a, b) if a.dataType == TimestampType && b.forall(_.dataType == DateType) =>
i.makeCopy(Array(Cast(a, StringType), b.map(Cast(_, StringType))))
case i @ In(a, b) if a.dataType == DateType && b.forall(_.dataType == DateType) =>
i.makeCopy(Array(Cast(a, StringType), b.map(Cast(_, StringType))))
case i @ In(a, b) if a.dataType == TimestampType && b.forall(_.dataType == TimestampType) =>
i.makeCopy(Array(Cast(a, StringType), b.map(Cast(_, StringType))))


case Sum(e) if e.dataType == StringType =>
Sum(Cast(e, DoubleType))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import java.sql.{Date, Timestamp}
import java.text.{DateFormat, SimpleDateFormat}

import org.apache.spark.Logging
import org.apache.spark.sql.catalyst.errors.TreeNodeException
import org.apache.spark.sql.catalyst.types._

/** Cast the child expression to the target data type. */
Expand Down Expand Up @@ -101,7 +102,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
case ByteType =>
buildCast[Byte](_, b => new Timestamp(b))
case DateType =>
buildCast[Date](_, d => Timestamp.valueOf(dateToString(d) + " 00:00:00"))
buildCast[Date](_, d => new Timestamp(d.getTime))
// TimestampWritable.decimalToTimestamp
case DecimalType =>
buildCast[BigDecimal](_, d => decimalToTimestamp(d))
Expand Down Expand Up @@ -154,15 +155,16 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
// DateConverter
private[this] def castToDate: Any => Any = child.dataType match {
case StringType =>
buildCast[String](_, s => if (s.contains(" ")) {
try castToDate(castToTimestamp(s))
catch { case _: java.lang.IllegalArgumentException => null }
} else {
buildCast[String](_, s =>
try Date.valueOf(s) catch { case _: java.lang.IllegalArgumentException => null }
})
)
case TimestampType =>
buildCast[Timestamp](_, t => Date.valueOf(timestampToDateString(t)))
// TimestampWritable.decimalToDate
// throw valid precision more than seconds, according to Hive.
// Timestamp.nanos is in 0 to 999,999,999, no more than a second.
buildCast[Timestamp](_, t => new Date(Math.floor(t.getTime / 1000.0).toInt * 1000))
// Hive throws this exception as a Semantic Exception
// It is never possible to compare result when hive return with exception, so we can return null
// NULL is more reasonable here, since the query itself obeys the grammar.
case _ => _ => null
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -254,12 +254,12 @@ class ExpressionEvaluationSuite extends FunSuite {

val sd = "1970-01-01"
val d = Date.valueOf(sd)
val sts = sd + " 00:00:01.1"
val ts = Timestamp.valueOf(sts)
val sts = sd + " 00:00:02"
val nts = sts + ".1"
val ts = Timestamp.valueOf(nts)

checkEvaluation("abdef" cast StringType, "abdef")
checkEvaluation("abdef" cast DecimalType, null)
checkEvaluation("abdef" cast DateType, null)
checkEvaluation("abdef" cast TimestampType, null)
checkEvaluation("12.65" cast DecimalType, BigDecimal(12.65))

Expand All @@ -271,19 +271,18 @@ class ExpressionEvaluationSuite extends FunSuite {

checkEvaluation(Cast(Literal(sd) cast DateType, StringType), sd)
checkEvaluation(Cast(Literal(d) cast StringType, DateType), d)
checkEvaluation(Cast(Literal(sts) cast TimestampType, StringType), sts)
checkEvaluation(Cast(Literal(nts) cast TimestampType, StringType), nts)
checkEvaluation(Cast(Literal(ts) cast StringType, TimestampType), ts)
// all convert to string type to check
checkEvaluation(
Cast(Cast(Literal(nts) cast TimestampType, DateType), StringType), sd)
checkEvaluation(
Cast(Cast(Literal(ts) cast DateType, TimestampType), StringType), sts)

checkEvaluation(Cast("abdef" cast BinaryType, StringType), "abdef")

checkEvaluation(Cast(Cast(Cast(Cast(
Cast("5" cast ByteType, ShortType), IntegerType), FloatType), DoubleType), LongType), 5)
checkEvaluation(Cast(Cast(Cast(Cast(
Cast("5" cast ByteType, DateType), DecimalType), LongType), StringType), ShortType), null)
checkEvaluation(Cast(Cast(Cast(Cast(
Cast("5" cast DateType, ByteType), DecimalType), LongType), StringType), ShortType), null)
checkEvaluation(Cast(Cast(Cast(Cast(
Cast("5" cast DecimalType, ByteType), DateType), LongType), StringType), ShortType), null)
checkEvaluation(Cast(Cast(Cast(Cast(
Cast("5" cast ByteType, TimestampType), DecimalType), LongType), StringType), ShortType), 5)
checkEvaluation(Cast(Cast(Cast(Cast(
Expand Down Expand Up @@ -315,7 +314,6 @@ class ExpressionEvaluationSuite extends FunSuite {
assert(("abcdef" cast StringType).nullable === false)
assert(("abcdef" cast BinaryType).nullable === false)
assert(("abcdef" cast BooleanType).nullable === false)
assert(("abcdef" cast DateType).nullable === true)
assert(("abcdef" cast TimestampType).nullable === true)
assert(("abcdef" cast LongType).nullable === true)
assert(("abcdef" cast IntegerType).nullable === true)
Expand All @@ -329,8 +327,8 @@ class ExpressionEvaluationSuite extends FunSuite {
}

test("date") {
val d1 = new Date(12)
val d2 = new Date(123)
val d1 = Date.valueOf("1970-01-01")
val d2 = Date.valueOf("1970-01-02")
checkEvaluation(Literal(d1) < Literal(d2), true)
}

Expand Down

0 comments on commit f8f219f

Please sign in to comment.