From e78ee2c5770218a521340cb84f57a02dd00f7f3a Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 29 Apr 2024 16:40:56 -0700 Subject: [PATCH] [SPARK-48016][SQL] Fix a bug in try_divide function when with decimals Currently, the following query will throw DIVIDE_BY_ZERO error instead of returning null ``` SELECT try_divide(1, decimal(0)); ``` This is caused by the rule `DecimalPrecision`: ``` case b BinaryOperator(left, right) if left.dataType != right.dataType => (left, right) match { ... case (l: Literal, r) if r.dataType.isInstanceOf[DecimalType] && l.dataType.isInstanceOf[IntegralType] && literalPickMinimumPrecision => b.makeCopy(Array(Cast(l, DataTypeUtils.fromLiteral(l)), r)) ``` The result of the above makeCopy will contain `ANSI` as the `evalMode`, instead of `TRY`. This PR is to fix this bug by replacing the makeCopy method calls with withNewChildren Bug fix in try_* functions. Yes, it fixes a long-standing bug in the try_divide function. New UT No Closes #46286 from gengliangwang/avoidMakeCopy. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit 3fbcb26d8e992c65a2778b96da4142e234786e53) Signed-off-by: Gengliang Wang --- .../catalyst/analysis/DecimalPrecision.scala | 14 ++-- .../sql/catalyst/analysis/TypeCoercion.scala | 10 +-- sql/core/src/test/resources/log4j2.properties | 2 +- .../ansi/try_arithmetic.sql.out | 56 ++++++++++++++++ .../analyzer-results/try_arithmetic.sql.out | 56 ++++++++++++++++ .../sql-tests/inputs/try_arithmetic.sql | 8 +++ .../results/ansi/try_arithmetic.sql.out | 64 +++++++++++++++++++ .../sql-tests/results/try_arithmetic.sql.out | 64 +++++++++++++++++++ 8 files changed, 261 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala index 09cf61a77955a..f51127f53b382 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala @@ -83,7 +83,7 @@ object DecimalPrecision extends TypeCoercionRule { val resultType = widerDecimalType(p1, s1, p2, s2) val newE1 = if (e1.dataType == resultType) e1 else Cast(e1, resultType) val newE2 = if (e2.dataType == resultType) e2 else Cast(e2, resultType) - b.makeCopy(Array(newE1, newE2)) + b.withNewChildren(Seq(newE1, newE2)) } /** @@ -202,21 +202,21 @@ object DecimalPrecision extends TypeCoercionRule { case (l: Literal, r) if r.dataType.isInstanceOf[DecimalType] && l.dataType.isInstanceOf[IntegralType] && literalPickMinimumPrecision => - b.makeCopy(Array(Cast(l, DataTypeUtils.fromLiteral(l)), r)) + b.withNewChildren(Seq(Cast(l, DataTypeUtils.fromLiteral(l)), r)) case (l, r: Literal) if l.dataType.isInstanceOf[DecimalType] && r.dataType.isInstanceOf[IntegralType] && literalPickMinimumPrecision => - b.makeCopy(Array(l, Cast(r, DataTypeUtils.fromLiteral(r)))) + b.withNewChildren(Seq(l, Cast(r, DataTypeUtils.fromLiteral(r)))) // Promote integers inside a binary expression with fixed-precision decimals to decimals, // and fixed-precision decimals in an expression with floats / doubles to doubles case (l @ IntegralTypeExpression(), r @ DecimalExpression(_, _)) => - b.makeCopy(Array(Cast(l, DecimalType.forType(l.dataType)), r)) + b.withNewChildren(Seq(Cast(l, DecimalType.forType(l.dataType)), r)) case (l @ DecimalExpression(_, _), r @ IntegralTypeExpression()) => - b.makeCopy(Array(l, Cast(r, DecimalType.forType(r.dataType)))) + b.withNewChildren(Seq(l, Cast(r, DecimalType.forType(r.dataType)))) case (l, r @ DecimalExpression(_, _)) if isFloat(l.dataType) => - b.makeCopy(Array(l, Cast(r, DoubleType))) + b.withNewChildren(Seq(l, Cast(r, DoubleType))) case (l @ DecimalExpression(_, _), r) if isFloat(r.dataType) => - b.makeCopy(Array(Cast(l, DoubleType), r)) + b.withNewChildren(Seq(Cast(l, DoubleType), r)) case _ => b } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 190e72a8e669e..c9a4a2d40246a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -1102,22 +1102,22 @@ object TypeCoercion extends TypeCoercionBase { case a @ BinaryArithmetic(left @ StringTypeExpression(), right) if right.dataType != CalendarIntervalType => - a.makeCopy(Array(Cast(left, DoubleType), right)) + a.withNewChildren(Seq(Cast(left, DoubleType), right)) case a @ BinaryArithmetic(left, right @ StringTypeExpression()) if left.dataType != CalendarIntervalType => - a.makeCopy(Array(left, Cast(right, DoubleType))) + a.withNewChildren(Seq(left, Cast(right, DoubleType))) // For equality between string and timestamp we cast the string to a timestamp // so that things like rounding of subsecond precision does not affect the comparison. case p @ Equality(left @ StringTypeExpression(), right @ TimestampTypeExpression()) => - p.makeCopy(Array(Cast(left, TimestampType), right)) + p.withNewChildren(Seq(Cast(left, TimestampType), right)) case p @ Equality(left @ TimestampTypeExpression(), right @ StringTypeExpression()) => - p.makeCopy(Array(left, Cast(right, TimestampType))) + p.withNewChildren(Seq(left, Cast(right, TimestampType))) case p @ BinaryComparison(left, right) if findCommonTypeForBinaryComparison(left.dataType, right.dataType, conf).isDefined => val commonType = findCommonTypeForBinaryComparison(left.dataType, right.dataType, conf).get - p.makeCopy(Array(castExpr(left, commonType), castExpr(right, commonType))) + p.withNewChildren(Seq(castExpr(left, commonType), castExpr(right, commonType))) } } diff --git a/sql/core/src/test/resources/log4j2.properties b/sql/core/src/test/resources/log4j2.properties index 7ab47c16d4f94..d793d16ca6851 100644 --- a/sql/core/src/test/resources/log4j2.properties +++ b/sql/core/src/test/resources/log4j2.properties @@ -50,7 +50,7 @@ logger.parquet_recordwriter.name = org.apache.parquet.hadoop.InternalParquetReco logger.parquet_recordwriter.additivity = false logger.parquet_recordwriter.level = off -logger.parquet_outputcommitter.name = org.apache.parquet.hadoop.ParquetOutputCommitter +logger.parquet_outputcommitter.name = org.sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scalaapache.parquet.hadoop.ParquetOutputCommitter logger.parquet_outputcommitter.additivity = false logger.parquet_outputcommitter.level = off diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/try_arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/try_arithmetic.sql.out index bbc07c22805a6..15fe614ff0d22 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/try_arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/try_arithmetic.sql.out @@ -13,6 +13,20 @@ Project [try_add(2147483647, 1) AS try_add(2147483647, 1)#x] +- OneRowRelation +-- !query +SELECT try_add(2147483647, decimal(1)) +-- !query analysis +Project [try_add(2147483647, cast(1 as decimal(10,0))) AS try_add(2147483647, 1)#x] ++- OneRowRelation + + +-- !query +SELECT try_add(2147483647, "1") +-- !query analysis +Project [try_add(2147483647, 1) AS try_add(2147483647, 1)#xL] ++- OneRowRelation + + -- !query SELECT try_add(-2147483648, -1) -- !query analysis @@ -211,6 +225,20 @@ Project [try_divide(1, (1.0 / 0.0)) AS try_divide(1, (1.0 / 0.0))#x] +- OneRowRelation +-- !query +SELECT try_divide(1, decimal(0)) +-- !query analysis +Project [try_divide(1, cast(0 as decimal(10,0))) AS try_divide(1, 0)#x] ++- OneRowRelation + + +-- !query +SELECT try_divide(1, "0") +-- !query analysis +Project [try_divide(1, 0) AS try_divide(1, 0)#x] ++- OneRowRelation + + -- !query SELECT try_divide(interval 2 year, 2) -- !query analysis @@ -267,6 +295,20 @@ Project [try_subtract(2147483647, -1) AS try_subtract(2147483647, -1)#x] +- OneRowRelation +-- !query +SELECT try_subtract(2147483647, decimal(-1)) +-- !query analysis +Project [try_subtract(2147483647, cast(-1 as decimal(10,0))) AS try_subtract(2147483647, -1)#x] ++- OneRowRelation + + +-- !query +SELECT try_subtract(2147483647, "-1") +-- !query analysis +Project [try_subtract(2147483647, -1) AS try_subtract(2147483647, -1)#xL] ++- OneRowRelation + + -- !query SELECT try_subtract(-2147483648, 1) -- !query analysis @@ -351,6 +393,20 @@ Project [try_multiply(2147483647, -2) AS try_multiply(2147483647, -2)#x] +- OneRowRelation +-- !query +SELECT try_multiply(2147483647, decimal(-2)) +-- !query analysis +Project [try_multiply(2147483647, cast(-2 as decimal(10,0))) AS try_multiply(2147483647, -2)#x] ++- OneRowRelation + + +-- !query +SELECT try_multiply(2147483647, "-2") +-- !query analysis +Project [try_multiply(2147483647, -2) AS try_multiply(2147483647, -2)#xL] ++- OneRowRelation + + -- !query SELECT try_multiply(-2147483648, 2) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/try_arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/try_arithmetic.sql.out index bbc07c22805a6..ceda149c48434 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/try_arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/try_arithmetic.sql.out @@ -13,6 +13,20 @@ Project [try_add(2147483647, 1) AS try_add(2147483647, 1)#x] +- OneRowRelation +-- !query +SELECT try_add(2147483647, decimal(1)) +-- !query analysis +Project [try_add(2147483647, cast(1 as decimal(10,0))) AS try_add(2147483647, 1)#x] ++- OneRowRelation + + +-- !query +SELECT try_add(2147483647, "1") +-- !query analysis +Project [try_add(2147483647, 1) AS try_add(2147483647, 1)#x] ++- OneRowRelation + + -- !query SELECT try_add(-2147483648, -1) -- !query analysis @@ -211,6 +225,20 @@ Project [try_divide(1, (1.0 / 0.0)) AS try_divide(1, (1.0 / 0.0))#x] +- OneRowRelation +-- !query +SELECT try_divide(1, decimal(0)) +-- !query analysis +Project [try_divide(1, cast(0 as decimal(10,0))) AS try_divide(1, 0)#x] ++- OneRowRelation + + +-- !query +SELECT try_divide(1, "0") +-- !query analysis +Project [try_divide(1, 0) AS try_divide(1, 0)#x] ++- OneRowRelation + + -- !query SELECT try_divide(interval 2 year, 2) -- !query analysis @@ -267,6 +295,20 @@ Project [try_subtract(2147483647, -1) AS try_subtract(2147483647, -1)#x] +- OneRowRelation +-- !query +SELECT try_subtract(2147483647, decimal(-1)) +-- !query analysis +Project [try_subtract(2147483647, cast(-1 as decimal(10,0))) AS try_subtract(2147483647, -1)#x] ++- OneRowRelation + + +-- !query +SELECT try_subtract(2147483647, "-1") +-- !query analysis +Project [try_subtract(2147483647, -1) AS try_subtract(2147483647, -1)#x] ++- OneRowRelation + + -- !query SELECT try_subtract(-2147483648, 1) -- !query analysis @@ -351,6 +393,20 @@ Project [try_multiply(2147483647, -2) AS try_multiply(2147483647, -2)#x] +- OneRowRelation +-- !query +SELECT try_multiply(2147483647, decimal(-2)) +-- !query analysis +Project [try_multiply(2147483647, cast(-2 as decimal(10,0))) AS try_multiply(2147483647, -2)#x] ++- OneRowRelation + + +-- !query +SELECT try_multiply(2147483647, "-2") +-- !query analysis +Project [try_multiply(2147483647, -2) AS try_multiply(2147483647, -2)#x] ++- OneRowRelation + + -- !query SELECT try_multiply(-2147483648, 2) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/try_arithmetic.sql b/sql/core/src/test/resources/sql-tests/inputs/try_arithmetic.sql index 55907b6701e50..943865b68d39e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/try_arithmetic.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/try_arithmetic.sql @@ -1,6 +1,8 @@ -- Numeric + Numeric SELECT try_add(1, 1); SELECT try_add(2147483647, 1); +SELECT try_add(2147483647, decimal(1)); +SELECT try_add(2147483647, "1"); SELECT try_add(-2147483648, -1); SELECT try_add(9223372036854775807L, 1); SELECT try_add(-9223372036854775808L, -1); @@ -38,6 +40,8 @@ SELECT try_divide(0, 0); SELECT try_divide(1, (2147483647 + 1)); SELECT try_divide(1L, (9223372036854775807L + 1L)); SELECT try_divide(1, 1.0 / 0.0); +SELECT try_divide(1, decimal(0)); +SELECT try_divide(1, "0"); -- Interval / Numeric SELECT try_divide(interval 2 year, 2); @@ -50,6 +54,8 @@ SELECT try_divide(interval 106751991 day, 0.5); -- Numeric - Numeric SELECT try_subtract(1, 1); SELECT try_subtract(2147483647, -1); +SELECT try_subtract(2147483647, decimal(-1)); +SELECT try_subtract(2147483647, "-1"); SELECT try_subtract(-2147483648, 1); SELECT try_subtract(9223372036854775807L, -1); SELECT try_subtract(-9223372036854775808L, 1); @@ -66,6 +72,8 @@ SELECT try_subtract(interval 106751991 day, interval -3 day); -- Numeric * Numeric SELECT try_multiply(2, 3); SELECT try_multiply(2147483647, -2); +SELECT try_multiply(2147483647, decimal(-2)); +SELECT try_multiply(2147483647, "-2"); SELECT try_multiply(-2147483648, 2); SELECT try_multiply(9223372036854775807L, 2); SELECT try_multiply(-9223372036854775808L, -2); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/try_arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/try_arithmetic.sql.out index 414198b19645d..bb630243ee1ae 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/try_arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/try_arithmetic.sql.out @@ -15,6 +15,22 @@ struct NULL +-- !query +SELECT try_add(2147483647, decimal(1)) +-- !query schema +struct +-- !query output +2147483648 + + +-- !query +SELECT try_add(2147483647, "1") +-- !query schema +struct +-- !query output +2147483648 + + -- !query SELECT try_add(-2147483648, -1) -- !query schema @@ -341,6 +357,22 @@ org.apache.spark.SparkArithmeticException } +-- !query +SELECT try_divide(1, decimal(0)) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_divide(1, "0") +-- !query schema +struct +-- !query output +NULL + + -- !query SELECT try_divide(interval 2 year, 2) -- !query schema @@ -405,6 +437,22 @@ struct NULL +-- !query +SELECT try_subtract(2147483647, decimal(-1)) +-- !query schema +struct +-- !query output +2147483648 + + +-- !query +SELECT try_subtract(2147483647, "-1") +-- !query schema +struct +-- !query output +2147483648 + + -- !query SELECT try_subtract(-2147483648, 1) -- !query schema @@ -547,6 +595,22 @@ struct NULL +-- !query +SELECT try_multiply(2147483647, decimal(-2)) +-- !query schema +struct +-- !query output +-4294967294 + + +-- !query +SELECT try_multiply(2147483647, "-2") +-- !query schema +struct +-- !query output +-4294967294 + + -- !query SELECT try_multiply(-2147483648, 2) -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/try_arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/try_arithmetic.sql.out index c706a26078926..76f1d89b20927 100644 --- a/sql/core/src/test/resources/sql-tests/results/try_arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/try_arithmetic.sql.out @@ -15,6 +15,22 @@ struct NULL +-- !query +SELECT try_add(2147483647, decimal(1)) +-- !query schema +struct +-- !query output +2147483648 + + +-- !query +SELECT try_add(2147483647, "1") +-- !query schema +struct +-- !query output +2.147483648E9 + + -- !query SELECT try_add(-2147483648, -1) -- !query schema @@ -249,6 +265,22 @@ struct NULL +-- !query +SELECT try_divide(1, decimal(0)) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_divide(1, "0") +-- !query schema +struct +-- !query output +NULL + + -- !query SELECT try_divide(interval 2 year, 2) -- !query schema @@ -313,6 +345,22 @@ struct NULL +-- !query +SELECT try_subtract(2147483647, decimal(-1)) +-- !query schema +struct +-- !query output +2147483648 + + +-- !query +SELECT try_subtract(2147483647, "-1") +-- !query schema +struct +-- !query output +2.147483648E9 + + -- !query SELECT try_subtract(-2147483648, 1) -- !query schema @@ -409,6 +457,22 @@ struct NULL +-- !query +SELECT try_multiply(2147483647, decimal(-2)) +-- !query schema +struct +-- !query output +-4294967294 + + +-- !query +SELECT try_multiply(2147483647, "-2") +-- !query schema +struct +-- !query output +-4.294967294E9 + + -- !query SELECT try_multiply(-2147483648, 2) -- !query schema