From e973301bcfda416eec35bc1a5b1090034554e274 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=AC=E8=A1=8C=E6=B3=BD?= Date: Fri, 11 Oct 2024 14:28:56 +0800 Subject: [PATCH 1/2] Remove unnecessary trim function in cast, because Velox already does it --- .../velox/VeloxSparkPlanExecApi.scala | 44 ------------------- .../gluten/backendsapi/SparkPlanExecApi.scala | 2 - .../expression/ExpressionConverter.scala | 5 +-- 3 files changed, 1 insertion(+), 50 deletions(-) diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala index d30caa17790e..e702cce98b86 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala @@ -706,50 +706,6 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi { VeloxGetStructFieldTransformer(substraitExprName, childTransformer, ordinal, original) } - /** - * To align with spark in casting string type input to other types, add trim node for trimming - * space or whitespace. See spark's Cast.scala. - */ - override def genCastWithNewChild(c: Cast): Cast = { - // scalastyle:off nonascii - // Common whitespace to be trimmed, including: ' ', '\n', '\r', '\f', etc. - val trimWhitespaceStr = " \t\n\u000B\u000C\u000D\u001C\u001D\u001E\u001F" - // Space separator. - val trimSpaceSepStr = "\u1680\u2008\u2009\u200A\u205F\u3000" + - ('\u2000' to '\u2006').toList.mkString - // Line separator. - val trimLineSepStr = "\u2028" - // Paragraph separator. 
- val trimParaSepStr = "\u2029" - // Needs to be trimmed for casting to float/double/decimal - val trimSpaceStr = ('\u0000' to '\u0020').toList.mkString - // scalastyle:on nonascii - c.dataType match { - case BinaryType | _: ArrayType | _: MapType | _: StructType | _: UserDefinedType[_] => - c - case FloatType | DoubleType | _: DecimalType => - c.child.dataType match { - case StringType => - val trimNode = StringTrim(c.child, Some(Literal(trimSpaceStr))) - c.withNewChildren(Seq(trimNode)).asInstanceOf[Cast] - case _ => - c - } - case _ => - c.child.dataType match { - case StringType => - val trimNode = StringTrim( - c.child, - Some( - Literal(trimWhitespaceStr + - trimSpaceSepStr + trimLineSepStr + trimParaSepStr))) - c.withNewChildren(Seq(trimNode)).asInstanceOf[Cast] - case _ => - c - } - } - } - /** Define backend specfic expression mappings. */ override def extraExpressionMappings: Seq[Sig] = { Seq( diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala index 667c0bdc25a9..51624e709522 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala @@ -436,8 +436,6 @@ trait SparkPlanExecApi { startDate: ExpressionTransformer, original: DateDiff): ExpressionTransformer - def genCastWithNewChild(c: Cast): Cast = c - def genHashExpressionTransformer( substraitExprName: String, exprs: Seq[ExpressionTransformer], diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala index 72586b034936..e092f7acd01c 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala +++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala @@ -318,13 +318,10 @@ object ExpressionConverter extends SQLConfHelper with Logging { case s: ScalarSubquery => ScalarSubqueryTransformer(substraitExprName, s) case c: Cast => - // Add trim node, as necessary. - val newCast = - BackendsApiManager.getSparkPlanExecApiInstance.genCastWithNewChild(c) CastTransformer( substraitExprName, replaceWithExpressionTransformer0(newCast.child, attributeSeq, expressionsMap), - newCast) + c) case s: String2TrimExpression => val (srcStr, trimStr) = s match { case StringTrim(srcStr, trimStr) => (srcStr, trimStr) From c2cd9eeec05acbfeea3a55141cf831a6434687c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=AC=E8=A1=8C=E6=B3=BD?= Date: Fri, 11 Oct 2024 15:22:18 +0800 Subject: [PATCH 2/2] Fix Cast conversion to use c.child instead of the removed newCast --- .../org/apache/gluten/expression/ExpressionConverter.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala index e092f7acd01c..bc637fbd0f4c 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala @@ -320,7 +320,7 @@ object ExpressionConverter extends SQLConfHelper with Logging { case c: Cast => CastTransformer( substraitExprName, - replaceWithExpressionTransformer0(newCast.child, attributeSeq, expressionsMap), + replaceWithExpressionTransformer0(c.child, attributeSeq, expressionsMap), c) case s: String2TrimExpression => val (srcStr, trimStr) = s match {