Commit 0a06919

feat: add orderby and unit test for window union to handle same timestamp (#1834)
* Add orderby and unit test for window union
* Move window union order by in sortWithinPartition
1 parent 583fd0c commit 0a06919
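
What the change does: when a window aggregation unions extra tables (`WINDOW w AS (UNION t2 ...)`), rows from the union tables can carry exactly the same ORDER BY timestamp as the current row from the primary table, and without a tie-breaker the sort order inside a partition is nondeterministic. The patch threads a unique flag column name (`_WINDOW_UNION_FLAG_` plus a timestamp) from the planner into `windowUnionTables` and appends that column to the `sortWithinPartitions` keys, so primary-table rows are placed last among rows with equal timestamps. Below is a minimal self-contained sketch of the idea; the table contents and the assumption that primary rows are flagged `true` and union rows `false` are illustrative, not taken verbatim from the commit:

    import org.apache.spark.sql.{Column, SparkSession}
    import org.apache.spark.sql.functions.lit

    object WindowUnionSortSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().master("local[1]").appName("sketch").getOrCreate()
        import spark.implicits._

        // Unique flag column name, mirroring the patch
        val uniqueColName = "_WINDOW_UNION_FLAG_" + System.currentTimeMillis()

        // Assumed flag convention: primary rows true, union rows false, so an
        // ascending sort places primary rows last on timestamp ties
        val primary = Seq((1, 1L)).toDF("int_col", "long_col").withColumn(uniqueColName, lit(true))
        val union = Seq((1, 1L), (1, 1L)).toDF("int_col", "long_col").withColumn(uniqueColName, lit(false))
        val unioned = primary.union(union)

        // Same shape as the patch: partition keys ++ order-by keys ++ the flag column
        val sortCols: Seq[Column] =
          Seq(unioned.col("int_col"), unioned.col("long_col"), unioned.col(uniqueColName))
        unioned.repartition(unioned.col("int_col"))
          .sortWithinPartitions(sortCols: _*)
          .show()
      }
    }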

3 files changed: +98, -13 lines

java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/nodes/WindowAggPlan.scala (+30, -11)

@@ -24,11 +24,11 @@ import com._4paradigm.openmldb.batch.window.{WindowAggPlanUtil, WindowComputer}
 import com._4paradigm.openmldb.batch.{OpenmldbBatchConfig, PlanContext, SparkInstance}
 import com._4paradigm.openmldb.common.codec.CodecUtil
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.JoinedRow
 import org.apache.spark.sql.types.{DateType, LongType, StructType, TimestampType}
-import org.apache.spark.sql.{DataFrame, Row, functions}
+import org.apache.spark.sql.{Column, DataFrame, Row, functions}
 import org.apache.spark.util.SerializableConfiguration
 import org.slf4j.LoggerFactory
+
 import scala.collection.mutable
 
 /** The planner which implements window agg physical node.
@@ -67,17 +67,26 @@ object WindowAggPlan {
     val dfWithIndex = inputTable.getDfConsideringIndex(ctx, physicalNode.GetNodeId())
 
     // Do union if physical node has union flag
+    val uniqueColName = "_WINDOW_UNION_FLAG_" + System.currentTimeMillis()
     val unionTable = if (isWindowWithUnion) {
-      WindowAggPlanUtil.windowUnionTables(ctx, physicalNode, dfWithIndex)
+      WindowAggPlanUtil.windowUnionTables(ctx, physicalNode, dfWithIndex, uniqueColName)
     } else {
       dfWithIndex
     }
 
-    // Do groupby and sort with window skew optimization or not
+    // Use order by to make sure that rows with the same timestamp from the primary table are placed last
+    // TODO(tobe): support desc if we get config from physical plan
+    val unionSparkCol: Option[Column] = if (isWindowWithUnion) {
+      Some(unionTable.col(uniqueColName))
+    } else {
+      None
+    }
+
+    // Do group by and sort with window skew optimization or not
     val repartitionDf = if (isWindowSkewOptimization) {
-      windowPartitionWithSkewOpt(ctx, physicalNode, unionTable, windowAggConfig)
+      windowPartitionWithSkewOpt(ctx, physicalNode, unionTable, windowAggConfig, unionSparkCol)
     } else {
-      windowPartition(ctx, physicalNode, unionTable)
+      windowPartition(ctx, physicalNode, unionTable, unionSparkCol)
     }
 
     // Get the output schema which may add the index column
@@ -179,7 +188,8 @@ object WindowAggPlan {
   def windowPartitionWithSkewOpt(ctx: PlanContext,
                                  windowAggNode: PhysicalWindowAggrerationNode,
                                  inputDf: DataFrame,
-                                 windowAggConfig: WindowAggConfig): DataFrame = {
+                                 windowAggConfig: WindowAggConfig,
+                                 unionSparkCol: Option[Column]): DataFrame = {
     val uniqueNamePostfix = ctx.getConf.windowSkewOptPostfix
 
     // Cache the input table which may be used multiple times
@@ -274,7 +284,12 @@
     }
 
     val sortedByCol = PhysicalNodeUtil.getOrderbyColumns(windowAggNode, addColumnsDf)
-    val sortedByCols = repartitionCols ++ sortedByCol
+
+    val sortedByCols = if (unionSparkCol.isEmpty) {
+      repartitionCols ++ sortedByCol
+    } else {
+      repartitionCols ++ sortedByCol ++ Array(unionSparkCol.get)
+    }
 
     // Notice that we should make sure the keys in the same partition are ordered as well
     val sortedDf = repartitionDf.sortWithinPartitions(sortedByCols: _*)
@@ -289,7 +304,8 @@
    * 1. Repartition the table with the "partition by" keys.
    * 2. Sort the data within partitions with the "order by" keys.
    */
-  def windowPartition(ctx: PlanContext, windowAggNode: PhysicalWindowAggrerationNode, inputDf: DataFrame): DataFrame = {
+  def windowPartition(ctx: PlanContext, windowAggNode: PhysicalWindowAggrerationNode, inputDf: DataFrame,
+                      unionSparkCol: Option[Column]): DataFrame = {
 
     // Repartition the table with window keys
     val repartitionCols = PhysicalNodeUtil.getRepartitionColumns(windowAggNode, inputDf)
@@ -302,9 +318,12 @@
     // Sort with the window orderby keys
     val orderbyCols = PhysicalNodeUtil.getOrderbyColumns(windowAggNode, inputDf)
 
+    val sortedDf = if (unionSparkCol.isEmpty) {
+      repartitionDf.sortWithinPartitions(repartitionCols ++ orderbyCols: _*)
+    } else {
+      repartitionDf.sortWithinPartitions(repartitionCols ++ orderbyCols ++ Array(unionSparkCol.get): _*)
+    }
     // Notice that we should make sure the keys in the same partition are ordered as well
-    val sortedDf = repartitionDf.sortWithinPartitions(repartitionCols ++ orderbyCols: _*)
-
     sortedDf
   }
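
A note on the plumbing: the flag is threaded as an `Option[Column]`, so call sites without a window union pass `None` and the sort keys are unchanged. Since `Option.toSeq` yields zero or one element, the two if/else branches above could also be collapsed; a hedged equivalent sketch, assuming `repartitionCols` and `orderbyCols` are sequences of `Column` as in the surrounding code:

    // Equivalent formulation (sketch, not in the commit): Option.toSeq appends
    // the flag column only when a window union is present
    val sortedDf = repartitionDf.sortWithinPartitions(
      repartitionCols ++ orderbyCols ++ unionSparkCol.toSeq: _*)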

java/openmldb-batch/src/main/scala/com/_4paradigm/openmldb/batch/window/WindowAggPlanUtil.scala (+3, -2)

@@ -24,6 +24,7 @@ import com._4paradigm.openmldb.batch.utils.{HybridseUtil, SparkColumnUtil, Spark
 import com._4paradigm.openmldb.batch.{OpenmldbBatchConfig, PlanContext, SparkInstance}
 import com._4paradigm.openmldb.sdk.impl.SqlClusterExecutor
 import org.apache.hadoop.fs.FileSystem
+import org.apache.spark.sql.functions.col
 import org.apache.spark.sql.{DataFrame, functions}
 import org.apache.spark.sql.types.{LongType, StructType}
 import org.apache.spark.util.SerializableConfiguration
@@ -48,10 +49,10 @@ object WindowAggPlanUtil {
    */
   def windowUnionTables(ctx: PlanContext,
                         physicalNode: PhysicalWindowAggrerationNode,
-                        inputDf: DataFrame): DataFrame = {
+                        inputDf: DataFrame,
+                        uniqueColName: String): DataFrame = {
 
     val isKeepIndexColumn = SparkInstance.keepIndexColumn(ctx, physicalNode.GetNodeId())
-    val uniqueColName = "_WINDOW_UNION_FLAG_" + System.currentTimeMillis()
     val unionNum = physicalNode.window_unions().GetSize().toInt
 
     val rightTables = (0 until unionNum).map(i => {
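
For context, `windowUnionTables` is where the flag column is materialized on each table before the union, using the caller-supplied `uniqueColName`; the diff does not show that tagging code. A hedged sketch of the likely shape, with `tagAndUnion`, `primaryDf`, and `unionDf` as illustrative names and the true/false convention assumed:

    import org.apache.spark.sql.DataFrame
    import org.apache.spark.sql.functions.lit

    // Sketch (assumed flag convention): tag primary rows true and union-table
    // rows false so an ascending sort on the flag puts primary rows last
    def tagAndUnion(primaryDf: DataFrame, unionDf: DataFrame, uniqueColName: String): DataFrame = {
      primaryDf.withColumn(uniqueColName, lit(true))
        .union(unionDf.withColumn(uniqueColName, lit(false)))
    }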
java/openmldb-batch/src/test/scala/com/_4paradigm/openmldb/batch/end2end/TestWindowUnionWithSameTimestamp.scala (+65, -0)

@@ -0,0 +1,65 @@
+/*
+ * Copyright 2021 4Paradigm
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com._4paradigm.openmldb.batch.end2end
+
+import com._4paradigm.openmldb.batch.SparkTestSuite
+import com._4paradigm.openmldb.batch.api.OpenmldbSession
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.types.{IntegerType, LongType, StructField, StructType}
+
+class TestWindowUnionWithSameTimestamp extends SparkTestSuite {
+
+  test("Test window union with same timestamp") {
+
+    val spark = getSparkSession
+    val sess = new OpenmldbSession(spark)
+
+    val data = Seq[Row](
+      Row(1, 1L)
+    )
+    val schema = StructType(List(
+      StructField("int_col", IntegerType),
+      StructField("long_col", LongType)
+    ))
+    val df = spark.createDataFrame(spark.sparkContext.makeRDD(data), schema)
+    sess.registerTable("t1", df)
+
+    val data2 = Seq[Row](
+      Row(1, 1L),
+      Row(1, 1L)
+    )
+    val schema2 = StructType(List(
+      StructField("int_col", IntegerType),
+      StructField("long_col", LongType)
+    ))
+    val df2 = spark.createDataFrame(spark.sparkContext.makeRDD(data2), schema2)
+    sess.registerTable("t2", df2)
+
+    val sqlText =
+      """
+        | SELECT count(int_col) OVER w
+        | FROM t1
+        | WINDOW w AS (UNION t2 PARTITION BY int_col ORDER BY long_col ROWS BETWEEN 10 PRECEDING AND CURRENT ROW)
+        |""".stripMargin
+
+    val outputDf = sess.sql(sqlText)
+    val outputRow = outputDf.collect()(0)
+    // The output of count(int_col) should contain the current row from the primary table and the rows from union tables
+    assert(outputRow.getLong(0) == 3)
+  }
+
+}
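
Why the expected count is 3: under `ROWS BETWEEN 10 PRECEDING AND CURRENT ROW`, the window for t1's single row (1, 1L) should contain the two t2 rows that share its timestamp plus the current row itself. That only holds if the primary-table row sorts after the union rows on ties, which is exactly what the new flag column guarantees; without the tie-break, the current row could land before one or both t2 rows and the count would be flaky.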
