[SPARK-33853][SQL] EXPLAIN CODEGEN and BenchmarkQueryTest don't show subquery code #30859

sarutak · 2020-12-19T21:41:17Z

What changes were proposed in this pull request?

This PR fixes an issue where EXPLAIN CODEGEN and BenchmarkQueryTest do not show the corresponding generated code for subqueries.

The following example reproduces the issue with EXPLAIN CODEGEN; the output shown first is from before this change.

spark.conf.set("spark.sql.adaptive.enabled", "false")
val df = spark.range(1, 100)
df.createTempView("df")
spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("CODEGEN")

scala> spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("CODEGEN")
Found 1 WholeStageCodegen subtrees.
== Subtree 1 / 1 (maxMethodCodeSize:55; maxConstantPoolSize:97(0.15% used); numInnerClasses:0) ==
*(1) Project [Subquery scalar-subquery#3, [id=#24] AS scalarsubquery()#5L]
:  +- Subquery scalar-subquery#3, [id=#24]
:     +- *(2) HashAggregate(keys=[], functions=[min(id#0L)], output=[v#2L])
:        +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#20]
:           +- *(1) HashAggregate(keys=[], functions=[partial_min(id#0L)], output=[min#8L])
:              +- *(1) Range (1, 100, step=1, splits=12)
+- *(1) Scan OneRowRelation[]

Generated code:
/* 001 */ public Object generate(Object[] references) {
/* 002 */   return new GeneratedIteratorForCodegenStage1(references);
/* 003 */ }
/* 004 */
/* 005 */ // codegenStageId=1
/* 006 */ final class GeneratedIteratorForCodegenStage1 extends org.apache.spark.sql.execution.BufferedRowIterator {
/* 007 */   private Object[] references;
/* 008 */   private scala.collection.Iterator[] inputs;
/* 009 */   private scala.collection.Iterator rdd_input_0;
/* 010 */   private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] project_mutableStateArray_0 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[1];
/* 011 */
/* 012 */   public GeneratedIteratorForCodegenStage1(Object[] references) {
/* 013 */     this.references = references;
/* 014 */   }
/* 015 */
/* 016 */   public void init(int index, scala.collection.Iterator[] inputs) {
/* 017 */     partitionIndex = index;
/* 018 */     this.inputs = inputs;
/* 019 */     rdd_input_0 = inputs[0];
/* 020 */     project_mutableStateArray_0[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0);
/* 021 */
/* 022 */   }
/* 023 */
/* 024 */   private void project_doConsume_0() throws java.io.IOException {
/* 025 */     // common sub-expressions
/* 026 */
/* 027 */     project_mutableStateArray_0[0].reset();
/* 028 */
/* 029 */     if (false) {
/* 030 */       project_mutableStateArray_0[0].setNullAt(0);
/* 031 */     } else {
/* 032 */       project_mutableStateArray_0[0].write(0, 1L);
/* 033 */     }
/* 034 */     append((project_mutableStateArray_0[0].getRow()));
/* 035 */
/* 036 */   }
/* 037 */
/* 038 */   protected void processNext() throws java.io.IOException {
/* 039 */     while ( rdd_input_0.hasNext()) {
/* 040 */       InternalRow rdd_row_0 = (InternalRow) rdd_input_0.next();
/* 041 */       ((org.apache.spark.sql.execution.metric.SQLMetric) references[0] /* numOutputRows */).add(1);
/* 042 */       project_doConsume_0();
/* 043 */       if (shouldStop()) return;
/* 044 */     }
/* 045 */   }
/* 046 */
/* 047 */ }

After this change, the corresponding code for subqueries is shown.

Found 3 WholeStageCodegen subtrees.
== Subtree 1 / 3 (maxMethodCodeSize:282; maxConstantPoolSize:206(0.31% used); numInnerClasses:0) ==
*(1) HashAggregate(keys=[], functions=[partial_min(id#0L)], output=[min#8L])
+- *(1) Range (1, 100, step=1, splits=12)

Generated code:
/* 001 */ public Object generate(Object[] references) {
/* 002 */   return new GeneratedIteratorForCodegenStage1(references);
/* 003 */ }
/* 004 */
/* 005 */ // codegenStageId=1
/* 006 */ final class GeneratedIteratorForCodegenStage1 extends org.apache.spark.sql.execution.BufferedRowIterator {
/* 007 */   private Object[] references;
/* 008 */   private scala.collection.Iterator[] inputs;
/* 009 */   private boolean agg_initAgg_0;
/* 010 */   private boolean agg_bufIsNull_0;
/* 011 */   private long agg_bufValue_0;
/* 012 */   private boolean range_initRange_0;
/* 013 */   private long range_nextIndex_0;
/* 014 */   private TaskContext range_taskContext_0;
/* 015 */   private InputMetrics range_inputMetrics_0;
/* 016 */   private long range_batchEnd_0;
/* 017 */   private long range_numElementsTodo_0;
/* 018 */   private boolean agg_agg_isNull_2_0;
/* 019 */   private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] range_mutableStateArray_0 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[3];
/* 020 */
/* 021 */   public GeneratedIteratorForCodegenStage1(Object[] references) {
/* 022 */     this.references = references;
/* 023 */   }
/* 024 */
/* 025 */   public void init(int index, scala.collection.Iterator[] inputs) {
/* 026 */     partitionIndex = index;
/* 027 */     this.inputs = inputs;
/* 028 */
/* 029 */     range_taskContext_0 = TaskContext.get();
/* 030 */     range_inputMetrics_0 = range_taskContext_0.taskMetrics().inputMetrics();
/* 031 */     range_mutableStateArray_0[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0);
/* 032 */     range_mutableStateArray_0[1] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0);
/* 033 */     range_mutableStateArray_0[2] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0);
/* 034 */
/* 035 */   }
/* 036 */
/* 037 */   private void agg_doAggregateWithoutKey_0() throws java.io.IOException {
/* 038 */     // initialize aggregation buffer
/* 039 */     agg_bufIsNull_0 = true;
/* 040 */     agg_bufValue_0 = -1L;
/* 041 */
/* 042 */     // initialize Range
/* 043 */     if (!range_initRange_0) {
/* 044 */       range_initRange_0 = true;
/* 045 */       initRange(partitionIndex);
/* 046 */     }
/* 047 */
/* 048 */     while (true) {
/* 049 */       if (range_nextIndex_0 == range_batchEnd_0) {
/* 050 */         long range_nextBatchTodo_0;
/* 051 */         if (range_numElementsTodo_0 > 1000L) {
/* 052 */           range_nextBatchTodo_0 = 1000L;
/* 053 */           range_numElementsTodo_0 -= 1000L;
/* 054 */         } else {
/* 055 */           range_nextBatchTodo_0 = range_numElementsTodo_0;
/* 056 */           range_numElementsTodo_0 = 0;
/* 057 */           if (range_nextBatchTodo_0 == 0) break;
/* 058 */         }
/* 059 */         range_batchEnd_0 += range_nextBatchTodo_0 * 1L;
/* 060 */       }
/* 061 */
/* 062 */       int range_localEnd_0 = (int)((range_batchEnd_0 - range_nextIndex_0) / 1L);
/* 063 */       for (int range_localIdx_0 = 0; range_localIdx_0 < range_localEnd_0; range_localIdx_0++) {
/* 064 */         long range_value_0 = ((long)range_localIdx_0 * 1L) + range_nextIndex_0;
/* 065 */
/* 066 */         agg_doConsume_0(range_value_0);
/* 067 */
/* 068 */         // shouldStop check is eliminated
/* 069 */       }
/* 070 */       range_nextIndex_0 = range_batchEnd_0;
/* 071 */       ((org.apache.spark.sql.execution.metric.SQLMetric) references[0] /* numOutputRows */).add(range_localEnd_0);
/* 072 */       range_inputMetrics_0.incRecordsRead(range_localEnd_0);
/* 073 */       range_taskContext_0.killTaskIfInterrupted();
/* 074 */     }
/* 075 */
/* 076 */   }
/* 077 */
/* 078 */   private void initRange(int idx) {
/* 079 */     java.math.BigInteger index = java.math.BigInteger.valueOf(idx);
/* 080 */     java.math.BigInteger numSlice = java.math.BigInteger.valueOf(12L);
/* 081 */     java.math.BigInteger numElement = java.math.BigInteger.valueOf(99L);
/* 082 */     java.math.BigInteger step = java.math.BigInteger.valueOf(1L);
/* 083 */     java.math.BigInteger start = java.math.BigInteger.valueOf(1L);
/* 084 */     long partitionEnd;
/* 085 */
/* 086 */     java.math.BigInteger st = index.multiply(numElement).divide(numSlice).multiply(step).add(start);
/* 087 */     if (st.compareTo(java.math.BigInteger.valueOf(Long.MAX_VALUE)) > 0) {
/* 088 */       range_nextIndex_0 = Long.MAX_VALUE;
/* 089 */     } else if (st.compareTo(java.math.BigInteger.valueOf(Long.MIN_VALUE)) < 0) {
/* 090 */       range_nextIndex_0 = Long.MIN_VALUE;
/* 091 */     } else {
/* 092 */       range_nextIndex_0 = st.longValue();
/* 093 */     }
/* 094 */     range_batchEnd_0 = range_nextIndex_0;
/* 095 */
/* 096 */     java.math.BigInteger end = index.add(java.math.BigInteger.ONE).multiply(numElement).divide(numSlice)
/* 097 */     .multiply(step).add(start);
/* 098 */     if (end.compareTo(java.math.BigInteger.valueOf(Long.MAX_VALUE)) > 0) {
/* 099 */       partitionEnd = Long.MAX_VALUE;
/* 100 */     } else if (end.compareTo(java.math.BigInteger.valueOf(Long.MIN_VALUE)) < 0) {
/* 101 */       partitionEnd = Long.MIN_VALUE;
/* 102 */     } else {
/* 103 */       partitionEnd = end.longValue();
/* 104 */     }
/* 105 */
/* 106 */     java.math.BigInteger startToEnd = java.math.BigInteger.valueOf(partitionEnd).subtract(
/* 107 */       java.math.BigInteger.valueOf(range_nextIndex_0));
/* 108 */     range_numElementsTodo_0  = startToEnd.divide(step).longValue();
/* 109 */     if (range_numElementsTodo_0 < 0) {
/* 110 */       range_numElementsTodo_0 = 0;
/* 111 */     } else if (startToEnd.remainder(step).compareTo(java.math.BigInteger.valueOf(0L)) != 0) {
/* 112 */       range_numElementsTodo_0++;
/* 113 */     }
/* 114 */   }
/* 115 */
/* 116 */   private void agg_doConsume_0(long agg_expr_0_0) throws java.io.IOException {
/* 117 */     // do aggregate
/* 118 */     // common sub-expressions
/* 119 */
/* 120 */     // evaluate aggregate functions and update aggregation buffers
/* 121 */
/* 122 */     agg_agg_isNull_2_0 = true;
/* 123 */     long agg_value_2 = -1L;
/* 124 */
/* 125 */     if (!agg_bufIsNull_0 && (agg_agg_isNull_2_0 ||
/* 126 */         agg_value_2 > agg_bufValue_0)) {
/* 127 */       agg_agg_isNull_2_0 = false;
/* 128 */       agg_value_2 = agg_bufValue_0;
/* 129 */     }
/* 130 */
/* 131 */     if (!false && (agg_agg_isNull_2_0 ||
/* 132 */         agg_value_2 > agg_expr_0_0)) {
/* 133 */       agg_agg_isNull_2_0 = false;
/* 134 */       agg_value_2 = agg_expr_0_0;
/* 135 */     }
/* 136 */
/* 137 */     agg_bufIsNull_0 = agg_agg_isNull_2_0;
/* 138 */     agg_bufValue_0 = agg_value_2;
/* 139 */
/* 140 */   }
/* 141 */
/* 142 */   protected void processNext() throws java.io.IOException {
/* 143 */     while (!agg_initAgg_0) {
/* 144 */       agg_initAgg_0 = true;
/* 145 */       long agg_beforeAgg_0 = System.nanoTime();
/* 146 */       agg_doAggregateWithoutKey_0();
/* 147 */       ((org.apache.spark.sql.execution.metric.SQLMetric) references[2] /* aggTime */).add((System.nanoTime() - agg_beforeAgg_0) / 1000000);
/* 148 */
/* 149 */       // output the result
/* 150 */
/* 151 */       ((org.apache.spark.sql.execution.metric.SQLMetric) references[1] /* numOutputRows */).add(1);
/* 152 */       range_mutableStateArray_0[2].reset();
/* 153 */
/* 154 */       range_mutableStateArray_0[2].zeroOutNullBytes();
/* 155 */
/* 156 */       if (agg_bufIsNull_0) {
/* 157 */         range_mutableStateArray_0[2].setNullAt(0);
/* 158 */       } else {
/* 159 */         range_mutableStateArray_0[2].write(0, agg_bufValue_0);
/* 160 */       }
/* 161 */       append((range_mutableStateArray_0[2].getRow()));
/* 162 */     }
/* 163 */   }
/* 164 */
/* 165 */ }

Why are the changes needed?

For better debuggability.

Does this PR introduce any user-facing change?

Yes. After this change, users can see subquery code in the output of EXPLAIN CODEGEN.

How was this patch tested?

New test.

sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala

SparkQA · 2020-12-20T02:59:23Z

Test build #133085 has finished for PR 30859 at commit 09c6fd1.

This patch passes all tests.
This patch merges cleanly.
This patch adds no public classes.

maropu · 2020-12-20T05:39:11Z

sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala

+    def findSubtrees(plan: SparkPlan): Unit = {
+      plan transform {
+        case s: WholeStageCodegenExec =>
+          codegenSubtrees += s


Does the code below have the same issue?

spark/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala

Lines 65 to 70 in df2314b

val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]()

plan foreach {

case s: WholeStageCodegenExec =>

codegenSubtrees += s

case _ =>

}

maropu · 2020-12-20T05:39:49Z

sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala

+          .write
+          .format("parquet")
+          .mode("overwrite")
+          .saveAsTable("df1")


nit: for better test performance, could you use a temporary view instead?

maropu · 2020-12-20T05:40:09Z

Looks fine otherwise.

SparkQA · 2020-12-20T06:11:25Z

Kubernetes integration test starting
URL: https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/37695/

SparkQA · 2020-12-20T06:18:33Z

Kubernetes integration test status failure
URL: https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/37695/

SparkQA · 2020-12-20T07:06:04Z

Kubernetes integration test starting
URL: https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/37696/

SparkQA · 2020-12-20T07:10:47Z

Kubernetes integration test status failure
URL: https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/37696/

SparkQA · 2020-12-20T09:09:14Z

Test build #133097 has finished for PR 30859 at commit 022d363.

This patch fails Spark unit tests.
This patch merges cleanly.
This patch adds no public classes.

SparkQA · 2020-12-20T10:12:25Z

Test build #133095 has finished for PR 30859 at commit 26551bf.

This patch fails SparkR unit tests.
This patch merges cleanly.
This patch adds no public classes.

SparkQA · 2020-12-20T10:18:09Z

Test build #133096 has finished for PR 30859 at commit 0f6a53b.

This patch fails Spark unit tests.
This patch merges cleanly.
This patch adds no public classes.

sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala

SparkQA · 2020-12-20T19:08:42Z

Kubernetes integration test starting
URL: https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/37712/

SparkQA · 2020-12-20T19:13:19Z

Kubernetes integration test status failure
URL: https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/37712/

SparkQA · 2020-12-20T21:46:35Z

Test build #133111 has finished for PR 30859 at commit c338832.

This patch passes all tests.
This patch merges cleanly.
This patch adds no public classes.

SparkQA · 2020-12-20T22:39:48Z

Test build #133113 has finished for PR 30859 at commit 13cadeb.

This patch fails Spark unit tests.
This patch merges cleanly.
This patch adds no public classes.

maropu · 2020-12-21T00:42:41Z

retest this please

dongjoon-hyun · 2020-12-21T01:35:21Z

+1, LGTM. (Pending CIs)
Thank you, @sarutak and @maropu .

SparkQA · 2020-12-21T01:42:43Z

Kubernetes integration test starting
URL: https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/37717/

SparkQA · 2020-12-21T02:16:05Z

Kubernetes integration test status failure
URL: https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/37717/

SparkQA · 2020-12-21T05:29:39Z

Test build #133118 has finished for PR 30859 at commit 13cadeb.

This patch passes all tests.
This patch merges cleanly.
This patch adds no public classes.

…subquery code ### What changes were proposed in this pull request? This PR fixes an issue that `EXPLAIN CODEGEN` and `BenchmarkQueryTest` don't show the corresponding code for subqueries. The following example is about `EXPLAIN CODEGEN`. ``` spark.conf.set("spark.sql.adaptive.enabled", "false") val df = spark.range(1, 100) df.createTempView("df") spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("CODEGEN") scala> spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("CODEGEN") Found 1 WholeStageCodegen subtrees. == Subtree 1 / 1 (maxMethodCodeSize:55; maxConstantPoolSize:97(0.15% used); numInnerClasses:0) == *(1) Project [Subquery scalar-subquery#3, [id=#24] AS scalarsubquery()#5L] : +- Subquery scalar-subquery#3, [id=#24] : +- *(2) HashAggregate(keys=[], functions=[min(id#0L)], output=[v#2L]) : +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#20] : +- *(1) HashAggregate(keys=[], functions=[partial_min(id#0L)], output=[min#8L]) : +- *(1) Range (1, 100, step=1, splits=12) +- *(1) Scan OneRowRelation[] Generated code: /* 001 */ public Object generate(Object[] references) { /* 002 */ return new GeneratedIteratorForCodegenStage1(references); /* 003 */ } /* 004 */ /* 005 */ // codegenStageId=1 /* 006 */ final class GeneratedIteratorForCodegenStage1 extends org.apache.spark.sql.execution.BufferedRowIterator { /* 007 */ private Object[] references; /* 008 */ private scala.collection.Iterator[] inputs; /* 009 */ private scala.collection.Iterator rdd_input_0; /* 010 */ private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] project_mutableStateArray_0 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[1]; /* 011 */ /* 012 */ public GeneratedIteratorForCodegenStage1(Object[] references) { /* 013 */ this.references = references; /* 014 */ } /* 015 */ /* 016 */ public void init(int index, scala.collection.Iterator[] inputs) { /* 017 */ partitionIndex = index; /* 018 */ this.inputs = inputs; /* 019 */ rdd_input_0 = 
inputs[0]; /* 020 */ project_mutableStateArray_0[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0); /* 021 */ /* 022 */ } /* 023 */ /* 024 */ private void project_doConsume_0() throws java.io.IOException { /* 025 */ // common sub-expressions /* 026 */ /* 027 */ project_mutableStateArray_0[0].reset(); /* 028 */ /* 029 */ if (false) { /* 030 */ project_mutableStateArray_0[0].setNullAt(0); /* 031 */ } else { /* 032 */ project_mutableStateArray_0[0].write(0, 1L); /* 033 */ } /* 034 */ append((project_mutableStateArray_0[0].getRow())); /* 035 */ /* 036 */ } /* 037 */ /* 038 */ protected void processNext() throws java.io.IOException { /* 039 */ while ( rdd_input_0.hasNext()) { /* 040 */ InternalRow rdd_row_0 = (InternalRow) rdd_input_0.next(); /* 041 */ ((org.apache.spark.sql.execution.metric.SQLMetric) references[0] /* numOutputRows */).add(1); /* 042 */ project_doConsume_0(); /* 043 */ if (shouldStop()) return; /* 044 */ } /* 045 */ } /* 046 */ /* 047 */ } ``` After this change, the corresponding code for subqueries are shown. ``` Found 3 WholeStageCodegen subtrees. 
== Subtree 1 / 3 (maxMethodCodeSize:282; maxConstantPoolSize:206(0.31% used); numInnerClasses:0) == *(1) HashAggregate(keys=[], functions=[partial_min(id#0L)], output=[min#8L]) +- *(1) Range (1, 100, step=1, splits=12) Generated code: /* 001 */ public Object generate(Object[] references) { /* 002 */ return new GeneratedIteratorForCodegenStage1(references); /* 003 */ } /* 004 */ /* 005 */ // codegenStageId=1 /* 006 */ final class GeneratedIteratorForCodegenStage1 extends org.apache.spark.sql.execution.BufferedRowIterator { /* 007 */ private Object[] references; /* 008 */ private scala.collection.Iterator[] inputs; /* 009 */ private boolean agg_initAgg_0; /* 010 */ private boolean agg_bufIsNull_0; /* 011 */ private long agg_bufValue_0; /* 012 */ private boolean range_initRange_0; /* 013 */ private long range_nextIndex_0; /* 014 */ private TaskContext range_taskContext_0; /* 015 */ private InputMetrics range_inputMetrics_0; /* 016 */ private long range_batchEnd_0; /* 017 */ private long range_numElementsTodo_0; /* 018 */ private boolean agg_agg_isNull_2_0; /* 019 */ private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] range_mutableStateArray_0 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[3]; /* 020 */ /* 021 */ public GeneratedIteratorForCodegenStage1(Object[] references) { /* 022 */ this.references = references; /* 023 */ } /* 024 */ /* 025 */ public void init(int index, scala.collection.Iterator[] inputs) { /* 026 */ partitionIndex = index; /* 027 */ this.inputs = inputs; /* 028 */ /* 029 */ range_taskContext_0 = TaskContext.get(); /* 030 */ range_inputMetrics_0 = range_taskContext_0.taskMetrics().inputMetrics(); /* 031 */ range_mutableStateArray_0[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0); /* 032 */ range_mutableStateArray_0[1] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0); /* 033 */ range_mutableStateArray_0[2] = new 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0); /* 034 */ /* 035 */ } /* 036 */ /* 037 */ private void agg_doAggregateWithoutKey_0() throws java.io.IOException { /* 038 */ // initialize aggregation buffer /* 039 */ agg_bufIsNull_0 = true; /* 040 */ agg_bufValue_0 = -1L; /* 041 */ /* 042 */ // initialize Range /* 043 */ if (!range_initRange_0) { /* 044 */ range_initRange_0 = true; /* 045 */ initRange(partitionIndex); /* 046 */ } /* 047 */ /* 048 */ while (true) { /* 049 */ if (range_nextIndex_0 == range_batchEnd_0) { /* 050 */ long range_nextBatchTodo_0; /* 051 */ if (range_numElementsTodo_0 > 1000L) { /* 052 */ range_nextBatchTodo_0 = 1000L; /* 053 */ range_numElementsTodo_0 -= 1000L; /* 054 */ } else { /* 055 */ range_nextBatchTodo_0 = range_numElementsTodo_0; /* 056 */ range_numElementsTodo_0 = 0; /* 057 */ if (range_nextBatchTodo_0 == 0) break; /* 058 */ } /* 059 */ range_batchEnd_0 += range_nextBatchTodo_0 * 1L; /* 060 */ } /* 061 */ /* 062 */ int range_localEnd_0 = (int)((range_batchEnd_0 - range_nextIndex_0) / 1L); /* 063 */ for (int range_localIdx_0 = 0; range_localIdx_0 < range_localEnd_0; range_localIdx_0++) { /* 064 */ long range_value_0 = ((long)range_localIdx_0 * 1L) + range_nextIndex_0; /* 065 */ /* 066 */ agg_doConsume_0(range_value_0); /* 067 */ /* 068 */ // shouldStop check is eliminated /* 069 */ } /* 070 */ range_nextIndex_0 = range_batchEnd_0; /* 071 */ ((org.apache.spark.sql.execution.metric.SQLMetric) references[0] /* numOutputRows */).add(range_localEnd_0); /* 072 */ range_inputMetrics_0.incRecordsRead(range_localEnd_0); /* 073 */ range_taskContext_0.killTaskIfInterrupted(); /* 074 */ } /* 075 */ /* 076 */ } /* 077 */ /* 078 */ private void initRange(int idx) { /* 079 */ java.math.BigInteger index = java.math.BigInteger.valueOf(idx); /* 080 */ java.math.BigInteger numSlice = java.math.BigInteger.valueOf(12L); /* 081 */ java.math.BigInteger numElement = java.math.BigInteger.valueOf(99L); /* 082 */ java.math.BigInteger 
step = java.math.BigInteger.valueOf(1L); /* 083 */ java.math.BigInteger start = java.math.BigInteger.valueOf(1L); /* 084 */ long partitionEnd; /* 085 */ /* 086 */ java.math.BigInteger st = index.multiply(numElement).divide(numSlice).multiply(step).add(start); /* 087 */ if (st.compareTo(java.math.BigInteger.valueOf(Long.MAX_VALUE)) > 0) { /* 088 */ range_nextIndex_0 = Long.MAX_VALUE; /* 089 */ } else if (st.compareTo(java.math.BigInteger.valueOf(Long.MIN_VALUE)) < 0) { /* 090 */ range_nextIndex_0 = Long.MIN_VALUE; /* 091 */ } else { /* 092 */ range_nextIndex_0 = st.longValue(); /* 093 */ } /* 094 */ range_batchEnd_0 = range_nextIndex_0; /* 095 */ /* 096 */ java.math.BigInteger end = index.add(java.math.BigInteger.ONE).multiply(numElement).divide(numSlice) /* 097 */ .multiply(step).add(start); /* 098 */ if (end.compareTo(java.math.BigInteger.valueOf(Long.MAX_VALUE)) > 0) { /* 099 */ partitionEnd = Long.MAX_VALUE; /* 100 */ } else if (end.compareTo(java.math.BigInteger.valueOf(Long.MIN_VALUE)) < 0) { /* 101 */ partitionEnd = Long.MIN_VALUE; /* 102 */ } else { /* 103 */ partitionEnd = end.longValue(); /* 104 */ } /* 105 */ /* 106 */ java.math.BigInteger startToEnd = java.math.BigInteger.valueOf(partitionEnd).subtract( /* 107 */ java.math.BigInteger.valueOf(range_nextIndex_0)); /* 108 */ range_numElementsTodo_0 = startToEnd.divide(step).longValue(); /* 109 */ if (range_numElementsTodo_0 < 0) { /* 110 */ range_numElementsTodo_0 = 0; /* 111 */ } else if (startToEnd.remainder(step).compareTo(java.math.BigInteger.valueOf(0L)) != 0) { /* 112 */ range_numElementsTodo_0++; /* 113 */ } /* 114 */ } /* 115 */ /* 116 */ private void agg_doConsume_0(long agg_expr_0_0) throws java.io.IOException { /* 117 */ // do aggregate /* 118 */ // common sub-expressions /* 119 */ /* 120 */ // evaluate aggregate functions and update aggregation buffers /* 121 */ /* 122 */ agg_agg_isNull_2_0 = true; /* 123 */ long agg_value_2 = -1L; /* 124 */ /* 125 */ if (!agg_bufIsNull_0 && (agg_agg_isNull_2_0 
|| /* 126 */ agg_value_2 > agg_bufValue_0)) { /* 127 */ agg_agg_isNull_2_0 = false; /* 128 */ agg_value_2 = agg_bufValue_0; /* 129 */ } /* 130 */ /* 131 */ if (!false && (agg_agg_isNull_2_0 || /* 132 */ agg_value_2 > agg_expr_0_0)) { /* 133 */ agg_agg_isNull_2_0 = false; /* 134 */ agg_value_2 = agg_expr_0_0; /* 135 */ } /* 136 */ /* 137 */ agg_bufIsNull_0 = agg_agg_isNull_2_0; /* 138 */ agg_bufValue_0 = agg_value_2; /* 139 */ /* 140 */ } /* 141 */ /* 142 */ protected void processNext() throws java.io.IOException { /* 143 */ while (!agg_initAgg_0) { /* 144 */ agg_initAgg_0 = true; /* 145 */ long agg_beforeAgg_0 = System.nanoTime(); /* 146 */ agg_doAggregateWithoutKey_0(); /* 147 */ ((org.apache.spark.sql.execution.metric.SQLMetric) references[2] /* aggTime */).add((System.nanoTime() - agg_beforeAgg_0) / 1000000); /* 148 */ /* 149 */ // output the result /* 150 */ /* 151 */ ((org.apache.spark.sql.execution.metric.SQLMetric) references[1] /* numOutputRows */).add(1); /* 152 */ range_mutableStateArray_0[2].reset(); /* 153 */ /* 154 */ range_mutableStateArray_0[2].zeroOutNullBytes(); /* 155 */ /* 156 */ if (agg_bufIsNull_0) { /* 157 */ range_mutableStateArray_0[2].setNullAt(0); /* 158 */ } else { /* 159 */ range_mutableStateArray_0[2].write(0, agg_bufValue_0); /* 160 */ } /* 161 */ append((range_mutableStateArray_0[2].getRow())); /* 162 */ } /* 163 */ } /* 164 */ /* 165 */ } ``` ### Why are the changes needed? For better debuggability. ### Does this PR introduce _any_ user-facing change? Yes. After this change, users can see subquery code by `EXPLAIN CODEGEN`. ### How was this patch tested? New test. Closes #30859 from sarutak/explain-codegen-subqueries. Authored-by: Kousuke Saruta <sarutak@oss.nttdata.com> Signed-off-by: Dongjoon Hyun <dhyun@apple.com> (cherry picked from commit f4e1069) Signed-off-by: Dongjoon Hyun <dhyun@apple.com>

sarutak added 3 commits December 19, 2020 05:36

Fix an issue that EXPLAIN CODEGEN doesn't show subquery code.

ea7def3

Add test.

b0d34e7

Add JIRA ID to the test.

09c6fd1

github-actions bot added the SQL label Dec 19, 2020

dongjoon-hyun reviewed Dec 19, 2020

View reviewed changes

sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala Outdated Show resolved Hide resolved

Remove SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false".

26551bf

maropu reviewed Dec 20, 2020

View reviewed changes

sarutak added 2 commits December 20, 2020 15:18

Apply the change to BenchmarkQueryTest too.

0f6a53b

Minor change.

022d363

maropu reviewed Dec 20, 2020

View reviewed changes

sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala Outdated Show resolved Hide resolved

sarutak added 2 commits December 21, 2020 02:44

Fix typo.

c338832

withTable -> withTempView

13cadeb

sarutak changed the title ~~[SPARK-33853][SQL] EXPLAIN CODEGEN doesn't show subquery code~~ [SPARK-33853][SQL] EXPLAIN CODEGEN and BenchmarkQueryTest don't show subquery code Dec 20, 2020

maropu approved these changes Dec 21, 2020

View reviewed changes

dongjoon-hyun approved these changes Dec 21, 2020

View reviewed changes

dongjoon-hyun closed this in f4e1069 Dec 21, 2020

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[SPARK-33853][SQL] EXPLAIN CODEGEN and BenchmarkQueryTest don't show subquery code #30859

[SPARK-33853][SQL] EXPLAIN CODEGEN and BenchmarkQueryTest don't show subquery code #30859

sarutak commented Dec 19, 2020 •

edited

Loading

SparkQA commented Dec 20, 2020

maropu Dec 20, 2020

maropu Dec 20, 2020

maropu commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

maropu commented Dec 21, 2020

dongjoon-hyun commented Dec 21, 2020

SparkQA commented Dec 21, 2020

SparkQA commented Dec 21, 2020

SparkQA commented Dec 21, 2020

	val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]()
	plan foreach {
	case s: WholeStageCodegenExec =>
	codegenSubtrees += s
	case _ =>
	}

[SPARK-33853][SQL] EXPLAIN CODEGEN and BenchmarkQueryTest don't show subquery code #30859

[SPARK-33853][SQL] EXPLAIN CODEGEN and BenchmarkQueryTest don't show subquery code #30859

Conversation

sarutak commented Dec 19, 2020 • edited Loading

What changes were proposed in this pull request?

Why are the changes needed?

Does this PR introduce any user-facing change?

How was this patch tested?

SparkQA commented Dec 20, 2020

maropu Dec 20, 2020

Choose a reason for hiding this comment

maropu Dec 20, 2020

Choose a reason for hiding this comment

maropu commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

SparkQA commented Dec 20, 2020

maropu commented Dec 21, 2020

dongjoon-hyun commented Dec 21, 2020

SparkQA commented Dec 21, 2020

SparkQA commented Dec 21, 2020

SparkQA commented Dec 21, 2020

sarutak commented Dec 19, 2020 •

edited

Loading