Fix explain plan formatting in sqllogictest (#6329)

* Fix explain plan formatting in sqllogictest * Use `-` to ensure plan indent is maintained * Refine * Update tests
apache · May 15, 2023 · 62621ee · 62621ee
1 parent 93ff57e
commit 62621ee
Show file tree

Hide file tree

Showing 9 changed files with 664 additions and 654 deletions.
diff --git a/datafusion/core/tests/sqllogictests/src/engines/datafusion/normalize.rs b/datafusion/core/tests/sqllogictests/src/engines/datafusion/normalize.rs
@@ -74,7 +74,7 @@ pub fn convert_batches(batches: Vec<RecordBatch>) -> Result<Vec<Vec<String>>> {
 ///   "Sort: d.b ASC NULLS LAST",
 /// ]
 /// [ <--- newly added row
-///   "  Projection: d.b, MAX(d.a) AS max_a",
+///   "--Projection: d.b, MAX(d.a) AS max_a",
 /// ]
 /// ```
 fn expand_row(mut row: Vec<String>) -> impl Iterator<Item = Vec<String>> {
@@ -92,7 +92,18 @@ fn expand_row(mut row: Vec<String>) -> impl Iterator<Item = Vec<String>> {
         }
 
         // form new rows with each additional line
-        let new_lines: Vec<_> = lines.into_iter().map(|l| vec![l.to_string()]).collect();
+        let new_lines: Vec<_> = lines
+            .into_iter()
+            .map(|l| {
+                // replace any leading spaces with '-' as
+                // `sqllogictest` ignores whitespace differences
+                //
+                // See https://github.com/apache/arrow-datafusion/issues/6328
+                let content = l.trim_start();
+                let new_prefix = "-".repeat(l.len() - content.len());
+                vec![format!("{new_prefix}{content}")]
+            })
+            .collect();
 
         Either::Right(once(row).chain(new_lines.into_iter()))
     } else {

diff --git a/datafusion/core/tests/sqllogictests/test_files/errors.slt b/datafusion/core/tests/sqllogictests/test_files/errors.slt
@@ -72,4 +72,3 @@ SELECT COUNT(*) FROM nonexistentcatalog.public.aggregate_test_100
 
 statement error Error during planning: Unsupported compound identifier '\[Ident \{ value: "way", quote_style: None \}, Ident \{ value: "too", quote_style: None \}, Ident \{ value: "many", quote_style: None \}, Ident \{ value: "namespaces", quote_style: None \}, Ident \{ value: "as", quote_style: None \}, Ident \{ value: "ident", quote_style: None \}, Ident \{ value: "prefixes", quote_style: None \}, Ident \{ value: "aggregate_test_100", quote_style: None \}\]'
 SELECT COUNT(*) FROM way.too.many.namespaces.as.ident.prefixes.aggregate_test_100
-
diff --git a/datafusion/core/tests/sqllogictests/test_files/explain.slt b/datafusion/core/tests/sqllogictests/test_files/explain.slt
@@ -40,14 +40,14 @@ explain SELECT c1 FROM aggregate_test_100 where c2 > 10
 ----
 logical_plan
 Projection: aggregate_test_100.c1
-  Filter: aggregate_test_100.c2 > Int8(10)
-    TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]
+--Filter: aggregate_test_100.c2 > Int8(10)
+----TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]
 physical_plan
 ProjectionExec: expr=[c1@0 as c1]
-  CoalesceBatchesExec: target_batch_size=8192
-    FilterExec: c2@1 > 10
-      RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-        CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2], has_header=true
+--CoalesceBatchesExec: target_batch_size=8192
+----FilterExec: c2@1 > 10
+------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2], has_header=true
 
 # explain_csv_exec_scan_config
 
@@ -77,11 +77,11 @@ explain SELECT c1 FROM aggregate_test_100_with_order order by c1 ASC limit 10
 ----
 logical_plan
 Limit: skip=0, fetch=10
-  Sort: aggregate_test_100_with_order.c1 ASC NULLS LAST, fetch=10
-    TableScan: aggregate_test_100_with_order projection=[c1]
+--Sort: aggregate_test_100_with_order.c1 ASC NULLS LAST, fetch=10
+----TableScan: aggregate_test_100_with_order projection=[c1]
 physical_plan
 GlobalLimitExec: skip=0, fetch=10
-  CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1], output_ordering=[c1@0 ASC NULLS LAST], has_header=true
+--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1], output_ordering=[c1@0 ASC NULLS LAST], has_header=true
 
 
 ## explain_physical_plan_only
@@ -94,7 +94,7 @@ EXPLAIN select count(*) from (values ('a', 1, 100), ('a', 2, 150)) as t (c1,c2,c
 ----
 physical_plan
 ProjectionExec: expr=[2 as COUNT(UInt8(1))]
-  EmptyExec: produce_one_row=true
+--EmptyExec: produce_one_row=true
 
 statement ok
 set datafusion.explain.physical_plan_only = false
@@ -139,7 +139,7 @@ EXPLAIN VERBOSE SELECT a, b, c FROM simple_explain_test
 ----
 initial_logical_plan
 Projection: simple_explain_test.a, simple_explain_test.b, simple_explain_test.c
-  TableScan: simple_explain_test
+--TableScan: simple_explain_test
 logical_plan after inline_table_scan SAME TEXT AS ABOVE
 logical_plan after type_coercion SAME TEXT AS ABOVE
 logical_plan after count_wildcard_rule SAME TEXT AS ABOVE
@@ -170,7 +170,7 @@ logical_plan after unwrap_cast_in_comparison SAME TEXT AS ABOVE
 logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE
 logical_plan after push_down_projection
 Projection: simple_explain_test.a, simple_explain_test.b, simple_explain_test.c
-  TableScan: simple_explain_test projection=[a, b, c]
+--TableScan: simple_explain_test projection=[a, b, c]
 logical_plan after eliminate_projection TableScan: simple_explain_test projection=[a, b, c]
 logical_plan after push_down_limit SAME TEXT AS ABOVE
 logical_plan after simplify_expressions SAME TEXT AS ABOVE

diff --git a/datafusion/core/tests/sqllogictests/test_files/groupby.slt b/datafusion/core/tests/sqllogictests/test_files/groupby.slt
@@ -1954,12 +1954,12 @@ EXPLAIN SELECT a, b,
 ----
 logical_plan
 Projection: annotated_data_infinite2.a, annotated_data_infinite2.b, SUM(annotated_data_infinite2.c) AS summation1
-  Aggregate: groupBy=[[annotated_data_infinite2.b, annotated_data_infinite2.a]], aggr=[[SUM(annotated_data_infinite2.c)]]
-    TableScan: annotated_data_infinite2 projection=[a, b, c]
+--Aggregate: groupBy=[[annotated_data_infinite2.b, annotated_data_infinite2.a]], aggr=[[SUM(annotated_data_infinite2.c)]]
+----TableScan: annotated_data_infinite2 projection=[a, b, c]
 physical_plan
 ProjectionExec: expr=[a@1 as a, b@0 as b, SUM(annotated_data_infinite2.c)@2 as summation1]
-  AggregateExec: mode=Single, gby=[b@1 as b, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=FullyOrdered
-    CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true
+--AggregateExec: mode=Single, gby=[b@1 as b, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=FullyOrdered
+----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true
 
 
 query III
@@ -1984,12 +1984,12 @@ EXPLAIN SELECT a, d,
 ----
 logical_plan
 Projection: annotated_data_infinite2.a, annotated_data_infinite2.d, SUM(annotated_data_infinite2.c) AS summation1
-  Aggregate: groupBy=[[annotated_data_infinite2.d, annotated_data_infinite2.a]], aggr=[[SUM(annotated_data_infinite2.c)]]
-    TableScan: annotated_data_infinite2 projection=[a, c, d]
+--Aggregate: groupBy=[[annotated_data_infinite2.d, annotated_data_infinite2.a]], aggr=[[SUM(annotated_data_infinite2.c)]]
+----TableScan: annotated_data_infinite2 projection=[a, c, d]
 physical_plan
 ProjectionExec: expr=[a@1 as a, d@0 as d, SUM(annotated_data_infinite2.c)@2 as summation1]
-  AggregateExec: mode=Single, gby=[d@2 as d, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=PartiallyOrdered
-    CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST], has_header=true
+--AggregateExec: mode=Single, gby=[d@2 as d, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=PartiallyOrdered
+----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST], has_header=true
 
 query III
 SELECT a, d,

diff --git a/datafusion/core/tests/sqllogictests/test_files/json.slt b/datafusion/core/tests/sqllogictests/test_files/json.slt
@@ -50,13 +50,13 @@ EXPLAIN SELECT count(*) from json_test
 ----
 logical_plan
 Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1))]]
-  TableScan: json_test projection=[a]
+--TableScan: json_test projection=[a]
 physical_plan
 AggregateExec: mode=Final, gby=[], aggr=[COUNT(UInt8(1))]
-  CoalescePartitionsExec
-    AggregateExec: mode=Partial, gby=[], aggr=[COUNT(UInt8(1))]
-      RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-        JsonExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/2.json]]}, projection=[a]
+--CoalescePartitionsExec
+----AggregateExec: mode=Partial, gby=[], aggr=[COUNT(UInt8(1))]
+------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+--------JsonExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/2.json]]}, projection=[a]
 
 query error DataFusion error: Schema error: No field named mycol\.
 SELECT mycol FROM single_nan

diff --git a/datafusion/core/tests/sqllogictests/test_files/order.slt b/datafusion/core/tests/sqllogictests/test_files/order.slt
@@ -160,12 +160,12 @@ explain SELECT c1, c2 FROM aggregate_test_100 ORDER BY c2, c3, c2
 ----
 logical_plan
 Projection: aggregate_test_100.c1, aggregate_test_100.c2
-  Sort: aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST
-    TableScan: aggregate_test_100 projection=[c1, c2, c3]
+--Sort: aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST
+----TableScan: aggregate_test_100 projection=[c1, c2, c3]
 physical_plan
 ProjectionExec: expr=[c1@0 as c1, c2@1 as c2]
-  SortExec: expr=[c2@1 ASC NULLS LAST,c3@2 ASC NULLS LAST]
-    CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3], has_header=true
+--SortExec: expr=[c2@1 ASC NULLS LAST,c3@2 ASC NULLS LAST]
+----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3], has_header=true
 
 query II
 SELECT c2, c3 FROM aggregate_test_100 ORDER BY c2, c3, c2

diff --git a/datafusion/core/tests/sqllogictests/test_files/select.slt b/datafusion/core/tests/sqllogictests/test_files/select.slt
@@ -448,11 +448,11 @@ EXPLAIN SELECT c1 BETWEEN 2 AND 3 FROM select_between_data
 ----
 logical_plan
 Projection: select_between_data.c1 >= Int64(2) AND select_between_data.c1 <= Int64(3) AS select_between_data.c1 BETWEEN Int64(2) AND Int64(3)
-  TableScan: select_between_data projection=[c1]
+--TableScan: select_between_data projection=[c1]
 physical_plan
 ProjectionExec: expr=[c1@0 >= 2 AND c1@0 <= 3 as select_between_data.c1 BETWEEN Int64(2) AND Int64(3)]
-  RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-    MemoryExec: partitions=1, partition_sizes=[1]
+--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+----MemoryExec: partitions=1, partition_sizes=[1]
 
 
 # TODO: query_get_indexed_field
@@ -714,11 +714,11 @@ EXPLAIN SELECT a FROM annotated_data_finite2
 ----
 logical_plan
 Limit: skip=0, fetch=5
-  Sort: annotated_data_finite2.a ASC NULLS LAST, fetch=5
-    TableScan: annotated_data_finite2 projection=[a]
+--Sort: annotated_data_finite2.a ASC NULLS LAST, fetch=5
+----TableScan: annotated_data_finite2 projection=[a]
 physical_plan
 GlobalLimitExec: skip=0, fetch=5
-  CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a], output_ordering=[a@0 ASC NULLS LAST], has_header=true
+--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a], output_ordering=[a@0 ASC NULLS LAST], has_header=true
 
 query I
 SELECT a FROM annotated_data_finite2