apache · alamb · May 15, 2023 · May 10, 2023 · May 12, 2023 · May 12, 2023
diff --git a/datafusion/core/tests/sqllogictests/src/engines/datafusion/normalize.rs b/datafusion/core/tests/sqllogictests/src/engines/datafusion/normalize.rs
@@ -74,7 +74,7 @@ pub fn convert_batches(batches: Vec<RecordBatch>) -> Result<Vec<Vec<String>>> {
-///   "Sort: d.b ASC NULLS LAST",
+///   "|Sort: d.b ASC NULLS LAST",
 /// ]
 /// [ <--- newly added row
-///   "  Projection: d.b, MAX(d.a) AS max_a",
+///   "|  Projection: d.b, MAX(d.a) AS max_a",
 /// ]
 /// ```
 fn expand_row(mut row: Vec<String>) -> impl Iterator<Item = Vec<String>> {
@@ -92,7 +92,15 @@ fn expand_row(mut row: Vec<String>) -> impl Iterator<Item = Vec<String>> {
         }
 
         // form new rows with each additional line
-        let new_lines: Vec<_> = lines.into_iter().map(|l| vec![l.to_string()]).collect();
+        let new_lines: Vec<_> = lines
+            .into_iter()
+            .map(|l| {
+                // Prefix every line with "|" so that `sqllogictest --complete`
+                // doesn't strip the leading whitespace (the plan indentation)
+                // See https://github.com/apache/arrow-datafusion/issues/6328
+                vec![format!("|{l}")]
+            })
+            .collect();
 
         Either::Right(once(row).chain(new_lines.into_iter()))
     } else {

diff --git a/datafusion/core/tests/sqllogictests/test_files/errors.slt b/datafusion/core/tests/sqllogictests/test_files/errors.slt
@@ -72,4 +72,3 @@ SELECT COUNT(*) FROM nonexistentcatalog.public.aggregate_test_100
 
 statement error Error during planning: Unsupported compound identifier '\[Ident \{ value: "way", quote_style: None \}, Ident \{ value: "too", quote_style: None \}, Ident \{ value: "many", quote_style: None \}, Ident \{ value: "namespaces", quote_style: None \}, Ident \{ value: "as", quote_style: None \}, Ident \{ value: "ident", quote_style: None \}, Ident \{ value: "prefixes", quote_style: None \}, Ident \{ value: "aggregate_test_100", quote_style: None \}\]'
 SELECT COUNT(*) FROM way.too.many.namespaces.as.ident.prefixes.aggregate_test_100
-
diff --git a/datafusion/core/tests/sqllogictests/test_files/explain.slt b/datafusion/core/tests/sqllogictests/test_files/explain.slt
@@ -39,15 +39,15 @@ query TT
 explain SELECT c1 FROM aggregate_test_100 where c2 > 10
 ----
 logical_plan
-Projection: aggregate_test_100.c1
-  Filter: aggregate_test_100.c2 > Int8(10)
-    TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]
+|Projection: aggregate_test_100.c1
+|  Filter: aggregate_test_100.c2 > Int8(10)
+|    TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]
 physical_plan
-ProjectionExec: expr=[c1@0 as c1]
-  CoalesceBatchesExec: target_batch_size=8192
-    FilterExec: c2@1 > 10
-      RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-        CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2], has_header=true
+|ProjectionExec: expr=[c1@0 as c1]
+|  CoalesceBatchesExec: target_batch_size=8192
+|    FilterExec: c2@1 > 10
+|      RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+|        CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2], has_header=true
 
 # explain_csv_exec_scan_config
 
@@ -76,12 +76,12 @@ query TT
 explain SELECT c1 FROM aggregate_test_100_with_order order by c1 ASC limit 10
 ----
 logical_plan
-Limit: skip=0, fetch=10
-  Sort: aggregate_test_100_with_order.c1 ASC NULLS LAST, fetch=10
-    TableScan: aggregate_test_100_with_order projection=[c1]
+|Limit: skip=0, fetch=10
+|  Sort: aggregate_test_100_with_order.c1 ASC NULLS LAST, fetch=10
+|    TableScan: aggregate_test_100_with_order projection=[c1]
 physical_plan
-GlobalLimitExec: skip=0, fetch=10
-  CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1], output_ordering=[c1@0 ASC NULLS LAST], has_header=true
+|GlobalLimitExec: skip=0, fetch=10
+|  CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1], output_ordering=[c1@0 ASC NULLS LAST], has_header=true
 
 
 ## explain_physical_plan_only
@@ -93,8 +93,8 @@ query TT
 EXPLAIN select count(*) from (values ('a', 1, 100), ('a', 2, 150)) as t (c1,c2,c3)
 ----
 physical_plan
-ProjectionExec: expr=[2 as COUNT(UInt8(1))]
-  EmptyExec: produce_one_row=true
+|ProjectionExec: expr=[2 as COUNT(UInt8(1))]
+|  EmptyExec: produce_one_row=true
 
 statement ok
 set datafusion.explain.physical_plan_only = false

diff --git a/datafusion/core/tests/sqllogictests/test_files/groupby.slt b/datafusion/core/tests/sqllogictests/test_files/groupby.slt
@@ -1953,13 +1953,13 @@ EXPLAIN SELECT a, b,
  GROUP BY b, a
 ----
 logical_plan
-Projection: annotated_data_infinite2.a, annotated_data_infinite2.b, SUM(annotated_data_infinite2.c) AS summation1
-  Aggregate: groupBy=[[annotated_data_infinite2.b, annotated_data_infinite2.a]], aggr=[[SUM(annotated_data_infinite2.c)]]
-    TableScan: annotated_data_infinite2 projection=[a, b, c]
+|Projection: annotated_data_infinite2.a, annotated_data_infinite2.b, SUM(annotated_data_infinite2.c) AS summation1
+|  Aggregate: groupBy=[[annotated_data_infinite2.b, annotated_data_infinite2.a]], aggr=[[SUM(annotated_data_infinite2.c)]]
+|    TableScan: annotated_data_infinite2 projection=[a, b, c]
 physical_plan
-ProjectionExec: expr=[a@1 as a, b@0 as b, SUM(annotated_data_infinite2.c)@2 as summation1]
-  AggregateExec: mode=Single, gby=[b@1 as b, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=FullyOrdered
-    CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true
+|ProjectionExec: expr=[a@1 as a, b@0 as b, SUM(annotated_data_infinite2.c)@2 as summation1]
+|  AggregateExec: mode=Single, gby=[b@1 as b, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=FullyOrdered
+|    CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true
 
 
 query III
@@ -1983,13 +1983,13 @@ EXPLAIN SELECT a, d,
  GROUP BY d, a
 ----
 logical_plan
-Projection: annotated_data_infinite2.a, annotated_data_infinite2.d, SUM(annotated_data_infinite2.c) AS summation1
-  Aggregate: groupBy=[[annotated_data_infinite2.d, annotated_data_infinite2.a]], aggr=[[SUM(annotated_data_infinite2.c)]]
-    TableScan: annotated_data_infinite2 projection=[a, c, d]
+|Projection: annotated_data_infinite2.a, annotated_data_infinite2.d, SUM(annotated_data_infinite2.c) AS summation1
+|  Aggregate: groupBy=[[annotated_data_infinite2.d, annotated_data_infinite2.a]], aggr=[[SUM(annotated_data_infinite2.c)]]
+|    TableScan: annotated_data_infinite2 projection=[a, c, d]
 physical_plan
-ProjectionExec: expr=[a@1 as a, d@0 as d, SUM(annotated_data_infinite2.c)@2 as summation1]
-  AggregateExec: mode=Single, gby=[d@2 as d, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=PartiallyOrdered
-    CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST], has_header=true
+|ProjectionExec: expr=[a@1 as a, d@0 as d, SUM(annotated_data_infinite2.c)@2 as summation1]
+|  AggregateExec: mode=Single, gby=[d@2 as d, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=PartiallyOrdered
+|    CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST], has_header=true
 
 query III
 SELECT a, d,

diff --git a/datafusion/core/tests/sqllogictests/test_files/json.slt b/datafusion/core/tests/sqllogictests/test_files/json.slt
@@ -49,14 +49,14 @@ query TT
 EXPLAIN SELECT count(*) from json_test
 ----
 logical_plan
-Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1))]]
-  TableScan: json_test projection=[a]
+|Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1))]]
+|  TableScan: json_test projection=[a]
 physical_plan
-AggregateExec: mode=Final, gby=[], aggr=[COUNT(UInt8(1))]
-  CoalescePartitionsExec
-    AggregateExec: mode=Partial, gby=[], aggr=[COUNT(UInt8(1))]
-      RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-        JsonExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/2.json]]}, projection=[a]
+|AggregateExec: mode=Final, gby=[], aggr=[COUNT(UInt8(1))]
+|  CoalescePartitionsExec
+|    AggregateExec: mode=Partial, gby=[], aggr=[COUNT(UInt8(1))]
+|      RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+|        JsonExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/2.json]]}, projection=[a]
 
 query error DataFusion error: Schema error: No field named mycol\.
 SELECT mycol FROM single_nan

diff --git a/datafusion/core/tests/sqllogictests/test_files/order.slt b/datafusion/core/tests/sqllogictests/test_files/order.slt
@@ -159,13 +159,13 @@ query TT
 explain SELECT c1, c2 FROM aggregate_test_100 ORDER BY c2, c3, c2
 ----
 logical_plan
-Projection: aggregate_test_100.c1, aggregate_test_100.c2
-  Sort: aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST
-    TableScan: aggregate_test_100 projection=[c1, c2, c3]
+|Projection: aggregate_test_100.c1, aggregate_test_100.c2
+|  Sort: aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST
+|    TableScan: aggregate_test_100 projection=[c1, c2, c3]
 physical_plan
-ProjectionExec: expr=[c1@0 as c1, c2@1 as c2]
-  SortExec: expr=[c2@1 ASC NULLS LAST,c3@2 ASC NULLS LAST]
-    CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3], has_header=true
+|ProjectionExec: expr=[c1@0 as c1, c2@1 as c2]
+|  SortExec: expr=[c2@1 ASC NULLS LAST,c3@2 ASC NULLS LAST]
+|    CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3], has_header=true
 
 query II
 SELECT c2, c3 FROM aggregate_test_100 ORDER BY c2, c3, c2

diff --git a/datafusion/core/tests/sqllogictests/test_files/select.slt b/datafusion/core/tests/sqllogictests/test_files/select.slt
@@ -447,12 +447,12 @@ query TT
 EXPLAIN SELECT c1 BETWEEN 2 AND 3 FROM select_between_data
 ----
 logical_plan
-Projection: select_between_data.c1 >= Int64(2) AND select_between_data.c1 <= Int64(3) AS select_between_data.c1 BETWEEN Int64(2) AND Int64(3)
-  TableScan: select_between_data projection=[c1]
+|Projection: select_between_data.c1 >= Int64(2) AND select_between_data.c1 <= Int64(3) AS select_between_data.c1 BETWEEN Int64(2) AND Int64(3)
+|  TableScan: select_between_data projection=[c1]
 physical_plan
-ProjectionExec: expr=[c1@0 >= 2 AND c1@0 <= 3 as select_between_data.c1 BETWEEN Int64(2) AND Int64(3)]
-  RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-    MemoryExec: partitions=1, partition_sizes=[1]
+|ProjectionExec: expr=[c1@0 >= 2 AND c1@0 <= 3 as select_between_data.c1 BETWEEN Int64(2) AND Int64(3)]
+|  RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+|    MemoryExec: partitions=1, partition_sizes=[1]
 
 
 # TODO: query_get_indexed_field
@@ -713,12 +713,12 @@ EXPLAIN SELECT a FROM annotated_data_finite2
         LIMIT 5
 ----
 logical_plan
-Limit: skip=0, fetch=5
-  Sort: annotated_data_finite2.a ASC NULLS LAST, fetch=5
-    TableScan: annotated_data_finite2 projection=[a]
+|Limit: skip=0, fetch=5
+|  Sort: annotated_data_finite2.a ASC NULLS LAST, fetch=5
+|    TableScan: annotated_data_finite2 projection=[a]
 physical_plan
-GlobalLimitExec: skip=0, fetch=5
-  CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a], output_ordering=[a@0 ASC NULLS LAST], has_header=true
+|GlobalLimitExec: skip=0, fetch=5
+|  CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a], output_ordering=[a@0 ASC NULLS LAST], has_header=true
 
 query I
 SELECT a FROM annotated_data_finite2