Skip to content

Commit

Permalink
Fix explain plan formatting in sqllogictest (#6329)
Browse files Browse the repository at this point in the history
* Fix explain plan formatting in sqllogictest

* Use `-` to ensure plan indent is maintained

* Refine

* Update tests
  • Loading branch information
alamb authored May 15, 2023
1 parent 93ff57e commit 62621ee
Show file tree
Hide file tree
Showing 9 changed files with 664 additions and 654 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ pub fn convert_batches(batches: Vec<RecordBatch>) -> Result<Vec<Vec<String>>> {
/// "Sort: d.b ASC NULLS LAST",
/// ]
/// [ <--- newly added row
/// " Projection: d.b, MAX(d.a) AS max_a",
/// "--Projection: d.b, MAX(d.a) AS max_a",
/// ]
/// ```
fn expand_row(mut row: Vec<String>) -> impl Iterator<Item = Vec<String>> {
Expand All @@ -92,7 +92,18 @@ fn expand_row(mut row: Vec<String>) -> impl Iterator<Item = Vec<String>> {
}

// form new rows with each additional line
let new_lines: Vec<_> = lines.into_iter().map(|l| vec![l.to_string()]).collect();
let new_lines: Vec<_> = lines
.into_iter()
.map(|l| {
// replace any leading spaces with '-' as
// `sqllogictest` ignores whitespace differences
//
// See https://github.com/apache/arrow-datafusion/issues/6328
let content = l.trim_start();
let new_prefix = "-".repeat(l.len() - content.len());
vec![format!("{new_prefix}{content}")]
})
.collect();

Either::Right(once(row).chain(new_lines.into_iter()))
} else {
Expand Down
1 change: 0 additions & 1 deletion datafusion/core/tests/sqllogictests/test_files/errors.slt
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,3 @@ SELECT COUNT(*) FROM nonexistentcatalog.public.aggregate_test_100

statement error Error during planning: Unsupported compound identifier '\[Ident \{ value: "way", quote_style: None \}, Ident \{ value: "too", quote_style: None \}, Ident \{ value: "many", quote_style: None \}, Ident \{ value: "namespaces", quote_style: None \}, Ident \{ value: "as", quote_style: None \}, Ident \{ value: "ident", quote_style: None \}, Ident \{ value: "prefixes", quote_style: None \}, Ident \{ value: "aggregate_test_100", quote_style: None \}\]'
SELECT COUNT(*) FROM way.too.many.namespaces.as.ident.prefixes.aggregate_test_100

24 changes: 12 additions & 12 deletions datafusion/core/tests/sqllogictests/test_files/explain.slt
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,14 @@ explain SELECT c1 FROM aggregate_test_100 where c2 > 10
----
logical_plan
Projection: aggregate_test_100.c1
Filter: aggregate_test_100.c2 > Int8(10)
TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]
--Filter: aggregate_test_100.c2 > Int8(10)
----TableScan: aggregate_test_100 projection=[c1, c2], partial_filters=[aggregate_test_100.c2 > Int8(10)]
physical_plan
ProjectionExec: expr=[c1@0 as c1]
CoalesceBatchesExec: target_batch_size=8192
FilterExec: c2@1 > 10
RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2], has_header=true
--CoalesceBatchesExec: target_batch_size=8192
----FilterExec: c2@1 > 10
------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2], has_header=true

# explain_csv_exec_scan_config

Expand Down Expand Up @@ -77,11 +77,11 @@ explain SELECT c1 FROM aggregate_test_100_with_order order by c1 ASC limit 10
----
logical_plan
Limit: skip=0, fetch=10
Sort: aggregate_test_100_with_order.c1 ASC NULLS LAST, fetch=10
TableScan: aggregate_test_100_with_order projection=[c1]
--Sort: aggregate_test_100_with_order.c1 ASC NULLS LAST, fetch=10
----TableScan: aggregate_test_100_with_order projection=[c1]
physical_plan
GlobalLimitExec: skip=0, fetch=10
CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1], output_ordering=[c1@0 ASC NULLS LAST], has_header=true
--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1], output_ordering=[c1@0 ASC NULLS LAST], has_header=true


## explain_physical_plan_only
Expand All @@ -94,7 +94,7 @@ EXPLAIN select count(*) from (values ('a', 1, 100), ('a', 2, 150)) as t (c1,c2,c
----
physical_plan
ProjectionExec: expr=[2 as COUNT(UInt8(1))]
EmptyExec: produce_one_row=true
--EmptyExec: produce_one_row=true

statement ok
set datafusion.explain.physical_plan_only = false
Expand Down Expand Up @@ -139,7 +139,7 @@ EXPLAIN VERBOSE SELECT a, b, c FROM simple_explain_test
----
initial_logical_plan
Projection: simple_explain_test.a, simple_explain_test.b, simple_explain_test.c
TableScan: simple_explain_test
--TableScan: simple_explain_test
logical_plan after inline_table_scan SAME TEXT AS ABOVE
logical_plan after type_coercion SAME TEXT AS ABOVE
logical_plan after count_wildcard_rule SAME TEXT AS ABOVE
Expand Down Expand Up @@ -170,7 +170,7 @@ logical_plan after unwrap_cast_in_comparison SAME TEXT AS ABOVE
logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE
logical_plan after push_down_projection
Projection: simple_explain_test.a, simple_explain_test.b, simple_explain_test.c
TableScan: simple_explain_test projection=[a, b, c]
--TableScan: simple_explain_test projection=[a, b, c]
logical_plan after eliminate_projection TableScan: simple_explain_test projection=[a, b, c]
logical_plan after push_down_limit SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
Expand Down
16 changes: 8 additions & 8 deletions datafusion/core/tests/sqllogictests/test_files/groupby.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1954,12 +1954,12 @@ EXPLAIN SELECT a, b,
----
logical_plan
Projection: annotated_data_infinite2.a, annotated_data_infinite2.b, SUM(annotated_data_infinite2.c) AS summation1
Aggregate: groupBy=[[annotated_data_infinite2.b, annotated_data_infinite2.a]], aggr=[[SUM(annotated_data_infinite2.c)]]
TableScan: annotated_data_infinite2 projection=[a, b, c]
--Aggregate: groupBy=[[annotated_data_infinite2.b, annotated_data_infinite2.a]], aggr=[[SUM(annotated_data_infinite2.c)]]
----TableScan: annotated_data_infinite2 projection=[a, b, c]
physical_plan
ProjectionExec: expr=[a@1 as a, b@0 as b, SUM(annotated_data_infinite2.c)@2 as summation1]
AggregateExec: mode=Single, gby=[b@1 as b, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=FullyOrdered
CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true
--AggregateExec: mode=Single, gby=[b@1 as b, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=FullyOrdered
----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true


query III
Expand All @@ -1984,12 +1984,12 @@ EXPLAIN SELECT a, d,
----
logical_plan
Projection: annotated_data_infinite2.a, annotated_data_infinite2.d, SUM(annotated_data_infinite2.c) AS summation1
Aggregate: groupBy=[[annotated_data_infinite2.d, annotated_data_infinite2.a]], aggr=[[SUM(annotated_data_infinite2.c)]]
TableScan: annotated_data_infinite2 projection=[a, c, d]
--Aggregate: groupBy=[[annotated_data_infinite2.d, annotated_data_infinite2.a]], aggr=[[SUM(annotated_data_infinite2.c)]]
----TableScan: annotated_data_infinite2 projection=[a, c, d]
physical_plan
ProjectionExec: expr=[a@1 as a, d@0 as d, SUM(annotated_data_infinite2.c)@2 as summation1]
AggregateExec: mode=Single, gby=[d@2 as d, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=PartiallyOrdered
CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST], has_header=true
--AggregateExec: mode=Single, gby=[d@2 as d, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=PartiallyOrdered
----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST], has_header=true

query III
SELECT a, d,
Expand Down
10 changes: 5 additions & 5 deletions datafusion/core/tests/sqllogictests/test_files/json.slt
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,13 @@ EXPLAIN SELECT count(*) from json_test
----
logical_plan
Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1))]]
TableScan: json_test projection=[a]
--TableScan: json_test projection=[a]
physical_plan
AggregateExec: mode=Final, gby=[], aggr=[COUNT(UInt8(1))]
CoalescePartitionsExec
AggregateExec: mode=Partial, gby=[], aggr=[COUNT(UInt8(1))]
RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
JsonExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/2.json]]}, projection=[a]
--CoalescePartitionsExec
----AggregateExec: mode=Partial, gby=[], aggr=[COUNT(UInt8(1))]
------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
--------JsonExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/2.json]]}, projection=[a]

query error DataFusion error: Schema error: No field named mycol\.
SELECT mycol FROM single_nan
Expand Down
8 changes: 4 additions & 4 deletions datafusion/core/tests/sqllogictests/test_files/order.slt
Original file line number Diff line number Diff line change
Expand Up @@ -160,12 +160,12 @@ explain SELECT c1, c2 FROM aggregate_test_100 ORDER BY c2, c3, c2
----
logical_plan
Projection: aggregate_test_100.c1, aggregate_test_100.c2
Sort: aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST
TableScan: aggregate_test_100 projection=[c1, c2, c3]
--Sort: aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST
----TableScan: aggregate_test_100 projection=[c1, c2, c3]
physical_plan
ProjectionExec: expr=[c1@0 as c1, c2@1 as c2]
SortExec: expr=[c2@1 ASC NULLS LAST,c3@2 ASC NULLS LAST]
CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3], has_header=true
--SortExec: expr=[c2@1 ASC NULLS LAST,c3@2 ASC NULLS LAST]
----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3], has_header=true

query II
SELECT c2, c3 FROM aggregate_test_100 ORDER BY c2, c3, c2
Expand Down
12 changes: 6 additions & 6 deletions datafusion/core/tests/sqllogictests/test_files/select.slt
Original file line number Diff line number Diff line change
Expand Up @@ -448,11 +448,11 @@ EXPLAIN SELECT c1 BETWEEN 2 AND 3 FROM select_between_data
----
logical_plan
Projection: select_between_data.c1 >= Int64(2) AND select_between_data.c1 <= Int64(3) AS select_between_data.c1 BETWEEN Int64(2) AND Int64(3)
TableScan: select_between_data projection=[c1]
--TableScan: select_between_data projection=[c1]
physical_plan
ProjectionExec: expr=[c1@0 >= 2 AND c1@0 <= 3 as select_between_data.c1 BETWEEN Int64(2) AND Int64(3)]
RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
MemoryExec: partitions=1, partition_sizes=[1]
--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
----MemoryExec: partitions=1, partition_sizes=[1]


# TODO: query_get_indexed_field
Expand Down Expand Up @@ -714,11 +714,11 @@ EXPLAIN SELECT a FROM annotated_data_finite2
----
logical_plan
Limit: skip=0, fetch=5
Sort: annotated_data_finite2.a ASC NULLS LAST, fetch=5
TableScan: annotated_data_finite2 projection=[a]
--Sort: annotated_data_finite2.a ASC NULLS LAST, fetch=5
----TableScan: annotated_data_finite2 projection=[a]
physical_plan
GlobalLimitExec: skip=0, fetch=5
CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a], output_ordering=[a@0 ASC NULLS LAST], has_header=true
--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a], output_ordering=[a@0 ASC NULLS LAST], has_header=true

query I
SELECT a FROM annotated_data_finite2
Expand Down
Loading

0 comments on commit 62621ee

Please sign in to comment.