From 98e146e29189865043b24f8cac7a7b6f5accbc35 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 18 Jul 2021 07:43:31 -0400 Subject: [PATCH 1/2] Move assert_batches_eq! macros to test_utils.rs --- datafusion/src/test/mod.rs | 69 ----------------------------------- datafusion/src/test_util.rs | 71 ++++++++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 70 deletions(-) diff --git a/datafusion/src/test/mod.rs b/datafusion/src/test/mod.rs index b791551133e7..e9a33745eeeb 100644 --- a/datafusion/src/test/mod.rs +++ b/datafusion/src/test/mod.rs @@ -280,72 +280,3 @@ pub fn make_timestamps() -> RecordBatch { pub mod exec; pub mod user_defined; pub mod variable; - -/// Compares formatted output of a record batch with an expected -/// vector of strings, with the result of pretty formatting record -/// batches. This is a macro so errors appear on the correct line -/// -/// Designed so that failure output can be directly copy/pasted -/// into the test code as expected results. -/// -/// Expects to be called about like this: -/// -/// `assert_batch_eq!(expected_lines: &[&str], batches: &[RecordBatch])` -#[macro_export] -macro_rules! assert_batches_eq { - ($EXPECTED_LINES: expr, $CHUNKS: expr) => { - let expected_lines: Vec<String> = - $EXPECTED_LINES.iter().map(|&s| s.into()).collect(); - - let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS).unwrap(); - - let actual_lines: Vec<&str> = formatted.trim().lines().collect(); - - assert_eq!( - expected_lines, actual_lines, - "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", - expected_lines, actual_lines - ); - }; -} - -/// Compares formatted output of a record batch with an expected -/// vector of strings in a way that order does not matter. -/// This is a macro so errors appear on the correct line -/// -/// Designed so that failure output can be directly copy/pasted -/// into the test code as expected results. 
-/// -/// Expects to be called about like this: -/// -/// `assert_batch_sorted_eq!(expected_lines: &[&str], batches: &[RecordBatch])` -#[macro_export] -macro_rules! assert_batches_sorted_eq { - ($EXPECTED_LINES: expr, $CHUNKS: expr) => { - let mut expected_lines: Vec<String> = - $EXPECTED_LINES.iter().map(|&s| s.into()).collect(); - - // sort except for header + footer - let num_lines = expected_lines.len(); - if num_lines > 3 { - expected_lines.as_mut_slice()[2..num_lines - 1].sort_unstable() - } - - let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS).unwrap(); - // fix for windows: \r\n --> - - let mut actual_lines: Vec<&str> = formatted.trim().lines().collect(); - - // sort except for header + footer - let num_lines = actual_lines.len(); - if num_lines > 3 { - actual_lines.as_mut_slice()[2..num_lines - 1].sort_unstable() - } - - assert_eq!( - expected_lines, actual_lines, - "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", - expected_lines, actual_lines - ); - }; -} diff --git a/datafusion/src/test_util.rs b/datafusion/src/test_util.rs index e96e8e0c209f..0c9498acf920 100644 --- a/datafusion/src/test_util.rs +++ b/datafusion/src/test_util.rs @@ -15,10 +15,79 @@ // specific language governing permissions and limitations // under the License. -//! Utils to make testing easier +//! Utility functions to make testing DataFusion based crates easier use std::{env, error::Error, path::PathBuf}; +/// Compares formatted output of a record batch with an expected +/// vector of strings, with the result of pretty formatting record +/// batches. This is a macro so errors appear on the correct line +/// +/// Designed so that failure output can be directly copy/pasted +/// into the test code as expected results. +/// +/// Expects to be called about like this: +/// +/// `assert_batch_eq!(expected_lines: &[&str], batches: &[RecordBatch])` +#[macro_export] +macro_rules! 
assert_batches_eq { + ($EXPECTED_LINES: expr, $CHUNKS: expr) => { + let expected_lines: Vec<String> = + $EXPECTED_LINES.iter().map(|&s| s.into()).collect(); + + let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS).unwrap(); + + let actual_lines: Vec<&str> = formatted.trim().lines().collect(); + + assert_eq!( + expected_lines, actual_lines, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected_lines, actual_lines + ); + }; +} + +/// Compares formatted output of a record batch with an expected +/// vector of strings in a way that order does not matter. +/// This is a macro so errors appear on the correct line +/// +/// Designed so that failure output can be directly copy/pasted +/// into the test code as expected results. +/// +/// Expects to be called about like this: +/// +/// `assert_batch_sorted_eq!(expected_lines: &[&str], batches: &[RecordBatch])` +#[macro_export] +macro_rules! assert_batches_sorted_eq { + ($EXPECTED_LINES: expr, $CHUNKS: expr) => { + let mut expected_lines: Vec<String> = + $EXPECTED_LINES.iter().map(|&s| s.into()).collect(); + + // sort except for header + footer + let num_lines = expected_lines.len(); + if num_lines > 3 { + expected_lines.as_mut_slice()[2..num_lines - 1].sort_unstable() + } + + let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS).unwrap(); + // fix for windows: \r\n --> + + let mut actual_lines: Vec<&str> = formatted.trim().lines().collect(); + + // sort except for header + footer + let num_lines = actual_lines.len(); + if num_lines > 3 { + actual_lines.as_mut_slice()[2..num_lines - 1].sort_unstable() + } + + assert_eq!( + expected_lines, actual_lines, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected_lines, actual_lines + ); + }; +} + /// Returns the arrow test data directory, which is by default stored /// in a git submodule rooted at `testing/data`. 
/// From 6c806a719db25e371ac68a3af45c115e76a51ed1 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 18 Jul 2021 07:55:01 -0400 Subject: [PATCH 2/2] port test --- datafusion/tests/sql.rs | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index 9c7d0795edb9..342bf0e943ce 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -35,6 +35,7 @@ use arrow::{ util::display::array_value_to_string, }; +use datafusion::assert_batches_eq; use datafusion::logical_plan::LogicalPlan; use datafusion::prelude::*; use datafusion::{ @@ -112,19 +113,23 @@ async fn parquet_query() { // NOTE that string_col is actually a binary column and does not have the UTF8 logical type // so we need an explicit cast let sql = "SELECT id, CAST(string_col AS varchar) FROM alltypes_plain"; - let actual = execute(&mut ctx, sql).await; + let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - vec!["4", "0"], - vec!["5", "1"], - vec!["6", "0"], - vec!["7", "1"], - vec!["2", "0"], - vec!["3", "1"], - vec!["0", "0"], - vec!["1", "1"], + "+----+--------------------------+", + "| id | CAST(string_col AS Utf8) |", + "+----+--------------------------+", + "| 4 | 0 |", + "| 5 | 1 |", + "| 6 | 0 |", + "| 7 | 1 |", + "| 2 | 0 |", + "| 3 | 1 |", + "| 0 | 0 |", + "| 1 | 1 |", + "+----+--------------------------+", ]; - assert_eq!(expected, actual); + assert_batches_eq!(expected, &actual); } #[tokio::test] @@ -2476,7 +2481,7 @@ fn register_alltypes_parquet(ctx: &mut ExecutionContext) { /// Execute query and return result set as 2-d table of Vecs /// `result[row][column]` -async fn execute(ctx: &mut ExecutionContext, sql: &str) -> Vec<Vec<String>> { +async fn execute_to_batches(ctx: &mut ExecutionContext, sql: &str) -> Vec<RecordBatch> { let msg = format!("Creating logical plan for '{}'", sql); let plan = ctx.create_logical_plan(sql).expect(&msg); let logical_schema = plan.schema(); @@ -2492,8 +2497,13 
@@ async fn execute(ctx: &mut ExecutionContext, sql: &str) -> Vec<Vec<String>> { let results = collect(plan).await.expect(&msg); assert_eq!(logical_schema.as_ref(), optimized_logical_schema.as_ref()); + results +} - result_vec(&results) +/// Execute query and return result set as 2-d table of Vecs +/// `result[row][column]` +async fn execute(ctx: &mut ExecutionContext, sql: &str) -> Vec<Vec<String>> { + result_vec(&execute_to_batches(ctx, sql).await) } /// Specialised String representation