apache · xudong963 · Dec 22, 2022 · Dec 18, 2022 · Dec 18, 2022 · Dec 22, 2022
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
@@ -109,7 +109,7 @@ doc-comment = "0.3"
 env_logger = "0.10"
 parquet-test-utils = { path = "../../parquet-test-utils" }
 rstest = "0.16.0"
-sqllogictest = "0.9.0"
+sqllogictest = "0.10.0"
 sqlparser = "0.28"
 test-utils = { path = "../../test-utils" }
 thiserror = "1.0.37"

diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs
@@ -1079,37 +1079,3 @@ async fn aggregate_with_alias() -> Result<()> {
     );
     Ok(())
 }
-
-#[tokio::test]
-async fn array_agg_zero() -> Result<()> {
-    let ctx = SessionContext::new();
-    // 2 different aggregate functions: avg and sum(distinct)
-    let sql = "SELECT ARRAY_AGG([]);";
-    let actual = execute_to_batches(&ctx, sql).await;
-    let expected = vec![
-        "+------------------------+",
-        "| ARRAYAGG(List([NULL])) |",
-        "+------------------------+",
-        "| []                     |",
-        "+------------------------+",
-    ];
-    assert_batches_eq!(expected, &actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn array_agg_one() -> Result<()> {
-    let ctx = SessionContext::new();
-    // 2 different aggregate functions: avg and sum(distinct)
-    let sql = "SELECT ARRAY_AGG([1]);";
-    let actual = execute_to_batches(&ctx, sql).await;
-    let expected = vec![
-        "+---------------------+",
-        "| ARRAYAGG(List([1])) |",
-        "+---------------------+",
-        "| [[1]]               |",
-        "+---------------------+",
-    ];
-    assert_batches_eq!(expected, &actual);
-    Ok(())
-}
diff --git a/datafusion/core/tests/sqllogictests/README.md b/datafusion/core/tests/sqllogictests/README.md
@@ -21,7 +21,11 @@
 
 This is the Datafusion implementation of [sqllogictest](https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki). We use [sqllogictest-rs](https://github.com/risinglightdb/sqllogictest-rs) as a parser/runner of `.slt` files in `test_files`.
 
-#### Running tests
+#### Running tests: Validation Mode
+
+In this model, `sqllogictests` runs the statements and queries in a `.slt` file, comparing the expected output in the fule to the output produced by that run.
+
+Run all tests suites in validation mode
 
 ```shell
 cargo test -p datafusion --test sqllogictests
@@ -40,6 +44,17 @@ Run only the tests in `information_schema.slt`:
 cargo test -p datafusion --test sqllogictests -- information
 ```
 
+#### Updating tests: Completion Mode
+
+In test script completion mode, `sqllogictests` reads a prototype script and runs the statements and queries against the database engine. The output is is a full script that is a copy of the prototype script with result inserted.
+
+You can update tests by passing the `--complete` argument.
+
+```shell
+# Update ddl.slt with output from running
+cargo test -p datafusion --test sqllogictests -- ddl
+```
+
 #### sqllogictests
 
 > :warning: **Warning**:Datafusion's sqllogictest implementation and migration is still in progress. Definitions taken from https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki

diff --git a/datafusion/core/tests/sqllogictests/src/error.rs b/datafusion/core/tests/sqllogictests/src/error.rs
@@ -38,6 +38,9 @@ pub enum DFSqlLogicTestError {
     /// Error from arrow-rs
     #[error("Arrow error: {0}")]
     Arrow(ArrowError),
+    /// Generic error
+    #[error("Other Error: {0}")]
+    Other(String),
 }
 
 impl From<TestError> for DFSqlLogicTestError {
@@ -63,3 +66,9 @@ impl From<ArrowError> for DFSqlLogicTestError {
         DFSqlLogicTestError::Arrow(value)
     }
 }
+
+impl From<String> for DFSqlLogicTestError {
+    fn from(value: String) -> Self {
+        DFSqlLogicTestError::Other(value)
+    }
+}
diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs
@@ -23,7 +23,7 @@ use log::info;
 use normalize::convert_batches;
 use sqllogictest::DBOutput;
 use sqlparser::ast::Statement as SQLStatement;
-use std::path::{Path, PathBuf};
+use std::path::Path;
 use std::time::Duration;
 
 use crate::error::{DFSqlLogicTestError, Result};
@@ -78,78 +78,49 @@ pub async fn main() -> Result<()> {
 #[cfg(not(target_family = "windows"))]
 pub async fn main() -> Result<()> {
     // Enable logging (e.g. set RUST_LOG=debug to see debug logs)
+
+    use sqllogictest::{default_validator, update_test_file};
     env_logger::init();
 
-    // run each file using its own new DB
-    //
-    // Note: can't use tester.run_parallel_async()
-    // as that will reuse the same SessionContext
-    //
-    // We could run these tests in parallel eventually if we wanted.
+    let options = Options::new();
+
+    // default to all files in test directory filtering based on name
+    let files: Vec<_> = std::fs::read_dir(TEST_DIRECTORY)
+        .unwrap()
+        .map(|path| path.unwrap().path())
+        .filter(|path| options.check_test_file(path.as_path()))
+        .collect();
 
-    let files = get_test_files();
     info!("Running test files {:?}", files);
 
     for path in files {
         println!("Running: {}", path.display());
 
         let file_name = path.file_name().unwrap().to_str().unwrap().to_string();
 
+        // Create the test runner
         let ctx = context_for_test_file(&file_name).await;
-
-        let mut tester = sqllogictest::Runner::new(DataFusion { ctx, file_name });
-        tester.run_file_async(path).await?;
+        let mut runner = sqllogictest::Runner::new(DataFusion { ctx, file_name });
+
+        // run each file using its own new DB
+        //
+        // We could run these tests in parallel eventually if we wanted.
+        if options.complete_mode {
+            info!("Using complete mode to complete {}", path.display());
+            let col_separator = " ";
+            let validator = default_validator;
+            update_test_file(path, runner, col_separator, validator)
+                .await
+                .map_err(|e| e.to_string())?;
+        } else {
+            // run the test normally:
+            runner.run_file_async(path).await?;
+        }
     }
 
     Ok(())
 }
 
-/// Gets a list of test files to execute. If there were arguments
-/// passed to the program treat it as a cargo test filter (substring match on filenames)
-fn get_test_files() -> Vec<PathBuf> {
-    info!("Test directory: {}", TEST_DIRECTORY);
-
-    let args: Vec<_> = std::env::args().collect();
-
-    // treat args after the first as filters to run (substring matching)
-    let filters = if !args.is_empty() {
-        args.iter()
-            .skip(1)
-            .map(|arg| arg.as_str())
-            .collect::<Vec<_>>()
-    } else {
-        vec![]
-    };
-
-    // default to all files in test directory filtering based on name
-    std::fs::read_dir(TEST_DIRECTORY)
-        .unwrap()
-        .map(|path| path.unwrap().path())
-        .filter(|path| check_test_file(&filters, path.as_path()))
-        .collect()
-}
-
-/// because this test can be run as a cargo test, commands like
-///
-/// ```shell
-/// cargo test foo
-/// ```
-///
-/// Will end up passing `foo` as a command line argument.
-///
-/// be compatible with this, treat the command line arguments as a
-/// filter and that does a substring match on each input.
-/// returns true f this path should be run
-fn check_test_file(filters: &[&str], path: &Path) -> bool {
-    if filters.is_empty() {
-        return true;
-    }
-
-    // otherwise check if any filter matches
-    let path_str = path.to_string_lossy();
-    filters.iter().any(|filter| path_str.contains(filter))
-}
-
 /// Create a SessionContext, configured for the specific test
 async fn context_for_test_file(file_name: &str) -> SessionContext {
     match file_name {
@@ -189,3 +160,60 @@ async fn run_query(ctx: &SessionContext, sql: impl Into<String>) -> Result<DBOut
     let formatted_batches = convert_batches(results)?;
     Ok(formatted_batches)
 }
+
+/// Parsed command line options
+struct Options {
+    // regex like
+    /// arguments passed to the program which are treated as
+    /// cargo test filter (substring match on filenames)
+    filters: Vec<String>,
+
+    /// Auto complete mode to fill out expected results
+    complete_mode: bool,
+}
+
+impl Options {
+    fn new() -> Self {
+        let args: Vec<_> = std::env::args().collect();
+
+        let complete_mode = args.iter().any(|a| a == "--complete");
+
+        // treat args after the first as filters to run (substring matching)
+        let filters = if !args.is_empty() {
+            args.into_iter()
+                .skip(1)
+                // ignore command line arguments like `--complete`
+                .filter(|arg| !arg.as_str().starts_with("--"))
+                .map(|arg| arg.to_string())
+                .collect::<Vec<_>>()
+        } else {
+            vec![]
+        };
+
+        Self {
+            filters,
+            complete_mode,
+        }
+    }
+
+    /// Because this test can be run as a cargo test, commands like
+    ///
+    /// ```shell
+    /// cargo test foo
+    /// ```
+    ///
+    /// Will end up passing `foo` as a command line argument.
+    ///
+    /// To be compatible with this, treat the command line arguments as a
+    /// filter and that does a substring match on each input.  returns
+    /// true f this path should be run
+    fn check_test_file(&self, path: &Path) -> bool {
+        if self.filters.is_empty() {
+            return true;
+        }
+
+        // otherwise check if any filter matches
+        let path_str = path.to_string_lossy();
+        self.filters.iter().any(|filter| path_str.contains(filter))
+    }
+}
diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
@@ -1077,3 +1077,17 @@ b 5
 c 4
 d 4
 e 4
+
+
+
+# array_agg_zero
+query U
+SELECT ARRAY_AGG([]);
+----
+[]
+
+# array_agg_one
+query U
+SELECT ARRAY_AGG([1]);
+----
+[[1]]
diff --git a/datafusion/core/tests/sqllogictests/test_files/ddl.slt b/datafusion/core/tests/sqllogictests/test_files/ddl.slt
@@ -411,5 +411,11 @@ SELECT * FROM aggregate_simple order by c1 LIMIT 1;
 ----
 0.00001 0.000000000001 true
 
+query CCC
+SELECT * FROM aggregate_simple order by c1 DESC LIMIT 1;
+----
+0.00005 0.000000000005 true
+
+
 statement ok
 DROP TABLE aggregate_simple