Skip to content

Commit

Permalink
[sqllogictest] Define output types and check them in tests (#5253)
Browse files Browse the repository at this point in the history
* [sqllogictest] Define output types and check them in tests

* Don't change imports

* Fix import

* Check types when results are empty

* Use Sqllite compatible types 'T','R','I'

* Update sqllogictest-rs to 0.13.0

* Complete types
  • Loading branch information
melgenek authored Feb 17, 2023
1 parent f154a9a commit 22b974f
Show file tree
Hide file tree
Showing 33 changed files with 492 additions and 512 deletions.
2 changes: 1 addition & 1 deletion datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ postgres-protocol = "0.6.4"
postgres-types = { version = "0.2.4", features = ["derive", "with-chrono-0_4"] }
rstest = "0.16.0"
rust_decimal = { version = "1.27.0", features = ["tokio-pg"] }
sqllogictest = "0.12.0"
sqllogictest = "0.13.0"
test-utils = { path = "../../test-utils" }
thiserror = "1.0.37"
tokio-postgres = "0.7.7"
Expand Down
11 changes: 7 additions & 4 deletions datafusion/core/tests/sqllogictests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,13 @@ query <type_string> <sort_mode>

- `test_name`: Uniquely identify the test name (arrow-datafusion only)
- `type_string`: A short string that specifies the number of result columns and the expected datatype of each result column. There is one character in the <type_string> for each result column. The character codes are:
- "T" for a text result,
- "I" for an integer result,
- "R" for a floating-point result,
- "?" for any other type.
- 'B' - **B**oolean,
- 'D' - **D**atetime,
- 'I' - **I**nteger,
- 'P' - timestam**P**,
- 'R' - floating-point results,
- 'T' - **T**ext,
- "?" - any other types
- `expected_result`: In the results section, some values are converted according to some rules:
- floating point values are rounded to the scale of "12",
- NULL values are rendered as `NULL`,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@
// under the License.

use super::error::Result;
use crate::engines::datafusion::error::DFSqlLogicTestError;
use crate::engines::datafusion::util::LogicTestContextProvider;
use crate::{engines::datafusion::error::DFSqlLogicTestError, output::DFOutput};
use crate::engines::output::DFOutput;
use datafusion::datasource::MemTable;
use datafusion::prelude::SessionContext;
use datafusion_common::{DataFusionError, OwnedTableReference};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
// under the License.

use super::error::Result;
use crate::{engines::datafusion::util::LogicTestContextProvider, output::DFOutput};
use crate::engines::datafusion::util::LogicTestContextProvider;
use crate::engines::output::DFOutput;
use arrow::record_batch::RecordBatch;
use datafusion::datasource::MemTable;
use datafusion::prelude::SessionContext;
Expand Down
14 changes: 11 additions & 3 deletions datafusion/core/tests/sqllogictests/src/engines/datafusion/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
use std::path::PathBuf;
use std::time::Duration;

use crate::output::{DFColumnType, DFOutput};
use crate::engines::output::{DFColumnType, DFOutput};

use self::error::{DFSqlLogicTestError, Result};
use async_trait::async_trait;
Expand All @@ -27,6 +27,7 @@ use datafusion::arrow::record_batch::RecordBatch;
use datafusion::prelude::SessionContext;
use datafusion_sql::parser::{DFParser, Statement};
use insert::insert;
use sqllogictest::DBOutput;
use sqlparser::ast::Statement as SQLStatement;

mod create_table;
Expand Down Expand Up @@ -108,7 +109,14 @@ async fn run_query(ctx: &SessionContext, sql: impl Into<String>) -> Result<DFOut
}
}
let df = ctx.sql(sql.as_str()).await?;

let types = normalize::convert_schema_to_types(df.schema().fields());
let results: Vec<RecordBatch> = df.collect().await?;
let formatted_batches = normalize::convert_batches(results)?;
Ok(formatted_batches)
let rows = normalize::convert_batches(results)?;

if rows.is_empty() && types.is_empty() {
Ok(DBOutput::StatementComplete(0))
} else {
Ok(DBOutput::Rows { types, rows })
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,53 +17,43 @@

use arrow::datatypes::SchemaRef;
use arrow::{array, array::ArrayRef, datatypes::DataType, record_batch::RecordBatch};
use datafusion_common::DFField;
use datafusion_common::DataFusionError;
use lazy_static::lazy_static;
use sqllogictest::DBOutput;
use std::path::PathBuf;

use crate::output::{DFColumnType, DFOutput};
use crate::engines::output::DFColumnType;

use super::super::conversion::*;
use super::error::{DFSqlLogicTestError, Result};

/// Converts `batches` to a DBOutput as expected by sqllogictest.
///
/// Assumes empty record batches are a successful statement completion
///
pub fn convert_batches(batches: Vec<RecordBatch>) -> Result<DFOutput> {
/// Converts `batches` to a result as expected by sqllogictest.
pub fn convert_batches(batches: Vec<RecordBatch>) -> Result<Vec<Vec<String>>> {
if batches.is_empty() {
// DataFusion doesn't report number of rows complete
return Ok(DBOutput::StatementComplete(0));
}
Ok(vec![])
} else {
let schema = batches[0].schema();
let mut rows = vec![];
for batch in batches {
// Verify schema
if !equivalent_names_and_types(&schema, batch.schema()) {
return Err(DFSqlLogicTestError::DataFusion(DataFusionError::Internal(
format!(
"Schema mismatch. Previously had\n{:#?}\n\nGot:\n{:#?}",
&schema,
batch.schema()
),
)));
}

let schema = batches[0].schema();

// TODO: report the actual types of the result
// https://github.com/apache/arrow-datafusion/issues/4499
let types = vec![DFColumnType::Any; batches[0].num_columns()];

let mut rows = vec![];
for batch in batches {
// Verify schema
if !equivalent_names_and_types(&schema, batch.schema()) {
return Err(DFSqlLogicTestError::DataFusion(DataFusionError::Internal(
format!(
"Schema mismatch. Previously had\n{:#?}\n\nGot:\n{:#?}",
&schema,
batch.schema()
),
)));
let new_rows = convert_batch(batch)?
.into_iter()
.flat_map(expand_row)
.map(normalize_paths);
rows.extend(new_rows);
}

let new_rows = convert_batch(batch)?
.into_iter()
.flat_map(expand_row)
.map(normalize_paths);
rows.extend(new_rows);
Ok(rows)
}

Ok(DBOutput::Rows { types, rows })
}

/// special case rows that have newlines in them (like explain plans)
Expand Down Expand Up @@ -233,3 +223,34 @@ pub fn cell_to_string(col: &ArrayRef, row: usize) -> Result<String> {
.map_err(DFSqlLogicTestError::Arrow)
}
}

/// Maps each schema field to the column type tag that sqllogictest compares
/// against a query's type string.
///
/// The tag is chosen from the field's Arrow [`DataType`]:
/// - booleans become `Boolean`;
/// - signed and unsigned integers of every width become `Integer`;
/// - floats and decimals become `Float`;
/// - `Utf8` / `LargeUtf8` become `Text`;
/// - dates and times become `DateTime`;
/// - timestamps become `Timestamp`;
/// - every other data type becomes `Another`.
///
/// The returned vector has one entry per input column, in the same order.
pub fn convert_schema_to_types(columns: &[DFField]) -> Vec<DFColumnType> {
    let mut column_types = Vec::with_capacity(columns.len());
    for field in columns {
        let column_type = match field.data_type() {
            DataType::Boolean => DFColumnType::Boolean,
            DataType::Int8
            | DataType::Int16
            | DataType::Int32
            | DataType::Int64
            | DataType::UInt8
            | DataType::UInt16
            | DataType::UInt32
            | DataType::UInt64 => DFColumnType::Integer,
            DataType::Float16
            | DataType::Float32
            | DataType::Float64
            | DataType::Decimal128(_, _)
            | DataType::Decimal256(_, _) => DFColumnType::Float,
            DataType::Utf8 | DataType::LargeUtf8 => DFColumnType::Text,
            DataType::Date32
            | DataType::Date64
            | DataType::Time32(_)
            | DataType::Time64(_) => DFColumnType::DateTime,
            DataType::Timestamp(_, _) => DFColumnType::Timestamp,
            _ => DFColumnType::Another,
        };
        column_types.push(column_type);
    }
    column_types
}
1 change: 1 addition & 0 deletions datafusion/core/tests/sqllogictests/src/engines/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@

mod conversion;
pub mod datafusion;
mod output;
pub mod postgres;
57 changes: 57 additions & 0 deletions datafusion/core/tests/sqllogictests/src/engines/output.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use sqllogictest::{ColumnType, DBOutput};

/// Column type tag attached to each result column of a query.
///
/// Every variant corresponds to one character of a sqllogictest type string;
/// the exact character mapping is defined by the `ColumnType` implementation
/// for this enum.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum DFColumnType {
/// Boolean column, written as 'B'.
Boolean,
/// Date or time column, written as 'D'.
DateTime,
/// Integer column, written as 'I'.
Integer,
/// Floating-point (or decimal) column, written as 'R'.
Float,
/// Text column, written as 'T'.
Text,
/// Timestamp column, written as 'P'.
Timestamp,
/// Any type not covered by the other variants, written as '?'.
Another,
}

impl ColumnType for DFColumnType {
fn from_char(value: char) -> Option<Self> {
match value {
'B' => Some(Self::Boolean),
'D' => Some(Self::DateTime),
'I' => Some(Self::Integer),
'P' => Some(Self::Timestamp),
'R' => Some(Self::Float),
'T' => Some(Self::Text),
_ => Some(Self::Another),
}
}

fn to_char(&self) -> char {
match self {
Self::Boolean => 'B',
Self::DateTime => 'D',
Self::Integer => 'I',
Self::Timestamp => 'P',
Self::Float => 'R',
Self::Text => 'T',
Self::Another => '?',
}
}
}

/// sqllogictest's `DBOutput` specialised to use [`DFColumnType`] as its column type.
pub type DFOutput = DBOutput<DFColumnType>;
Loading

0 comments on commit 22b974f

Please sign in to comment.