Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support alternate formats for unparsing datetime to timestamp and interval #11466

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions datafusion-examples/examples/plan_to_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use datafusion::error::Result;

use datafusion::prelude::*;
use datafusion::sql::unparser::expr_to_sql;
use datafusion_sql::unparser::dialect::CustomDialect;
use datafusion_sql::unparser::dialect::CustomDialectBuilder;
use datafusion_sql::unparser::{plan_to_sql, Unparser};

/// This example demonstrates the programmatic construction of SQL strings using
Expand Down Expand Up @@ -80,7 +80,9 @@ fn simple_expr_to_pretty_sql_demo() -> Result<()> {
/// using a custom dialect and an explicit unparser
fn simple_expr_to_sql_demo_escape_mysql_style() -> Result<()> {
let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8)));
let dialect = CustomDialect::new(Some('`'));
let dialect = CustomDialectBuilder::new()
.with_identifier_quote_style('`')
.build();
let unparser = Unparser::new(&dialect);
let sql = unparser.expr_to_sql(&expr)?.to_string();
assert_eq!(sql, r#"((`a` < 5) OR (`a` = 8))"#);
Expand Down
140 changes: 140 additions & 0 deletions datafusion/sql/src/unparser/dialect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,33 @@ pub trait Dialect {
fn supports_nulls_first_in_sort(&self) -> bool {
// Default for dialects that do not override this method: report that
// `NULLS FIRST` is supported in sort clauses.
true
}

/// Does the dialect use `TIMESTAMP` to represent `Date64` rather than `DATETIME`?
///
/// E.g. Trino, Athena and Dremio do not have a `DATETIME` data type.
/// The default implementation returns `false`.
fn use_timestamp_for_date64(&self) -> bool {
false
}

/// The [`IntervalStyle`] this dialect selects for unparsing.
///
/// The default implementation returns [`IntervalStyle::PostgresVerbose`].
fn interval_style(&self) -> IntervalStyle {
IntervalStyle::PostgresVerbose
}
}

/// `IntervalStyle` to use for unparsing
///
/// Different DBMSs follow different standards for interval literals, see
/// <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT>.
/// Popular ones are:
///
/// - postgres_verbose: `'2 years 15 months 100 weeks 99 hours 123456789 milliseconds'`,
///   which is compatible with the arrow display format, as well as duckdb
/// - SQL standard: `'1-2'` for year-month, or `'1 10:10:10.123456'` for day-time, see
///   <https://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt>
///
/// `Debug`, `PartialEq` and `Eq` are derived so that a style can be logged and
/// compared (for example in assertions) without matching on every variant.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum IntervalStyle {
    /// The verbose PostgreSQL format described above.
    PostgresVerbose,
    /// The SQL standard format described above.
    SQLStandard,
    /// The style returned by `MySqlDialect::interval_style`.
    MySQL,
}

/// Field-less dialect type; its behavior comes from its `Dialect` implementation.
pub struct DefaultDialect {}

impl Dialect for DefaultDialect {
Expand All @@ -57,6 +83,10 @@ impl Dialect for PostgreSqlDialect {
fn identifier_quote_style(&self, _: &str) -> Option<char> {
// Always the double-quote character; the `&str` argument is ignored.
Some('"')
}

/// Selects [`IntervalStyle::PostgresVerbose`] for this dialect.
fn interval_style(&self) -> IntervalStyle {
IntervalStyle::PostgresVerbose
}
}

/// Field-less dialect type for MySQL; its behavior comes from its `Dialect` implementation.
pub struct MySqlDialect {}
Expand All @@ -69,6 +99,10 @@ impl Dialect for MySqlDialect {
fn supports_nulls_first_in_sort(&self) -> bool {
// This dialect reports that `NULLS FIRST` is not supported in sort clauses.
false
}

/// Selects [`IntervalStyle::MySQL`] for this dialect.
fn interval_style(&self) -> IntervalStyle {
IntervalStyle::MySQL
}
}

/// Field-less dialect type for SQLite; its behavior comes from its `Dialect` implementation.
pub struct SqliteDialect {}
Expand All @@ -81,12 +115,29 @@ impl Dialect for SqliteDialect {

/// A dialect whose `Dialect` answers are plain stored values.
///
/// Each field backs the `Dialect` method of the same name.
pub struct CustomDialect {
// Returned as-is by `identifier_quote_style`.
identifier_quote_style: Option<char>,
// Returned as-is by `supports_nulls_first_in_sort`.
supports_nulls_first_in_sort: bool,
// Returned as-is by `use_timestamp_for_date64`.
use_timestamp_for_date64: bool,
// Returned as-is by `interval_style`.
interval_style: IntervalStyle,
}

impl Default for CustomDialect {
    /// Returns a `CustomDialect` with no identifier quote character,
    /// `NULLS FIRST` support enabled, `use_timestamp_for_date64` disabled and the
    /// `PostgresVerbose` interval style.
    ///
    /// These are the same values that the `Dialect` trait's default methods and
    /// `CustomDialectBuilder::new()` use, so a dialect obtained through
    /// `Default` (including via the deprecated `CustomDialect::new`) behaves
    /// like one produced by an unconfigured builder.
    fn default() -> Self {
        Self {
            identifier_quote_style: None,
            supports_nulls_first_in_sort: true,
            use_timestamp_for_date64: false,
            // Was `SQLStandard`, the only default in this file that disagreed
            // with the trait default and the builder default.
            interval_style: IntervalStyle::PostgresVerbose,
        }
    }
}

impl CustomDialect {
// create a CustomDialect
#[deprecated(note = "please use `CustomDialectBuilder` instead")]
pub fn new(identifier_quote_style: Option<char>) -> Self {
Self {
identifier_quote_style,
..Default::default()
}
}
}
Expand All @@ -95,4 +146,93 @@ impl Dialect for CustomDialect {
fn identifier_quote_style(&self, _: &str) -> Option<char> {
// Returns the stored value; the `&str` argument is ignored.
self.identifier_quote_style
}

/// Returns the `supports_nulls_first_in_sort` value stored in this dialect.
fn supports_nulls_first_in_sort(&self) -> bool {
self.supports_nulls_first_in_sort
}

/// Returns the `use_timestamp_for_date64` value stored in this dialect.
fn use_timestamp_for_date64(&self) -> bool {
self.use_timestamp_for_date64
}

/// Returns the [`IntervalStyle`] stored in this dialect.
fn interval_style(&self) -> IntervalStyle {
self.interval_style
}
}

/// `CustomDialectBuilder` builds a `CustomDialect` using the builder pattern.
///
/// Start from `CustomDialectBuilder::new()`, override individual options with
/// the `with_*` methods, then call `build()`.
///
/// # Examples
///
/// Building a custom dialect with all default options set in `CustomDialectBuilder::new()`
/// but with `use_timestamp_for_date64` overridden to `true`
///
/// ```
/// use datafusion_sql::unparser::dialect::CustomDialectBuilder;
/// let dialect = CustomDialectBuilder::new()
///     .with_use_timestamp_for_date64(true)
///     .build();
/// ```
pub struct CustomDialectBuilder {
// Each field is copied into the `CustomDialect` field of the same name by `build()`.
identifier_quote_style: Option<char>,
supports_nulls_first_in_sort: bool,
use_timestamp_for_date64: bool,
interval_style: IntervalStyle,
}

impl Default for CustomDialectBuilder {
/// Equivalent to `CustomDialectBuilder::new()`.
fn default() -> Self {
Self::new()
}
}

impl CustomDialectBuilder {
pub fn new() -> Self {
Self {
identifier_quote_style: None,
supports_nulls_first_in_sort: true,
use_timestamp_for_date64: false,
interval_style: IntervalStyle::PostgresVerbose,
}
}

pub fn build(self) -> CustomDialect {
CustomDialect {
identifier_quote_style: self.identifier_quote_style,
supports_nulls_first_in_sort: self.supports_nulls_first_in_sort,
use_timestamp_for_date64: self.use_timestamp_for_date64,
interval_style: self.interval_style,
}
}

/// Customize the dialect with a specific identifier quote style, e.g. '`', '"'
pub fn with_identifier_quote_style(mut self, identifier_quote_style: char) -> Self {
self.identifier_quote_style = Some(identifier_quote_style);
self
}

/// Customize the dialect to supports `NULLS FIRST` in `ORDER BY` clauses
pub fn with_supports_nulls_first_in_sort(
mut self,
supports_nulls_first_in_sort: bool,
) -> Self {
self.supports_nulls_first_in_sort = supports_nulls_first_in_sort;
self
}

/// Customize the dialect to uses TIMESTAMP when casting Date64 rather than DATETIME
pub fn with_use_timestamp_for_date64(
mut self,
use_timestamp_for_date64: bool,
) -> Self {
self.use_timestamp_for_date64 = use_timestamp_for_date64;
self
}

/// Customize the dialect with a specific interval style listed in `IntervalStyle`
pub fn with_interval_style(mut self, interval_style: IntervalStyle) -> Self {
self.interval_style = interval_style;
self
}
}
Loading