feat: support prql #14922

Merged 3 commits on Mar 18, 2024
Changes from all commits
9 changes: 5 additions & 4 deletions src/query/ast/src/parser/input.rs
@@ -86,6 +86,7 @@ pub enum Dialect {
PostgreSQL,
MySQL,
Hive,
+PRQL,
Experimental,
}

@@ -95,31 +96,31 @@ impl Dialect {
Dialect::MySQL => c == '`',
Dialect::Hive => c == '`',
// TODO: remove '`' quote support once mysql handler correctly set mysql dialect.
-Dialect::Experimental | Dialect::PostgreSQL => c == '"' || c == '`',
+Dialect::Experimental | Dialect::PostgreSQL | Dialect::PRQL => c == '"' || c == '`',
}
}

pub fn is_string_quote(&self, c: char) -> bool {
match self {
Dialect::MySQL => c == '\'' || c == '"',
Dialect::Hive => c == '\'' || c == '"',
-Dialect::Experimental | Dialect::PostgreSQL => c == '\'',
+Dialect::Experimental | Dialect::PostgreSQL | Dialect::PRQL => c == '\'',
}
}

pub fn is_null_biggest(&self) -> bool {
match self {
Dialect::MySQL => false,
Dialect::Hive => false,
-Dialect::Experimental | Dialect::PostgreSQL => true,
+Dialect::Experimental | Dialect::PostgreSQL | Dialect::PRQL => true,
}
}

pub fn substr_index_zero_literal_as_one(&self) -> bool {
match self {
Dialect::MySQL => false,
Dialect::Hive => true,
-Dialect::Experimental | Dialect::PostgreSQL => false,
+Dialect::Experimental | Dialect::PostgreSQL | Dialect::PRQL => false,
}
}
}
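These arms group Dialect::PRQL with Dialect::PostgreSQL and Dialect::Experimental throughout, so the PRQL dialect inherits double-quote (and, per the TODO above, temporary backtick) identifier quoting, single-quote string literals, NULL ordered as the biggest value, and the same SUBSTRING_INDEX zero-literal behavior. A minimal sketch of a unit test that would pin this down; it is not part of this PR, and is_ident_quote is an assumed name for the first method, whose signature is collapsed in the hunk above:

#[test]
fn prql_dialect_follows_postgresql_rules() {
    // Identifier quoting: '"' plus the temporary '`' support (assumed method name).
    assert!(Dialect::PRQL.is_ident_quote('"'));
    assert!(Dialect::PRQL.is_ident_quote('`'));
    // String literals accept single quotes only.
    assert!(Dialect::PRQL.is_string_quote('\''));
    assert!(!Dialect::PRQL.is_string_quote('"'));
    // NULL sorts as the biggest value, as in PostgreSQL.
    assert!(Dialect::PRQL.is_null_biggest());
    // SUBSTRING_INDEX with a literal 0 is not treated as 1.
    assert!(!Dialect::PRQL.substr_index_zero_literal_as_one());
}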
2 changes: 1 addition & 1 deletion src/query/settings/src/settings_default.rs
@@ -252,7 +252,7 @@ impl DefaultSettings {
value: UserSettingValue::String("PostgreSQL".to_owned()),
desc: "Sets the SQL dialect. Available values include \"PostgreSQL\", \"MySQL\", \"Experimental\", and \"Hive\".",
mode: SettingMode::Both,
-range: Some(SettingRange::String(vec!["PostgreSQL".into(), "MySQL".into(), "Experimental".into(), "Hive".into()])),
+range: Some(SettingRange::String(vec!["PostgreSQL".into(), "MySQL".into(), "Experimental".into(), "Hive".into(), "Prql".into()])),
}),
("enable_dphyp", DefaultSettingValue {
value: UserSettingValue::UInt64(1),
1 change: 1 addition & 0 deletions src/query/settings/src/settings_getter_setter.rs
@@ -302,6 +302,7 @@ impl Settings {
"hive" => Ok(Dialect::Hive),
"mysql" => Ok(Dialect::MySQL),
"experimental" => Ok(Dialect::Experimental),
+"prql" => Ok(Dialect::PRQL),
_ => Ok(Dialect::PostgreSQL),
}
}
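Note the case difference: settings_default.rs registers the allowed value as "Prql", while this match compares against lowercase "prql" (as it already does for "hive" and "mysql"), so the stored setting is presumably lowercased before the match; that normalization sits above the visible hunk. A rough sketch of the assumed lookup with the fallback made explicit; dialect_from_setting is a hypothetical helper, not code from this PR:

fn dialect_from_setting(value: &str) -> Dialect {
    // Assumed: the getter lowercases the stored setting value before matching.
    match value.to_lowercase().as_str() {
        "hive" => Dialect::Hive,
        "mysql" => Dialect::MySQL,
        "experimental" => Dialect::Experimental,
        "prql" => Dialect::PRQL,
        // Any unrecognized value silently falls back to the PostgreSQL dialect.
        _ => Dialect::PostgreSQL,
    }
}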
1 change: 1 addition & 0 deletions src/query/sql/Cargo.toml
@@ -75,6 +75,7 @@ opendal = { workspace = true }
ordered-float = { workspace = true }
parking_lot = { workspace = true }
percent-encoding = "2"
+prqlc = "0.11.3"
regex = { workspace = true }
roaring = "0.10.1"
serde = { workspace = true }
46 changes: 39 additions & 7 deletions src/query/sql/src/planner/planner.rs
@@ -27,8 +27,11 @@ use databend_common_ast::parser::Dialect;
use databend_common_catalog::catalog::CatalogManager;
use databend_common_catalog::query_kind::QueryKind;
use databend_common_catalog::table_context::TableContext;
+use databend_common_exception::ErrorCode;
use databend_common_exception::Result;
use derive_visitor::DriveMut;
+use log::info;
+use log::warn;
use parking_lot::RwLock;

use super::semantic::AggregateRewriter;
@@ -67,8 +70,31 @@ impl Planner {
pub async fn plan_sql(&mut self, sql: &str) -> Result<(Plan, PlanExtras)> {
let settings = self.ctx.get_settings();
let sql_dialect = settings.get_sql_dialect()?;
+// compile prql to sql for prql dialect
+let mut prql_converted = false;
+let final_sql: String = match sql_dialect == Dialect::PRQL {
+true => {
+let options = prqlc::Options::default();
+match prqlc::compile(sql, &options) {
+Ok(res) => {
+info!("Try convert prql to sql succeed, generated sql: {}", &res);
+prql_converted = true;
+res
+}
+Err(e) => {
+warn!(
+"Try convert prql to sql failed, still use raw sql to parse. error: {}",
+e.to_string()
+);
+sql.to_string()
+}
+}
+}
+false => sql.to_string(),
+};

// Step 1: Tokenize the SQL.
-let mut tokenizer = Tokenizer::new(sql).peekable();
+let mut tokenizer = Tokenizer::new(&final_sql).peekable();

// Only tokenize the beginning tokens for `INSERT INTO` statement because the rest tokens after `VALUES` is unused.
// Stop the tokenizer on unrecognized token because some values inputs (e.g. CSV) may not be valid for the tokenizer.
@@ -83,8 +109,8 @@ impl Planner {
(&mut tokenizer)
.take(PROBE_INSERT_INITIAL_TOKENS)
.take_while(|token| token.is_ok())
-// Make sure the token stream is always ended with EOI.
-.chain(std::iter::once(Ok(Token::new_eoi(sql))))
+// Make sure the tokens stream is always ended with EOI.
+.chain(std::iter::once(Ok(Token::new_eoi(&final_sql))))
.collect::<Result<_>>()
.unwrap()
} else {
@@ -101,6 +127,12 @@ impl Planner {
} else {
parse_sql(&tokens, sql_dialect)?
};
+if !matches!(stmt, Statement::SetVariable { .. })
+&& sql_dialect == Dialect::PRQL
+&& !prql_converted
+{
+return Err(ErrorCode::SyntaxException("convert prql to sql failed."));
+}

if matches!(stmt, Statement::CopyIntoLocation(_)) {
// Indicate binder there is no need to collect column statistics for the binding table.
@@ -161,15 +193,15 @@ impl Planner {
.take(tokens.len() * 2)
.take_while(|token| token.is_ok())
.map(|token| token.unwrap())
-// Make sure the token stream is always ended with EOI.
-.chain(std::iter::once(Token::new_eoi(sql)));
+// Make sure the tokens stream is always ended with EOI.
+.chain(std::iter::once(Token::new_eoi(&final_sql)));
tokens.extend(iter);
} else {
let iter = (&mut tokenizer)
.take_while(|token| token.is_ok())
.map(|token| token.unwrap())
-// Make sure the token stream is always ended with EOI.
-.chain(std::iter::once(Token::new_eoi(sql)));
+// Make sure the tokens stream is always ended with EOI.
+.chain(std::iter::once(Token::new_eoi(&final_sql)));
tokens.extend(iter);
};
} else {
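In plan_sql the PRQL handling runs entirely ahead of tokenization: when sql_dialect is PRQL the raw query text is first handed to prqlc, the generated SQL replaces it on success, and on failure the raw text is kept and parsed as ordinary SQL; the added prql_converted check then rejects anything other than a SET statement with a SyntaxException, so `set sql_dialect = '...'` stays usable for switching back. The compile step itself can be reproduced outside the planner with nothing more than the prqlc crate added to Cargo.toml above; a minimal standalone sketch (the pipeline string is borrowed from the test file below):

fn main() {
    // One of the pipelines exercised by the sqllogictest below.
    let prql = "from `prql`.`aboba` | filter user_id > 101 | group user_id ( aggregate { metrics = sum metric }) | sort {user_id}";
    let options = prqlc::Options::default();
    match prqlc::compile(prql, &options) {
        // On success the planner tokenizes this generated SQL instead of the raw input.
        Ok(sql) => println!("{sql}"),
        // On failure the planner logs a warning and falls back to the raw input.
        Err(e) => eprintln!("prql compile error: {e}"),
    }
}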
56 changes: 56 additions & 0 deletions tests/sqllogictests/suites/query/02_function/02_0072_prql.test
@@ -0,0 +1,56 @@
statement ok
DROP DATABASE IF EXISTS prql

statement ok
CREATE DATABASE prql

statement ok
CREATE TABLE `prql`.`aboba` (`user_id` INT UNSIGNED NULL,`message` VARCHAR NULL,`creation_date` TIMESTAMP NULL,`metric` FLOAT NULL) ENGINE=FUSE CLUSTER BY (user_id)

statement ok
INSERT INTO `prql`.`aboba` (user_id, message, creation_date, metric) VALUES (101, 'xxx', to_datetime('2019-01-01 00:00:00'), -1.0), (102, 'yyy', to_datetime('2019-02-01 00:00:00'), 1.41421 ), (102, 'zzz', to_datetime('2019-03-01 00:00:00'), 2.718), (101, 'xyz', to_datetime('2019-05-01 00:00:00'), 3.14159), (103, 'qwer', to_datetime('2019-04-01 00:00:00'), 42)

statement ok
set sql_dialect = 'prql'

query ITIT
from `prql`.`aboba` | derive { a = 2, b = s"LEFT(message, 2)" } | select { user_id, message, a, b } | sort {user_id, message}
----
101 xxx 2 xx
101 xyz 2 xy
102 yyy 2 yy
102 zzz 2 zz
103 qwer 2 qw

query IR
from `prql`.`aboba` | filter user_id > 101 | group user_id ( aggregate { metrics = sum metric }) | sort {user_id}
----
102 4.132209897041321
103 42.0

query ITIR
from `prql`.`aboba` | select { user_id, message, metric } | derive creation_date = s"TO_UNIX_TIMESTAMP(creation_date)" | select { user_id, message, creation_date, metric} | sort { user_id, message }
----
101 xxx 1546300800 -1.0
101 xyz 1556668800 3.14159
102 yyy 1548979200 1.41421
102 zzz 1551398400 2.718
103 qwer 1554076800 42.0

statement error 1005
SELECT user_id, message, TO_UNIX_TIMESTAMP(creation_date) as creation_date, metric FROM `prql`.`aboba` order by user_id, message

statement ok
set sql_dialect = 'PostgreSQL'

query ITIR
SELECT user_id, message, TO_UNIX_TIMESTAMP(creation_date) as creation_date, metric FROM `prql`.`aboba` order by user_id, message
----
101 xxx 1546300800 -1.0
101 xyz 1556668800 3.14159
102 yyy 1548979200 1.41421
102 zzz 1551398400 2.718
103 qwer 1554076800 42.0

statement ok
DROP DATABASE prql
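The `statement error 1005` case above exercises the fallback path in planner.rs: plain SQL is not valid PRQL, so prqlc compilation fails, prql_converted stays false, and the parsed statement is rejected with a SyntaxException. A small sketch of that expectation against prqlc alone (a hypothetical check, not part of this PR):

#[test]
fn plain_sql_does_not_compile_as_prql() {
    // The same query that produces error 1005 under the prql dialect above.
    let plain_sql = "SELECT user_id, message, TO_UNIX_TIMESTAMP(creation_date) as creation_date, metric FROM `prql`.`aboba` order by user_id, message";
    let options = prqlc::Options::default();
    // prqlc rejects it, so the planner keeps the raw SQL and then returns a SyntaxException.
    assert!(prqlc::compile(plain_sql, &options).is_err());
}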