-
Notifications
You must be signed in to change notification settings - Fork 558
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for ClickHouse FORMAT
on INSERT
#1628
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -33,10 +33,10 @@ pub use super::ddl::{ColumnDef, TableConstraint}; | |||||
|
||||||
use super::{ | ||||||
display_comma_separated, display_separated, ClusteredBy, CommentDef, Expr, FileFormat, | ||||||
FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, Ident, | ||||||
FormatClause, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, Ident, | ||||||
InsertAliases, MysqlInsertPriority, ObjectName, OnCommit, OnInsert, OneOrManyWithParens, | ||||||
OrderByExpr, Query, RowAccessPolicy, SelectItem, SqlOption, SqliteOnConflict, TableEngine, | ||||||
TableWithJoins, Tag, WrappedCollection, | ||||||
OrderByExpr, Query, RowAccessPolicy, SelectItem, Setting, SqlOption, SqliteOnConflict, | ||||||
TableEngine, TableWithJoins, Tag, WrappedCollection, | ||||||
}; | ||||||
|
||||||
/// CREATE INDEX statement. | ||||||
|
@@ -495,6 +495,20 @@ pub struct Insert { | |||||
pub priority: Option<MysqlInsertPriority>, | ||||||
/// Only for mysql | ||||||
pub insert_alias: Option<InsertAliases>, | ||||||
/// Settings used in together with a specified `FORMAT`. | ||||||
/// | ||||||
/// ClickHouse syntax: `INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format'` | ||||||
/// | ||||||
/// [ClickHouse `INSERT INTO`](https://clickhouse.com/docs/en/sql-reference/statements/insert-into) | ||||||
/// [ClickHouse Formats](https://clickhouse.com/docs/en/interfaces/formats) | ||||||
pub settings: Option<Vec<Setting>>, | ||||||
/// Format for `INSERT` statement when not using standard SQL format. Can be e.g. `CSV`, | ||||||
/// `JSON`, `JSONAsString`, `LineAsString` and more. | ||||||
/// | ||||||
/// ClickHouse syntax: `INSERT INTO tbl FORMAT JSONEachRow {"foo": 1, "bar": 2}, {"foo": 3}` | ||||||
/// | ||||||
/// [ClickHouse formats JSON insert](https://clickhouse.com/docs/en/interfaces/formats#json-inserting-data) | ||||||
pub format_clause: Option<FormatClause>, | ||||||
} | ||||||
|
||||||
impl Display for Insert { | ||||||
|
@@ -547,7 +561,15 @@ impl Display for Insert { | |||||
write!(f, "{source}")?; | ||||||
} | ||||||
|
||||||
if self.source.is_none() && self.columns.is_empty() { | ||||||
if let Some(settings) = &self.settings { | ||||||
write!(f, "SETTINGS {} ", display_comma_separated(settings))?; | ||||||
} | ||||||
|
||||||
if let Some(format_clause) = &self.format_clause { | ||||||
write!(f, "{format_clause}")?; | ||||||
} | ||||||
|
||||||
if self.source.is_none() && self.columns.is_empty() && self.format_clause.is_none() { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
thinking the format isn't necessarily tied to the columns? |
||||||
write!(f, "DEFAULT VALUES")?; | ||||||
} | ||||||
|
||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2465,14 +2465,25 @@ impl fmt::Display for GroupByExpr { | |
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] | ||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] | ||
pub enum FormatClause { | ||
Identifier(Ident), | ||
Identifier { | ||
ident: Ident, | ||
expr: Option<Vec<Expr>>, | ||
}, | ||
Null, | ||
} | ||
Comment on lines
2467
to
2473
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it doesn't look correct to reuse this struct? The existing FormatClause is currently used on a query which has different variants/syntax than on insert? maybe we can do something like struct OutputFormatClause {
name: Ident
values: Vec<Expr>
} can we also add doc comments describing the struct/properties with a link to the doc? |
||
|
||
impl fmt::Display for FormatClause { | ||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
match self { | ||
FormatClause::Identifier(ident) => write!(f, "FORMAT {}", ident), | ||
FormatClause::Identifier { ident, expr } => { | ||
write!(f, "FORMAT {}", ident)?; | ||
|
||
if let Some(exprs) = expr { | ||
write!(f, " {}", display_comma_separated(exprs))?; | ||
} | ||
|
||
Ok(()) | ||
} | ||
FormatClause::Null => write!(f, "FORMAT NULL"), | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -931,7 +931,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ | |||||
Keyword::PREWHERE, | ||||||
// for ClickHouse SELECT * FROM t SETTINGS ... | ||||||
Keyword::SETTINGS, | ||||||
// for ClickHouse SELECT * FROM t FORMAT... | ||||||
// for ClickHouse SELECT * FROM t FORMAT... or INSERT INTO t FORMAT... | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
we could also remove the full description, the keyword isn't really tied to any specific syntax/dialect |
||||||
Keyword::FORMAT, | ||||||
// for Snowflake START WITH .. CONNECT BY | ||||||
Keyword::START, | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9570,12 +9570,7 @@ impl<'a> Parser<'a> { | |
let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect) | ||
&& self.parse_keyword(Keyword::FORMAT) | ||
{ | ||
if self.parse_keyword(Keyword::NULL) { | ||
Some(FormatClause::Null) | ||
} else { | ||
let ident = self.parse_identifier()?; | ||
Some(FormatClause::Identifier(ident)) | ||
} | ||
Some(self.parse_format_clause(false)?) | ||
} else { | ||
None | ||
}; | ||
|
@@ -11824,30 +11819,53 @@ impl<'a> Parser<'a> { | |
|
||
let is_mysql = dialect_of!(self is MySqlDialect); | ||
|
||
let (columns, partitioned, after_columns, source) = | ||
if self.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) { | ||
(vec![], None, vec![], None) | ||
let (columns, partitioned, after_columns, source) = if self | ||
.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) | ||
{ | ||
(vec![], None, vec![], None) | ||
} else { | ||
let (columns, partitioned, after_columns) = if !self.peek_subquery_start() { | ||
let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; | ||
|
||
let partitioned = self.parse_insert_partition()?; | ||
// Hive allows you to specify columns after partitions as well if you want. | ||
let after_columns = if dialect_of!(self is HiveDialect) { | ||
self.parse_parenthesized_column_list(Optional, false)? | ||
} else { | ||
vec![] | ||
}; | ||
(columns, partitioned, after_columns) | ||
} else { | ||
let (columns, partitioned, after_columns) = if !self.peek_subquery_start() { | ||
let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; | ||
Default::default() | ||
}; | ||
|
||
let partitioned = self.parse_insert_partition()?; | ||
// Hive allows you to specify columns after partitions as well if you want. | ||
let after_columns = if dialect_of!(self is HiveDialect) { | ||
self.parse_parenthesized_column_list(Optional, false)? | ||
} else { | ||
vec![] | ||
}; | ||
(columns, partitioned, after_columns) | ||
let source = | ||
if self.peek_keyword(Keyword::FORMAT) || self.peek_keyword(Keyword::SETTINGS) { | ||
None | ||
} else { | ||
Default::default() | ||
Some(self.parse_query()?) | ||
}; | ||
|
||
let source = Some(self.parse_query()?); | ||
(columns, partitioned, after_columns, source) | ||
}; | ||
|
||
let (format_clause, settings) = if dialect_of!(self is ClickHouseDialect | GenericDialect) | ||
{ | ||
// Settings always comes before `FORMAT` for ClickHouse: | ||
// <https://clickhouse.com/docs/en/sql-reference/statements/insert-into> | ||
let settings = self.parse_settings()?; | ||
|
||
(columns, partitioned, after_columns, source) | ||
let format = if self.parse_keyword(Keyword::FORMAT) { | ||
Some(self.parse_format_clause(true)?) | ||
} else { | ||
None | ||
}; | ||
|
||
(format, settings) | ||
} else { | ||
(None, None) | ||
}; | ||
|
||
let insert_alias = if dialect_of!(self is MySqlDialect | GenericDialect) | ||
&& self.parse_keyword(Keyword::AS) | ||
{ | ||
|
@@ -11931,10 +11949,41 @@ impl<'a> Parser<'a> { | |
replace_into, | ||
priority, | ||
insert_alias, | ||
settings, | ||
format_clause, | ||
})) | ||
} | ||
} | ||
|
||
// Parses format clause used for [ClickHouse]. Formats are different when using `SELECT` and | ||
// `INSERT` and also when using the CLI for pipes. It may or may not take an additional | ||
// expression after the format so we try to parse the expression but allow failure. | ||
// | ||
// Since we know we never take an additional expression in `SELECT` context we never only try | ||
// to parse if `can_have_expression` is true. | ||
// | ||
// <https://clickhouse.com/docs/en/interfaces/formats> | ||
Comment on lines
+11958
to
+11965
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah so per earlier comment I'm imagining if we use different representations for the format clauses the impl isn't affected? |
||
pub fn parse_format_clause( | ||
&mut self, | ||
can_have_expression: bool, | ||
) -> Result<FormatClause, ParserError> { | ||
if self.parse_keyword(Keyword::NULL) { | ||
Ok(FormatClause::Null) | ||
} else { | ||
let ident = self.parse_identifier()?; | ||
let expr = if can_have_expression { | ||
match self.try_parse(|p| p.parse_comma_separated(|p| p.parse_expr())) { | ||
Ok(expr) => Some(expr), | ||
_ => None, | ||
} | ||
} else { | ||
None | ||
}; | ||
|
||
Ok(FormatClause::Identifier { ident, expr }) | ||
} | ||
} | ||
|
||
/// Returns true if the immediate tokens look like the | ||
/// beginning of a subquery. `(SELECT ...` | ||
fn peek_subquery_start(&mut self) -> bool { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1378,7 +1378,10 @@ fn test_query_with_format_clause() { | |
} else { | ||
assert_eq!( | ||
query.format_clause, | ||
Some(FormatClause::Identifier(Ident::new(*format))) | ||
Some(FormatClause::Identifier { | ||
ident: Ident::new(*format), | ||
expr: None | ||
}) | ||
); | ||
} | ||
} | ||
|
@@ -1398,6 +1401,27 @@ fn test_query_with_format_clause() { | |
} | ||
} | ||
|
||
#[test] | ||
fn test_insert_query_with_format_clause() { | ||
let cases = [ | ||
r#"INSERT INTO tbl FORMAT JSONEachRow {"id": 1, "value": "foo"}, {"id": 2, "value": "bar"}"#, | ||
r#"INSERT INTO tbl FORMAT JSONEachRow ["first", "second", "third"]"#, | ||
r#"INSERT INTO tbl FORMAT JSONEachRow [{"first": 1}]"#, | ||
r#"INSERT INTO tbl FORMAT jsoneachrow {"id": 1}"#, | ||
r#"INSERT INTO tbl (foo) FORMAT JSONAsObject {"foo": {"bar": {"x": "y"}, "baz": 1}}"#, | ||
Comment on lines
+1407
to
+1411
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we dedupe some of the test cases that seem to check case sensitivity? unless the impl is doing special case sensitive handling (which doesnt seem to be the case) I imagine no need to explicitly test for it |
||
r#"INSERT INTO tbl (foo, bar) FORMAT JSON {"foo": 1, "bar": 2}"#, | ||
r#"INSERT INTO tbl FORMAT CSV col1, col2, col3"#, | ||
r#"INSERT INTO tbl FORMAT LineAsString "I love apple", "I love banana", "I love orange""#, | ||
r#"INSERT INTO tbl (foo) SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#, | ||
r#"INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format', format_template_row = '/some/path/row.format' FORMAT Template"#, | ||
r#"INSERT INTO tbl SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#, | ||
]; | ||
|
||
for sql in &cases { | ||
clickhouse_and_generic().verified_stmt(sql); | ||
} | ||
} | ||
|
||
#[test] | ||
fn parse_create_table_on_commit_and_as_query() { | ||
let sql = r#"CREATE LOCAL TEMPORARY TABLE test ON COMMIT PRESERVE ROWS AS SELECT 1"#; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
hmm double checking if this line is correct, is it required that a format clause is present if setting is present? Maybe we can avoid describing semantics in any case, we just mentiong that this is clickhouse settings with a link to the docs