feat: Add support for MSSQL table options (#1414)

apache · Sep 11, 2024 · b9e7754 · b9e7754
1 parent cb0c511
commit b9e7754
Show file tree

Hide file tree

Showing 7 changed files with 523 additions and 65 deletions.
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
@@ -2082,6 +2082,15 @@ pub enum CreateTableOptions {
     /// e.g. `WITH (description = "123")`
     ///
     /// <https://www.postgresql.org/docs/current/sql-createtable.html>
+    ///
+    /// MSSQL supports more specific options that's not only key-value pairs.
+    ///
+    /// WITH (
+    ///     DISTRIBUTION = ROUND_ROBIN,
+    ///     CLUSTERED INDEX (column_a DESC, column_b)
+    /// )
+    ///
+    /// <https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-azure-sql-data-warehouse?view=aps-pdw-2016-au7#syntax>
     With(Vec<SqlOption>),
     /// Options specified using the `OPTIONS` keyword.
     /// e.g. `OPTIONS(description = "123")`
@@ -5728,14 +5737,119 @@ pub struct HiveFormat {
 #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
-pub struct SqlOption {
+pub struct ClusteredIndex {
     pub name: Ident,
-    pub value: Expr,
+    pub asc: Option<bool>,
+}
+
+impl fmt::Display for ClusteredIndex {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self.name)?;
+        match self.asc {
+            Some(true) => write!(f, " ASC"),
+            Some(false) => write!(f, " DESC"),
+            _ => Ok(()),
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum TableOptionsClustered {
+    ColumnstoreIndex,
+    ColumnstoreIndexOrder(Vec<Ident>),
+    Index(Vec<ClusteredIndex>),
+}
+
+impl fmt::Display for TableOptionsClustered {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            TableOptionsClustered::ColumnstoreIndex => {
+                write!(f, "CLUSTERED COLUMNSTORE INDEX")
+            }
+            TableOptionsClustered::ColumnstoreIndexOrder(values) => {
+                write!(
+                    f,
+                    "CLUSTERED COLUMNSTORE INDEX ORDER ({})",
+                    display_comma_separated(values)
+                )
+            }
+            TableOptionsClustered::Index(values) => {
+                write!(f, "CLUSTERED INDEX ({})", display_comma_separated(values))
+            }
+        }
+    }
+}
+
+/// Specifies which partition the boundary values on table partitioning belongs to.
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum PartitionRangeDirection {
+    Left,
+    Right,
+}
+
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum SqlOption {
+    /// Clustered represents the clustered version of table storage for MSSQL.
+    ///
+    /// <https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-azure-sql-data-warehouse?view=aps-pdw-2016-au7#TableOptions>
+    Clustered(TableOptionsClustered),
+    /// Single identifier options, e.g. `HEAP` for MSSQL.
+    ///
+    /// <https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-azure-sql-data-warehouse?view=aps-pdw-2016-au7#TableOptions>
+    Ident(Ident),
+    /// Any option that consists of a key value pair where the value is an expression. e.g.
+    ///
+    ///   WITH(DISTRIBUTION = ROUND_ROBIN)
+    KeyValue { key: Ident, value: Expr },
+    /// One or more table partitions and represents which partition the boundary values belong to,
+    /// e.g.
+    ///
+    ///   PARTITION (id RANGE LEFT FOR VALUES (10, 20, 30, 40))
+    ///
+    /// <https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-azure-sql-data-warehouse?view=aps-pdw-2016-au7#TablePartitionOptions>
+    Partition {
+        column_name: Ident,
+        range_direction: Option<PartitionRangeDirection>,
+        for_values: Vec<Expr>,
+    },
 }
 
 impl fmt::Display for SqlOption {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{} = {}", self.name, self.value)
+        match self {
+            SqlOption::Clustered(c) => write!(f, "{}", c),
+            SqlOption::Ident(ident) => {
+                write!(f, "{}", ident)
+            }
+            SqlOption::KeyValue { key: name, value } => {
+                write!(f, "{} = {}", name, value)
+            }
+            SqlOption::Partition {
+                column_name,
+                range_direction,
+                for_values,
+            } => {
+                let direction = match range_direction {
+                    Some(PartitionRangeDirection::Left) => " LEFT",
+                    Some(PartitionRangeDirection::Right) => " RIGHT",
+                    None => "",
+                };
+
+                write!(
+                    f,
+                    "PARTITION ({} RANGE{} FOR VALUES ({}))",
+                    column_name,
+                    direction,
+                    display_comma_separated(for_values)
+                )
+            }
+        }
     }
 }
 

diff --git a/src/keywords.rs b/src/keywords.rs
@@ -166,6 +166,7 @@ define_keywords!(
     COLLECTION,
     COLUMN,
     COLUMNS,
+    COLUMNSTORE,
     COMMENT,
     COMMIT,
     COMMITTED,
@@ -355,6 +356,7 @@ define_keywords!(
     HASH,
     HAVING,
     HEADER,
+    HEAP,
     HIGH_PRIORITY,
     HISTORY,
     HIVEVAR,

diff --git a/src/parser/mod.rs b/src/parser/mod.rs
@@ -6480,10 +6480,91 @@ impl<'a> Parser<'a> {
     }
 
     pub fn parse_sql_option(&mut self) -> Result<SqlOption, ParserError> {
-        let name = self.parse_identifier(false)?;
-        self.expect_token(&Token::Eq)?;
-        let value = self.parse_expr()?;
-        Ok(SqlOption { name, value })
+        let is_mssql = dialect_of!(self is MsSqlDialect|GenericDialect);
+
+        match self.peek_token().token {
+            Token::Word(w) if w.keyword == Keyword::HEAP && is_mssql => {
+                Ok(SqlOption::Ident(self.parse_identifier(false)?))
+            }
+            Token::Word(w) if w.keyword == Keyword::PARTITION && is_mssql => {
+                self.parse_option_partition()
+            }
+            Token::Word(w) if w.keyword == Keyword::CLUSTERED && is_mssql => {
+                self.parse_option_clustered()
+            }
+            _ => {
+                let name = self.parse_identifier(false)?;
+                self.expect_token(&Token::Eq)?;
+                let value = self.parse_expr()?;
+
+                Ok(SqlOption::KeyValue { key: name, value })
+            }
+        }
+    }
+
+    pub fn parse_option_clustered(&mut self) -> Result<SqlOption, ParserError> {
+        if self.parse_keywords(&[
+            Keyword::CLUSTERED,
+            Keyword::COLUMNSTORE,
+            Keyword::INDEX,
+            Keyword::ORDER,
+        ]) {
+            Ok(SqlOption::Clustered(
+                TableOptionsClustered::ColumnstoreIndexOrder(
+                    self.parse_parenthesized_column_list(IsOptional::Mandatory, false)?,
+                ),
+            ))
+        } else if self.parse_keywords(&[Keyword::CLUSTERED, Keyword::COLUMNSTORE, Keyword::INDEX]) {
+            Ok(SqlOption::Clustered(
+                TableOptionsClustered::ColumnstoreIndex,
+            ))
+        } else if self.parse_keywords(&[Keyword::CLUSTERED, Keyword::INDEX]) {
+            self.expect_token(&Token::LParen)?;
+
+            let columns = self.parse_comma_separated(|p| {
+                let name = p.parse_identifier(false)?;
+                let asc = p.parse_asc_desc();
+
+                Ok(ClusteredIndex { name, asc })
+            })?;
+
+            self.expect_token(&Token::RParen)?;
+
+            Ok(SqlOption::Clustered(TableOptionsClustered::Index(columns)))
+        } else {
+            Err(ParserError::ParserError(
+                "invalid CLUSTERED sequence".to_string(),
+            ))
+        }
+    }
+
+    pub fn parse_option_partition(&mut self) -> Result<SqlOption, ParserError> {
+        self.expect_keyword(Keyword::PARTITION)?;
+        self.expect_token(&Token::LParen)?;
+        let column_name = self.parse_identifier(false)?;
+
+        self.expect_keyword(Keyword::RANGE)?;
+        let range_direction = if self.parse_keyword(Keyword::LEFT) {
+            Some(PartitionRangeDirection::Left)
+        } else if self.parse_keyword(Keyword::RIGHT) {
+            Some(PartitionRangeDirection::Right)
+        } else {
+            None
+        };
+
+        self.expect_keywords(&[Keyword::FOR, Keyword::VALUES])?;
+        self.expect_token(&Token::LParen)?;
+
+        let for_values = self.parse_comma_separated(Parser::parse_expr)?;
+
+        self.expect_token(&Token::RParen)?;
+        self.expect_token(&Token::RParen)?;
+
+        Ok(SqlOption::Partition {
+            column_name,
+            range_direction,
+            for_values,
+        })
     }
 
     pub fn parse_partition(&mut self) -> Result<Partition, ParserError> {
@@ -11014,17 +11095,23 @@ impl<'a> Parser<'a> {
         })
     }
 
-    /// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY)
-    pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, ParserError> {
-        let expr = self.parse_expr()?;
-
-        let asc = if self.parse_keyword(Keyword::ASC) {
+    /// Parse ASC or DESC, returns an Option with true if ASC, false of DESC or `None` if none of
+    /// them.
+    pub fn parse_asc_desc(&mut self) -> Option<bool> {
+        if self.parse_keyword(Keyword::ASC) {
             Some(true)
         } else if self.parse_keyword(Keyword::DESC) {
             Some(false)
         } else {
             None
-        };
+        }
+    }
+
+    /// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY)
+    pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, ParserError> {
+        let expr = self.parse_expr()?;
+
+        let asc = self.parse_asc_desc();
 
         let nulls_first = if self.parse_keywords(&[Keyword::NULLS, Keyword::FIRST]) {
             Some(true)

diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs
@@ -267,8 +267,8 @@ fn parse_create_view_with_options() {
                     ViewColumnDef {
                         name: Ident::new("age"),
                         data_type: None,
-                        options: Some(vec![SqlOption {
-                            name: Ident::new("description"),
+                        options: Some(vec![SqlOption::KeyValue {
+                            key: Ident::new("description"),
                             value: Expr::Value(Value::DoubleQuotedString("field age".to_string())),
                         }])
                     },
@@ -287,8 +287,8 @@ fn parse_create_view_with_options() {
                 unreachable!()
             };
             assert_eq!(
-                &SqlOption {
-                    name: Ident::new("description"),
+                &SqlOption::KeyValue {
+                    key: Ident::new("description"),
                     value: Expr::Value(Value::DoubleQuotedString(
                         "a view that expires in 2 days".to_string()
                     )),
@@ -414,8 +414,8 @@ fn parse_create_table_with_options() {
                             },
                             ColumnOptionDef {
                                 name: None,
-                                option: ColumnOption::Options(vec![SqlOption {
-                                    name: Ident::new("description"),
+                                option: ColumnOption::Options(vec![SqlOption::KeyValue {
+                                    key: Ident::new("description"),
                                     value: Expr::Value(Value::DoubleQuotedString(
                                         "field x".to_string()
                                     )),
@@ -429,8 +429,8 @@ fn parse_create_table_with_options() {
                         collation: None,
                         options: vec![ColumnOptionDef {
                             name: None,
-                            option: ColumnOption::Options(vec![SqlOption {
-                                name: Ident::new("description"),
+                            option: ColumnOption::Options(vec![SqlOption::KeyValue {
+                                key: Ident::new("description"),
                                 value: Expr::Value(Value::DoubleQuotedString(
                                     "field y".to_string()
                                 )),
@@ -448,12 +448,12 @@ fn parse_create_table_with_options() {
                         Ident::new("age"),
                     ])),
                     Some(vec![
-                        SqlOption {
-                            name: Ident::new("partition_expiration_days"),
+                        SqlOption::KeyValue {
+                            key: Ident::new("partition_expiration_days"),
                             value: Expr::Value(number("1")),
                         },
-                        SqlOption {
-                            name: Ident::new("description"),
+                        SqlOption::KeyValue {
+                            key: Ident::new("description"),
                             value: Expr::Value(Value::DoubleQuotedString(
                                 "table option description".to_string()
                             )),
@@ -2005,8 +2005,8 @@ fn test_bigquery_create_function() {
             function_body: Some(CreateFunctionBody::AsAfterOptions(Expr::Value(number(
                 "42"
             )))),
-            options: Some(vec![SqlOption {
-                name: Ident::new("x"),
+            options: Some(vec![SqlOption::KeyValue {
+                key: Ident::new("x"),
                 value: Expr::Value(Value::SingleQuotedString("y".into())),
             }]),
             behavior: None,