diff --git a/src/formatter.rs b/src/formatter.rs
index 4f29b05..2ccab77 100644
--- a/src/formatter.rs
+++ b/src/formatter.rs
@@ -75,6 +75,39 @@ impl<'a> Formatter<'a> {
     }
 
     fn format_line_comment(&self, token: &Token<'_>, query: &mut String) {
+        // Look ahead: is this comment followed by whitespace and then a token that
+        // begins a value or clause (number, string, word, or reserved word)?
+        let is_whitespace_followed_by_special_token =
+            self.next_token(1).map_or(false, |current_token| {
+                current_token.kind == TokenKind::Whitespace
+                    && self.next_token(2).map_or(false, |next_token| {
+                        matches!(
+                            next_token.kind,
+                            TokenKind::Number
+                                | TokenKind::String
+                                | TokenKind::Word
+                                | TokenKind::ReservedTopLevel
+                                | TokenKind::ReservedTopLevelNoIndent
+                                | TokenKind::ReservedNewline
+                                | TokenKind::Reserved
+                        )
+                    })
+            });
+
+        // Did the token right before this comment contain a line break?
+        let follows_line_break = self
+            .previous_token(1)
+            .map_or(false, |previous_token| previous_token.value.contains('\n'));
+
+        if follows_line_break && is_whitespace_followed_by_special_token {
+            self.add_new_line(query);
+        } else if let Some(Token { value, .. }) = self.previous_token(2) {
+            // Comment trailing a comma: drop all trailing whitespace and leave one space.
+            if *value == "," {
+                self.trim_all_spaces_end(query);
+                query.push(' ');
+            }
+        }
         query.push_str(token.value);
         self.add_new_line(query);
     }
@@ -126,7 +159,7 @@ impl<'a> Formatter<'a> {
 
         // Take out the preceding space unless there was whitespace there in the original query
         // or another opening parens or line comment
-        let previous_token = self.previous_token();
+        let previous_token = self.previous_token(1);
         if previous_token.is_none()
             || !PRESERVE_WHITESPACE_FOR.contains(&previous_token.unwrap().kind)
         {
@@ -222,6 +255,11 @@ impl<'a> Formatter<'a> {
         query.truncate(query.trim_end_matches(|c| c == ' ' || c == '\t').len());
     }
 
+    /// Strips every trailing whitespace character, line breaks included, from `query`.
+    fn trim_all_spaces_end(&self, query: &mut String) {
+        query.truncate(query.trim_end_matches(|c: char| c.is_whitespace()).len());
+    }
+
     fn indent_comment(&self, token: &str) -> String {
         let mut combined = String::with_capacity(token.len() + 4);
         for (i, line) in token.split('\n').enumerate() {
@@ -264,8 +302,17 @@ impl<'a> Formatter<'a> {
         combined
     }
 
-    fn previous_token(&self) -> Option<&Token<'_>> {
-        let index = self.index.checked_sub(1);
+    fn previous_token(&self, idx: usize) -> Option<&Token<'_>> {
+        let index = self.index.checked_sub(idx);
+        if let Some(index) = index {
+            self.tokens.get(index)
+        } else {
+            None
+        }
+    }
+
+    fn next_token(&self, idx: usize) -> Option<&Token<'_>> {
+        let index = self.index.checked_add(idx);
         if let Some(index) = index {
             self.tokens.get(index)
         } else {
diff --git a/src/lib.rs b/src/lib.rs
index d594dea..154e05a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1504,4 +1504,91 @@ mod tests {
 
         assert_eq!(format(input, &QueryParams::None, options), expected);
     }
+
+    #[test]
+    fn it_handles_comments_correctly() {
+        let input = indoc!(
+            "
+            -- 创建一个外部表,存储销售数据
+            CREATE EXTERNAL TABLE IF NOT EXISTS sales_data (
+                -- 唯一标识订单ID
+                order_id BIGINT COMMENT 'Unique identifier for the order',
+
+                -- 客户ID
+                customer_id BIGINT COMMENT 'Unique identifier for the customer',
+            )
+            COMMENT 'Sales data table for storing transaction records';
+
+            -- 按销售日期和城市进行分区
+            PARTITIONED BY (
+                sale_year STRING COMMENT 'Year of the sale',
+                sale_month STRING COMMENT 'Month of the sale'
+            )
+
+            -- 设置数据存储位置
+            LOCATION '/user/hive/warehouse/sales_data'
+
+            -- 使用 ORC 存储格式
+            STORED AS ORC
+
+            -- 设置表的行格式
+            ROW FORMAT DELIMITED
+            FIELDS TERMINATED BY ','
+            LINES TERMINATED BY '\n'
+
+            -- 设置表属性
+            TBLPROPERTIES (
+                'orc.compress' = 'SNAPPY', -- 使用SNAPPY压缩
+                'transactional' = 'true', -- 启用事务支持
+                'orc.create.index' = 'true', -- 创建索引
+                'skip.header.line.count' = '1', -- 跳过CSV文件的第一行
+                'external.table.purge' = 'true' -- 在删除表时自动清理数据
+            );
+
+            -- 自动加载数据到 Hive 分区中
+            ALTER TABLE sales_data
+            ADD PARTITION (sale_year = '2024', sale_month = '08')
+            LOCATION '/user/hive/warehouse/sales_data/2024/08';"
+        );
+        let options = FormatOptions {
+            indent: Indent::Spaces(4),
+            ..Default::default()
+        };
+        let expected = indoc!(
+            "
+            -- 创建一个外部表,存储销售数据
+            CREATE EXTERNAL TABLE IF NOT EXISTS sales_data (
+                -- 唯一标识订单ID
+                order_id BIGINT COMMENT 'Unique identifier for the order',
+                -- 客户ID
+                customer_id BIGINT COMMENT 'Unique identifier for the customer',
+            ) COMMENT 'Sales data table for storing transaction records';
+            -- 按销售日期和城市进行分区
+            PARTITIONED BY (
+                sale_year STRING COMMENT 'Year of the sale',
+                sale_month STRING COMMENT 'Month of the sale'
+            )
+            -- 设置数据存储位置
+            LOCATION '/user/hive/warehouse/sales_data'
+            -- 使用 ORC 存储格式
+            STORED AS ORC
+            -- 设置表的行格式
+            ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
+            -- 设置表属性
+            TBLPROPERTIES (
+                'orc.compress' = 'SNAPPY', -- 使用SNAPPY压缩
+                'transactional' = 'true', -- 启用事务支持
+                'orc.create.index' = 'true', -- 创建索引
+                'skip.header.line.count' = '1', -- 跳过CSV文件的第一行
+                'external.table.purge' = 'true' -- 在删除表时自动清理数据
+            );
+            -- 自动加载数据到 Hive 分区中
+            ALTER TABLE
+                sales_data
+            ADD
+                PARTITION (sale_year = '2024', sale_month = '08') LOCATION '/user/hive/warehouse/sales_data/2024/08';"
+        );
+
+        assert_eq!(format(input, &QueryParams::None, options), expected);
+    }
 }