Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix/#639/support databricks query hints #647

Merged
merged 3 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ All notable changes to this project will be documented in this file.

- sqlfmt no longer adds a space between the function name and parens for `filter()` and `isnull()` (but it also permits `filter ()` and `isnull ()` to support dialects where those are operators, not function names) ([#641](https://github.com/tconbeer/sqlfmt/issues/641) - thank you [@williamscs](https://github.com/williamscs) and [@hongtron](https://github.com/hongtron)!).
- sqlfmt now supports Spark type-hinted numeric literals like `32y` and `+3.2e6bd` and will not introduce a space between the digits and their type suffix ([#640](https://github.com/tconbeer/sqlfmt/issues/640) - thank you [@ShaneMazur](https://github.com/ShaneMazur)!).
- sqlfmt now supports Databricks query hint comments like `/*+ COALESCE(3) */` ([#639](https://github.com/tconbeer/sqlfmt/issues/639) - thank you [@wr-atlas](https://github.com/wr-atlas)!).

## [0.23.3] - 2024-11-12

Expand Down
14 changes: 11 additions & 3 deletions src/sqlfmt/comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,18 @@ def __str__(self) -> str:
without preceding whitespace, with a single space between the marker
and the comment text.
"""
if self.is_multiline or self.formatting_disabled:
return f"{self.token.token}"
if (
self.is_multiline
or self.formatting_disabled
or self.is_databricks_query_hint
):
return self.token.token
else:
marker, comment_text = self._comment_parts()
if comment_text:
return f"{marker} {comment_text}"
else:
return f"{marker}"
return marker

def __len__(self) -> int:
return len(str(self))
Expand Down Expand Up @@ -85,6 +89,10 @@ def is_multiline(self) -> bool:
def is_c_style(self) -> bool:
return self.token.token.startswith("/*")

@property
def is_databricks_query_hint(self) -> bool:
return self.token.token.startswith("/*+")

@property
def is_inline(self) -> bool:
return not self.is_standalone and not self.is_multiline and not self.is_c_style
Expand Down
6 changes: 6 additions & 0 deletions src/sqlfmt/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ def _extract_components(
raise CannotMergeException(
"Can't merge lines with inline comments and other comments"
)
elif any(
[comment.is_databricks_query_hint for comment in line.comments]
):
raise CannotMergeException(
"Can't merge lines with a databricks type hint comment"
)
elif (
len(line.comments) == 1
and len(line.nodes) > 1
Expand Down
102 changes: 102 additions & 0 deletions tests/data/unformatted/134_databricks_type_hints.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
-- see: https://github.com/tconbeer/sqlfmt/issues/639
-- source: https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-hints.html
SELECT /*+ COALESCE(3) */ * FROM t;
SELECT /*+ REPARTITION(3) */ * FROM t;
SELECT /*+ REPARTITION(c) */ * FROM t;
SELECT /*+ REPARTITION(3, c) */ * FROM t;
SELECT /*+ REPARTITION_BY_RANGE(c) */ * FROM t;
SELECT /*+ REPARTITION_BY_RANGE(3, c) */ * FROM t;
SELECT /*+ BROADCAST(t1) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;
SELECT /*+ BROADCASTJOIN (t1) */ * FROM t1 left JOIN t2 ON t1.key = t2.key;
SELECT /*+ MAPJOIN(t2) */ * FROM t1 right JOIN t2 ON t1.key = t2.key;

-- Join Hints for shuffle sort merge join
SELECT /*+ SHUFFLE_MERGE(t1) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;
SELECT /*+ MERGEJOIN(t2) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;
SELECT /*+ MERGE(t1) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;

-- Join Hints for shuffle hash join
SELECT /*+ SHUFFLE_HASH(t1) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;

-- Join Hints for shuffle-and-replicate nested loop join
SELECT /*+ SHUFFLE_REPLICATE_NL(t1) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;
SELECT /*+ BROADCAST(t1), MERGE(t1, t2) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;
)))))__SQLFMT_OUTPUT__(((((
-- see: https://github.com/tconbeer/sqlfmt/issues/639
-- source:
-- https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-hints.html
select /*+ COALESCE(3) */
*
from t
;
select /*+ REPARTITION(3) */
*
from t
;
select /*+ REPARTITION(c) */
*
from t
;
select /*+ REPARTITION(3, c) */
*
from t
;
select /*+ REPARTITION_BY_RANGE(c) */
*
from t
;
select /*+ REPARTITION_BY_RANGE(3, c) */
*
from t
;
select /*+ BROADCAST(t1) */
*
from t1
inner join t2 on t1.key = t2.key
;
select /*+ BROADCASTJOIN (t1) */
*
from t1
left join t2 on t1.key = t2.key
;
select /*+ MAPJOIN(t2) */
*
from t1
right join t2 on t1.key = t2.key
;

-- Join Hints for shuffle sort merge join
select /*+ SHUFFLE_MERGE(t1) */
*
from t1
inner join t2 on t1.key = t2.key
;
select /*+ MERGEJOIN(t2) */
*
from t1
inner join t2 on t1.key = t2.key
;
select /*+ MERGE(t1) */
*
from t1
inner join t2 on t1.key = t2.key
;

-- Join Hints for shuffle hash join
select /*+ SHUFFLE_HASH(t1) */
*
from t1
inner join t2 on t1.key = t2.key
;

-- Join Hints for shuffle-and-replicate nested loop join
select /*+ SHUFFLE_REPLICATE_NL(t1) */
*
from t1
inner join t2 on t1.key = t2.key
;
select /*+ BROADCAST(t1), MERGE(t1, t2) */
*
from t1
inner join t2 on t1.key = t2.key
;
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
select /*+ my query hint */
1
)))))__SQLFMT_OUTPUT__(((((
select
1
1 change: 1 addition & 0 deletions tests/functional_tests/test_general_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
"unformatted/131_assignment_statement.sql",
"unformatted/132_spark_number_literals.sql",
"unformatted/133_for_else.sql",
"unformatted/134_databricks_type_hints.sql",
"unformatted/200_base_model.sql",
"unformatted/201_basic_snapshot.sql",
"unformatted/202_unpivot_macro.sql",
Expand Down
31 changes: 31 additions & 0 deletions tests/unit_tests/test_comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pytest
from sqlfmt.comment import Comment
from sqlfmt.node import Node
from sqlfmt.node_manager import NodeManager
from sqlfmt.token import Token, TokenType

Expand Down Expand Up @@ -60,6 +61,30 @@ def multiline_comment() -> Comment:
return comment


@pytest.fixture
def datbricks_query_hint_comment() -> Comment:
n = Node(
Token(
type=TokenType.UNTERM_KEYWORD,
prefix="",
token="select",
spos=0,
epos=6,
),
previous_node=None,
prefix="",
value="select",
open_brackets=[],
open_jinja_blocks=[],
formatting_disabled=[],
)
t = Token(
type=TokenType.COMMENT, prefix=" ", token="/*+ hint here */", spos=6, epos=23
)
comment = Comment(t, is_standalone=False, previous_node=n)
return comment


@pytest.fixture
def fmt_disabled_comment() -> Comment:
t = Token(type=TokenType.FMT_OFF, prefix="", token="--fmt: off", spos=0, epos=10)
Expand All @@ -81,11 +106,13 @@ def test_get_marker(
short_mysql_comment: Comment,
nospace_comment: Comment,
short_js_comment: Comment,
datbricks_query_hint_comment: Comment,
) -> None:
assert short_comment._get_marker() == ("--", 3)
assert short_mysql_comment._get_marker() == ("#", 2)
assert short_js_comment._get_marker() == ("//", 3)
assert nospace_comment._get_marker() == ("--", 2)
assert datbricks_query_hint_comment._get_marker() == ("/*", 2)


def test_comment_parts(
Expand Down Expand Up @@ -210,3 +237,7 @@ def test_no_wrap_long_jinja_comments() -> None:
rendered = comment.render_standalone(88, "")

assert rendered == comment_str + "\n"


def test_no_add_space_databricks_hint(datbricks_query_hint_comment: Comment) -> None:
assert str(datbricks_query_hint_comment) == datbricks_query_hint_comment.token.token
12 changes: 12 additions & 0 deletions tests/unit_tests/test_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,3 +560,15 @@ def test_do_not_merge_operator_sequences_across_commas(merger: LineMerger) -> No
merged_lines = merger.maybe_merge_lines(raw_query.lines)
result_string = "".join([str(line) for line in merged_lines])
assert result_string == expected_string


def test_do_not_merge_databricks_query_hints(merger: LineMerger) -> None:
source_string, expected_string = read_test_data(
"unit_tests/test_merger/test_no_merge_databricks_query_hints.sql"
)
raw_query = merger.mode.dialect.initialize_analyzer(
merger.mode.line_length
).parse_query(source_string)
merged_lines = merger.maybe_merge_lines(raw_query.lines)
result_string = "".join([str(line) for line in merged_lines])
assert result_string == expected_string