Skip to content

Commit

Permalink
Add SQLFLUFF linter for clickhouse queries (#6209)
Browse files Browse the repository at this point in the history
Add a SQLFluff linter that uses the ClickHouse dialect for linting
ClickHouse queries.
  • Loading branch information
Camyll authored Jan 25, 2025
1 parent cd68c38 commit f4c813d
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 8 deletions.
19 changes: 19 additions & 0 deletions .lintrunner.toml
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,25 @@ init_command = [
]
is_formatter = true

[[linter]]
code = 'SQLFLUFF'
# TODO: broaden to all ClickHouse queries once they pass the linter:
# include_patterns = ['torchci/clickhouse_queries/**/*.sql']
include_patterns = ['torchci/clickhouse_queries/workflow_load/query.sql']
exclude_patterns = [
]
command = [
'python3',
'tools/linter/adapters/sqlfluff_linter.py',
'@{{PATHSFILE}}',
]
init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'sqlfluff==3.3.0',
]
is_formatter = true

[[linter]]
code = 'RUSTFMT'
include_patterns = ['**/*.rs']
Expand Down
2 changes: 2 additions & 0 deletions .sqlfluff
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[sqlfluff]
exclude_rules = capitalisation.functions
156 changes: 156 additions & 0 deletions tools/linter/adapters/sqlfluff_linter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import argparse
import concurrent.futures
import json
import logging
import os
import re
import subprocess
import tempfile
import time
from enum import Enum
from typing import List, NamedTuple, Optional, Pattern

from isort.api import _tmp_file


# Linter identifier reported in every LintMessage; must match the
# `code = 'SQLFLUFF'` entry in .lintrunner.toml.
LINTER_CODE = "SQLFLUFF"


class LintSeverity(str, Enum):
    """Severity levels understood by the lintrunner JSON protocol."""

    ERROR = "error"
    WARNING = "warning"
    ADVICE = "advice"
    DISABLED = "disabled"


class LintMessage(NamedTuple):
    """A single lint result, emitted as one JSON object (via ``_asdict``)."""

    # Path of the offending file; None for tool-level failures.
    path: Optional[str]
    line: Optional[int]
    char: Optional[int]
    # Linter identifier (LINTER_CODE).
    code: str
    severity: LintSeverity
    # Short rule/category name, e.g. "format" or "command-failed".
    name: str
    # Full original file contents, when a replacement is suggested.
    original: Optional[str]
    # Proposed new file contents; lintrunner can apply this as a fix.
    replacement: Optional[str]
    description: Optional[str]


RESULTS_RE: Pattern[str] = re.compile(
r"""(?mx)
^
(?P<file>.*?):
(?P<line>\d+):
(?P<char>\d+):
\s(?P<message>.*)
\s(?P<code>\[.*\])
$
"""
)


def run_command(
    args: List[str],
) -> "subprocess.CompletedProcess[bytes]":
    """Run *args* as a subprocess with stdout/stderr captured as bytes.

    The command line is logged at debug level before execution, and the
    wall-clock duration is logged afterwards even if the call raises.
    """
    logging.debug("$ %s", " ".join(args))
    began = time.monotonic()
    try:
        return subprocess.run(args, capture_output=True)
    finally:
        elapsed_ms = (time.monotonic() - began) * 1000
        logging.debug("took %dms", elapsed_ms)


def check_file(
    filename: str,
) -> List[LintMessage]:
    """Run ``sqlfluff format`` on *filename* and report a suggested rewrite.

    ClickHouse query parameters such as ``{repo: String}`` are not valid
    SQL, so braces are quoted (``'{...}'``) before formatting and unquoted
    afterwards.

    Returns an empty list when the file is already formatted, a single
    WARNING message carrying the formatted text as ``replacement`` when it
    is not, or a single ERROR message if sqlfluff could not be executed.
    """
    with open(filename, "r") as f:
        original = f.read()
    # Quote ClickHouse query parameters so sqlfluff can parse the file.
    original_edited = original.replace("{", "'{").replace("}", "}'")

    # mkstemp + explicit unlink is portable: NamedTemporaryFile cannot be
    # reopened by name on Windows while the original handle is still open.
    fd, tmp_path = tempfile.mkstemp(suffix=".sql")
    try:
        with os.fdopen(fd, "w") as tmp:
            tmp.write(original_edited)
        try:
            proc = run_command(
                [
                    "sqlfluff",
                    "format",
                    "--dialect",
                    "clickhouse",
                    tmp_path,
                ]
            )
        except OSError as err:
            # sqlfluff missing or not executable — report, don't crash.
            return [
                LintMessage(
                    path=None,
                    line=None,
                    char=None,
                    code=LINTER_CODE,
                    severity=LintSeverity.ERROR,
                    name="command-failed",
                    original=None,
                    replacement=None,
                    description=(f"Failed due to {err.__class__.__name__}:\n{err}"),
                )
            ]

        with open(tmp_path, "r") as f:
            # Undo the brace quoting applied above.
            replacement = f.read().replace("'{", "{").replace("}'", "}")
    finally:
        os.unlink(tmp_path)

    if original == replacement:
        return []
    # NOTE(review): proc.returncode is deliberately not checked here — the
    # formatted-file diff is the signal and sqlfluff's stdout is surfaced as
    # the description. Confirm unfixable violations are acceptable to skip.
    return [
        LintMessage(
            path=filename,
            line=None,
            char=None,
            code=LINTER_CODE,
            severity=LintSeverity.WARNING,
            name="format",
            original=original,
            replacement=replacement,
            description=proc.stdout.decode("utf-8"),
        )
    ]


def main() -> None:
    """Lint the files named on the command line, printing JSON results.

    lintrunner invokes this adapter with an ``@pathsfile`` argument (one
    path per line, enabled by ``fromfile_prefix_chars``). Each lint
    message is printed as a single JSON object per line on stdout.
    """
    parser = argparse.ArgumentParser(
        # Plain string: was an f-string with no placeholders (F541).
        description="sqlfluff format linter for sql queries.",
        fromfile_prefix_chars="@",
    )
    parser.add_argument(
        "filenames",
        nargs="+",
        help="paths to lint",
    )

    args = parser.parse_args()

    # Files are independent, so lint them in parallel on a thread pool.
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=os.cpu_count(),
        thread_name_prefix="Thread",
    ) as executor:
        futures = {
            executor.submit(
                check_file,
                filename,
            ): filename
            for filename in args.filenames
        }
        for future in concurrent.futures.as_completed(futures):
            try:
                for lint_message in future.result():
                    print(json.dumps(lint_message._asdict()), flush=True)
            except Exception:
                # Identify which file failed before propagating.
                logging.critical('Failed at "%s".', futures[future])
                raise


# Entry point when lintrunner executes this adapter as a script.
if __name__ == "__main__":
    main()
17 changes: 9 additions & 8 deletions torchci/clickhouse_queries/workflow_load/query.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,19 @@ SELECT
workflow.created_at
) AS granularity_bucket,
workflow.name,
COUNT(*) as count
COUNT(*) AS count
FROM
default.workflow_run workflow final
default.workflow_run workflow FINAL
WHERE
-- optimization to make query faster
workflow.id in (
select id from materialized_views.workflow_run_by_created_at
where created_at >= {startTime: DateTime64(9)}
AND created_at <= {stopTime: DateTime64(9)}
workflow.id IN (
SELECT id FROM materialized_views.workflow_run_by_created_at
WHERE
created_at >= {startTime: DateTime64(9)}
AND created_at <= {stopTime: DateTime64(9)}
)
-- re check for final
and workflow.created_at >= {startTime: DateTime64(9)}
AND workflow.created_at >= {startTime: DateTime64(9)}
AND workflow.created_at <= {stopTime: DateTime64(9)}
AND workflow.name IN (
'pull',
Expand All @@ -30,7 +31,7 @@ WHERE
'rocm',
'inductor-rocm'
)
AND workflow.repository.'full_name' like {repo: String}
AND workflow.repository.'full_name' LIKE {repo: String}
GROUP BY
granularity_bucket,
workflow.name
Expand Down

0 comments on commit f4c813d

Please sign in to comment.