Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Support parsing of SQL code blocks in Markdown files #598

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/test-coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: |
poetry env use ${{ steps.setup-python.outputs.python-version }}
poetry install --sync --no-interaction -E jinjafmt --only main,test
poetry install --sync --no-interaction -E "jinjafmt markdownfmt" --only main,test
poetry env info

- name: Run unit tests and generate cov report
Expand All @@ -72,4 +72,4 @@ jobs:
CC_TEST_REPORTER_ID: ${{ secrets.CODE_CLIMATE_TEST_REPORTER_ID }}
with:
coverageLocations: |
${{ github.workspace }}/tests/.coverage/cov.xml:coverage.py
${{ github.workspace }}/tests/.coverage/cov.xml:coverage.py
6 changes: 5 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,13 @@ jobs:
version: 1.8.3

- name: Install python dependencies
run: poetry install --sync --no-interaction -E "jinjafmt markdownfmt" --only main,test

- name: Run tests
run: poetry run pytest

Expand Down Expand Up @@ -118,7 +122,7 @@ jobs:
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: |
poetry env use ${{ steps.setup-python.outputs.python-version }}
poetry install --sync --no-interaction -E jinjafmt --only main,test
poetry install --sync --no-interaction -E "jinjafmt markdownfmt" --only main,test

- name: Run tests
run: |
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

### Features

- Add support for formatting SQL code blocks in Markdown files. Introduces a new extra install (`pipx install shandy-sqlfmt[markdownfmt]`) and CLI option (`--no-markdownfmt`) ([#593](https://github.com/tconbeer/sqlfmt/issues/593) - thank you, [@michael-the1](https://github.com/michael-the1)).

## [0.24.0] - 2024-11-22

### Formatting Changes and Bug Fixes
Expand Down
3 changes: 3 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,6 @@ warn_unused_configs=True
no_implicit_reexport=True
strict_equality=True
extra_checks=True

[mypy-mistletoe.*]
ignore_missing_imports=True
70 changes: 41 additions & 29 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ packages = [

[tool.poetry.extras]
jinjafmt = ["black"]
markdownfmt = ["mistletoe"]
sqlfmt_primer = ["gitpython"]

[tool.poetry.dependencies]
Expand All @@ -40,6 +41,7 @@ jinja2 = "^3.0"

black = { version = "*", optional = true }
gitpython = { version = "^3.1.24", optional = true }
mistletoe = { version = '*', optional = true}

[tool.poetry.group.dev.dependencies]
pre-commit = ">=2.20,<4.0"
Expand Down
48 changes: 45 additions & 3 deletions src/sqlfmt/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@

from sqlfmt.analyzer import Analyzer
from sqlfmt.cache import Cache, check_cache, clear_cache, load_cache, write_cache
from sqlfmt.exception import SqlfmtEquivalenceError, SqlfmtError, SqlfmtUnicodeError
from sqlfmt.exception import (
SqlfmtEquivalenceError,
SqlfmtError,
SqlfmtImportError,
SqlfmtUnicodeError,
)
from sqlfmt.formatter import QueryFormatter
from sqlfmt.mode import Mode as Mode
from sqlfmt.query import Query
Expand Down Expand Up @@ -54,6 +59,38 @@ def format_string(source_string: str, mode: Mode) -> str:
return result


def format_markdown_string(source_string: str, mode: Mode) -> str:
    """
    Format the SQL code blocks of a Markdown string.

    Only fenced code blocks whose language is exactly "sql" are rewritten,
    by passing their contents through format_string; everything else in the
    document is handed back to mistletoe's Markdown renderer untouched.

    Args:
        source_string: the full text of the Markdown document.
        mode: the active Mode. If mode.no_markdownfmt is set, source_string
            is returned as-is and mistletoe is never imported.

    Returns:
        The Markdown document rendered by mistletoe's MarkdownRenderer, with
        each SQL code block replaced by its formatted form.

    Raises:
        SqlfmtImportError: if mistletoe (the markdownfmt extra) cannot be
            imported.
        Any exception raised by format_string for a SQL block propagates
        unchanged.
    """
    if mode.no_markdownfmt:
        return source_string

    # mistletoe is an optional extra, so it is imported lazily and a missing
    # install is reported as a sqlfmt error instead of a bare ImportError.
    try:
        from mistletoe import Document
        from mistletoe.block_token import BlockToken, CodeFence
        from mistletoe.markdown_renderer import MarkdownRenderer
    except ImportError as e:
        raise SqlfmtImportError(
            "Tried to format a Markdown file but markdownfmt extras are not installed."
        ) from e

    def format_sql_code_blocks(token: BlockToken) -> None:
        """Walk through the AST and format any SQL code blocks, in place."""
        if isinstance(token, CodeFence) and token.language == "sql":
            raw_text = token.children[0]
            raw_text.content = format_string(raw_text.content, mode)

        # Leaf tokens may have no `children` attribute at all, or may expose
        # it as None; treat both as "no children" so that iterating never
        # raises a TypeError.
        for child in getattr(token, "children", None) or ():
            if isinstance(child, BlockToken):
                format_sql_code_blocks(child)

    # NOTE(review): the document is deliberately parsed inside the renderer
    # context; mistletoe's MarkdownRenderer appears to require this so that
    # the parsed tokens can be rendered back to Markdown -- confirm against
    # the mistletoe docs before moving the Document() call.
    with MarkdownRenderer() as renderer:
        doc = Document(source_string)
        format_sql_code_blocks(doc)
        formatted_markdown_string: str = renderer.render(doc)

    return formatted_markdown_string


def run(
files: Collection[Path],
mode: Mode,
Expand Down Expand Up @@ -154,7 +191,7 @@ def _get_included_paths(paths: Iterable[Path], mode: Mode) -> Set[Path]:
for p in paths:
if p == STDIN_PATH:
include_set.add(p)
elif p.is_file() and str(p).endswith(tuple(mode.SQL_EXTENSIONS)):
elif p.is_file() and str(p).endswith(mode.included_file_extensions):
include_set.add(p)
elif p.is_dir():
include_set |= _get_included_paths(p.iterdir(), mode)
Expand Down Expand Up @@ -233,10 +270,15 @@ def _format_one(path: Path, mode: Mode) -> SqlFormatResult:
"""
Runs format_string on the contents of a single file (found at path). Handles
potential user errors in formatted code, and returns a SqlfmtResult

If the file is a Markdown file, only format its SQL code blocks.
"""
source, encoding, utf_bom = _read_path_or_stdin(path, mode)
try:
formatted = format_string(source, mode)
if path.is_file() and path.suffix == ".md":
formatted = format_markdown_string(source, mode)
else:
formatted = format_string(source, mode)
return SqlFormatResult(
source_path=path,
source_string=source,
Expand Down
9 changes: 9 additions & 0 deletions src/sqlfmt/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,15 @@
"or if black was already available in this environment."
),
)
@click.option(
"--no-markdownfmt",
envvar="SQLFMT_NO_MARKDOWNFMT",
is_flag=True,
help=(
"Do not format sql code blocks in markdown files. Only necessary "
"to specify this flag if sqlfmt was installed with the markdownfmt extra."
),
)
@click.option(
"-l",
"--line-length",
Expand Down
8 changes: 8 additions & 0 deletions src/sqlfmt/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ class SqlfmtEquivalenceError(SqlfmtError):
pass


class SqlfmtImportError(SqlfmtError):
    """
    Signals that a feature backed by an optional extra was requested, but
    the library providing that extra could not be imported
    """


class SqlfmtControlFlowException(Exception):
"""
Generic exception for exceptions used to manage control
Expand Down
16 changes: 14 additions & 2 deletions src/sqlfmt/mode.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import os
from dataclasses import dataclass, field
from importlib.util import find_spec
from pathlib import Path
from typing import List, Optional
from typing import List, Optional, Tuple

from sqlfmt.dialect import ClickHouse, Polyglot
from sqlfmt.exception import SqlfmtConfigError
Expand All @@ -14,7 +15,6 @@ class Mode:
report config. For more info on each option, see cli.py
"""

SQL_EXTENSIONS: List[str] = field(default_factory=lambda: [".sql", ".sql.jinja"])
dialect_name: str = "polyglot"
line_length: int = 88
check: bool = False
Expand All @@ -25,6 +25,7 @@ class Mode:
fast: bool = False
single_process: bool = False
no_jinjafmt: bool = False
no_markdownfmt: bool = False
reset_cache: bool = False
verbose: bool = False
quiet: bool = False
Expand All @@ -46,6 +47,17 @@ def __post_init__(self) -> None:
"which is not supported. Did you mean 'polyglot'?"
) from e

@property
def included_file_extensions(self) -> Tuple[str, ...]:
    """
    File name suffixes that are eligible for formatting.

    The SQL suffixes are always present. ".md" is appended only when
    no_markdownfmt is not set and the mistletoe package can be located.
    """
    sql_suffixes: Tuple[str, ...] = (".sql", ".sql.jinja")
    # Check the flag first so the package lookup is skipped entirely when
    # Markdown formatting has been switched off.
    markdown_enabled = not self.no_markdownfmt and bool(find_spec("mistletoe"))
    return (*sql_suffixes, ".md") if markdown_enabled else sql_suffixes

@property
def color(self) -> bool:
"""
Expand Down
14 changes: 7 additions & 7 deletions src/sqlfmt_primer/primer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,25 +39,25 @@ def get_projects() -> List[SQLProject]:
name="rittman",
git_url="https://github.com/tconbeer/rittman_ra_data_warehouse.git",
git_ref="418af64", # sqlfmt cd38a6c
expected_changed=0,
expected_unchanged=307,
expected_changed=3,
expected_unchanged=309,
expected_errored=4, # true mismatching brackets
sub_directory=Path(""),
),
SQLProject(
name="http_archive",
git_url="https://github.com/tconbeer/http_archive_almanac.git",
git_ref="414b535", # sqlfmt faaf71b
expected_changed=0,
expected_unchanged=1729,
expected_changed=1,
expected_unchanged=1802,
expected_errored=0,
sub_directory=Path("sql"),
),
SQLProject(
name="aqi",
git_url="https://github.com/tconbeer/aqi_livibility_analysis.git",
git_ref="f313f5b", # sqlfmt 54b8edd
expected_changed=0,
expected_changed=2,
expected_unchanged=7,
expected_errored=0,
sub_directory=Path("src/aqi_dbt/models"),
Expand All @@ -66,8 +66,8 @@ def get_projects() -> List[SQLProject]:
name="jaffle_shop",
git_url="https://github.com/tconbeer/jaffle_shop.git",
git_ref="5e63c7c", # sqlfmt 54b8edd
expected_changed=0,
expected_unchanged=5,
expected_changed=2,
expected_unchanged=6,
expected_errored=0,
sub_directory=Path(""),
),
Expand Down
5 changes: 5 additions & 0 deletions tests/data/fast/preformatted/007_markdown_file.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Hello

```sql
select 1
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Hello again

```python
import antigravity
```

```
SELECT 1
```
11 changes: 11 additions & 0 deletions tests/data/fast/unformatted/107_markdown_file.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Hello

```sql
SELECT 1
```
)))))__SQLFMT_OUTPUT__(((((
# Hello

```sql
select 1
```
Loading