Skip to content

Commit

Permalink
Improve SplitSummaryAndDocstringFormatter (#69)
Browse files Browse the repository at this point in the history
Co-authored-by: Pierre Sassoulas <pierre.sassoulas@gmail.com>
  • Loading branch information
DanielNoord and Pierre-Sassoulas authored Mar 13, 2022
1 parent f01ed1b commit 26c6f1b
Show file tree
Hide file tree
Showing 23 changed files with 265 additions and 32 deletions.
15 changes: 10 additions & 5 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Current usage of ``pydocstringformatter``:
.. code-block:: shell
usage: pydocstringformatter [-h] [-w] [--quiet] [-v] [--exclude EXCLUDE]
[--max-summary-lines MAX_SUMMARY_LINES]
[--summary-quotes-same-line]
[--split-summary-body --no-split-summary-body]
[--strip-whitespaces --no-strip-whitespaces]
Expand All @@ -29,6 +30,9 @@ Current usage of ``pydocstringformatter``:
configuration:
--exclude EXCLUDE A comma separated list of glob patterns of file path
names not to be formatted.
--max-summary-lines MAX_SUMMARY_LINES
The maximum number of lines a summary can span. The
default value is 1.
--summary-quotes-same-line
Force the start of a multi-line docstring to be on the
same line as the opening quotes. Similar to how this
Expand Down Expand Up @@ -65,8 +69,9 @@ Current usage of ``pydocstringformatter``:
--split-summary-body, --no-split-summary-body
Activate or deactivate split-summary-body: Split the
summary and body of a docstring based on a period in
between them. This formatter is currently optional as
it's considered somewhat opinionated and might require
major refactoring for existing projects. (default:
False)
summary and body of a docstring based on a period and
max length. The maximum length of a summary can be set
with the --max-summary-lines option. This formatter is
currently optional as it's considered somewhat
opinionated and might require major refactoring for
existing projects. (default: False)
11 changes: 11 additions & 0 deletions pydocstringformatter/configuration/arguments_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,17 @@ def _register_arguments(self, version: str) -> None:
),
)

self.configuration_group.add_argument(
"--max-summary-lines",
action="store",
default=1,
type=int,
help=(
"The maximum numbers of lines a summary can span. "
"The default value is 1."
),
)

self.configuration_group.add_argument(
"--summary-quotes-same-line",
action="store_true",
Expand Down
100 changes: 75 additions & 25 deletions pydocstringformatter/formatting/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,42 +115,92 @@ def _treat_string(
return tokeninfo.string


class SplitSummaryAndDocstringFormatter(StringFormatter):
"""Split the summary and body of a docstring based on a period in between them.
class SplitSummaryAndDocstringFormatter(StringAndQuotesFormatter):
"""Split the summary and body of a docstring based on a period and max length.
The maximum length of a summary can be set with the --max-summary-lines option.
This formatter is currently optional as its considered somwehat opinionated
and might require major refactoring for existing projects.
"""

name = "split-summary-body"
# TODO(#68): Make this non-optional
optional = True

def _treat_string(self, tokeninfo: tokenize.TokenInfo, indent_length: int) -> str:
end_of_sentence_period = re.compile(
r"""
(?<!e.g|i.e|etc) # Not preceded by 'e.g', 'i.e', 'etc'
\. # A dot
(?!\w) # Not followed by a letter
""",
re.X,
)
"""Pattern to match against an end of sentence period."""

# pylint: disable-next=too-many-branches
def _treat_string(
self,
tokeninfo: tokenize.TokenInfo,
indent_length: int,
quotes: str,
_: Literal[1, 3],
) -> str:
"""Split a summary and body if there is a period after the summary."""
if index := tokeninfo.string.find("."):
if (
index not in (-1, len(tokeninfo.string) - 4)
and "\n" not in tokeninfo.string[:index] # Skip multi-line summaries
):
# Handle summary with part of docstring body on same line
if tokeninfo.string[index + 1] == " ":
return (
tokeninfo.string[:index]
+ f".\n\n{' ' * indent_length}"
+ tokeninfo.string[index + 2 :]
)
if "\n\n" in tokeninfo.string:
summary, description = tokeninfo.string.split("\n\n", maxsplit=1)
else:
summary, description = tokeninfo.string, None

new_summary = None

# Handle summary with part of docstring body on same line
if (
tokeninfo.string[index + 1] == "\n"
and tokeninfo.string[index + 2] != "\n"
):
return (
tokeninfo.string[:index]
+ ".\n\n"
+ tokeninfo.string[index + 2 :]
# Try to split on period
if match := re.search(self.end_of_sentence_period, summary):
index = match.start()

if summary[: index - 1].count("\n") < self.config.max_summary_lines:
if len(summary) == index + 1:
new_summary = summary

# Handle summaries with more text on same line after the period
elif summary[index + 1] == " ":
new_summary = (
summary[:index]
+ f"\n\n{' ' * indent_length}"
+ summary[index + 2 :]
)
return tokeninfo.string

# Handle summaries that end with a period and a direct new line
# but not a double new line.
elif summary[index + 1] == "\n":
# If this is the end of the docstring, don't do anything
if summary[index + 2 :] == indent_length * " " + quotes:
new_summary = summary
# Split between period and rest of docstring
else:
new_summary = summary[:index] + ".\n\n" + summary[index + 2 :]

# Try to split on max length
if not new_summary and summary.count("\n") > self.config.max_summary_lines - 1:
lines = summary.splitlines()
new_summary = "\n".join(lines[: self.config.max_summary_lines])

# Handle summaries without any additional text beyond max lines
if lines[self.config.max_summary_lines] == indent_length * " " + quotes:
new_summary += "\n" + lines[self.config.max_summary_lines]

# Split between max lines and rest of docstring
else:
new_summary += "\n\n" + "\n".join(
lines[self.config.max_summary_lines :]
)

# Re-concatenate summary and description
# TODO(#67): Create 'SummaryFormatter' class
docstring = new_summary or summary
if description:
docstring += "\n\n" + description
return docstring


class StripWhitespacesFormatter(StringAndQuotesFormatter):
Expand Down
3 changes: 2 additions & 1 deletion tests/data/format/summary_splitter/class_docstring.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class MyClass:


class MyClass:
"""Summary over multiple
"""Summary over multiple.

lines.
"""
2 changes: 2 additions & 0 deletions tests/data/format/summary_splitter/dots.args
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
--split-summary-body
--max-summary-lines=2
23 changes: 23 additions & 0 deletions tests/data/format/summary_splitter/dots.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
class MyClass:
"""We shouldn't split sys.path."""


def func():
"""We should not add extra lines after the dot
to it.
"""


def func():
"""We should not split for e.g. here."""


def func():
"""We should not split for i.e. here."""


def func():
"""We should not add line after i.e., sys.path, e.g., etc. but etc. in particular
is harder right ?
"""
23 changes: 23 additions & 0 deletions tests/data/format/summary_splitter/dots.py.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
class MyClass:
"""We shouldn't split sys.path."""


def func():
"""We should not add extra lines after the dot
to it.
"""


def func():
"""We should not split for e.g. here."""


def func():
"""We should not split for i.e. here."""


def func():
"""We should not add line after i.e., sys.path, e.g., etc. but etc. in particular.

is harder right ?
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
--split-summary-body
--max-summary-lines=1
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
def func():
"""My long
summary
is way
too long.
Description
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
def func():
"""My long.

summary
is way
too long.

Description
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
--split-summary-body
--max-summary-lines=2
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
def func():
"""My long
summary
is way
too long.
Description
"""


def func():
"""A long summary
without a period
"""


def func():
"""A summary.
A body
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
def func():
"""My long
summary

is way
too long.

Description
"""


def func():
"""A long summary
without a period
"""


def func():
"""A summary.

A body
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
--split-summary-body
--max-summary-lines=3
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
def func():
"""My long
summary
is way
too long.
Description
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
def func():
"""My long
summary
is way

too long.

Description
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--split-summary-body
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
def func():
"""My long
summary
is way
too long.
Description
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
def func():
"""My long.

summary
is way
too long.

Description
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--split-summary-body
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
def func():
"""My long. summary
is way
too long.
Description
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
def func():
"""My long.

summary
is way
too long.

Description
"""
2 changes: 1 addition & 1 deletion tests/test_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_formatting(
additional_args: List[str] = []
if os.path.exists(test_file.replace(".py", ".args")):
with open(test_file.replace(".py", ".args"), encoding="utf-8") as args_file:
additional_args = args_file.readlines()[0].split()
additional_args = [i.rstrip("\n") for i in args_file.readlines()]

pydocstringformatter.run_docstring_formatter(
[temp_file_name, "--write"] + additional_args
Expand Down

0 comments on commit 26c6f1b

Please sign in to comment.