From 26c6f1baceb2eba80992358418e5e355e798410a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Noord?= <13665637+DanielNoord@users.noreply.github.com> Date: Sun, 13 Mar 2022 11:19:27 +0100 Subject: [PATCH] Improve ``SplitSummaryAndDocstringFormatter`` (#69) Co-authored-by: Pierre Sassoulas --- docs/usage.rst | 15 ++- .../configuration/arguments_manager.py | 11 ++ pydocstringformatter/formatting/formatter.py | 100 +++++++++++++----- .../summary_splitter/class_docstring.py.out | 3 +- tests/data/format/summary_splitter/dots.args | 2 + tests/data/format/summary_splitter/dots.py | 23 ++++ .../data/format/summary_splitter/dots.py.out | 23 ++++ .../max_summary_lines/max_lines_is_1.args | 2 + .../max_summary_lines/max_lines_is_1.py | 8 ++ .../max_summary_lines/max_lines_is_1.py.out | 9 ++ .../max_summary_lines/max_lines_is_2.args | 2 + .../max_summary_lines/max_lines_is_2.py | 21 ++++ .../max_summary_lines/max_lines_is_2.py.out | 22 ++++ .../max_summary_lines/max_lines_is_3.args | 2 + .../max_summary_lines/max_lines_is_3.py | 8 ++ .../max_summary_lines/max_lines_is_3.py.out | 9 ++ .../max_lines_is_default.args | 1 + .../max_summary_lines/max_lines_is_default.py | 8 ++ .../max_lines_is_default.py.out | 9 ++ .../max_summary_lines/max_lines_with_dot.args | 1 + .../max_summary_lines/max_lines_with_dot.py | 7 ++ .../max_lines_with_dot.py.out | 9 ++ tests/test_formatting.py | 2 +- 23 files changed, 265 insertions(+), 32 deletions(-) create mode 100644 tests/data/format/summary_splitter/dots.args create mode 100644 tests/data/format/summary_splitter/dots.py create mode 100644 tests/data/format/summary_splitter/dots.py.out create mode 100644 tests/data/format/summary_splitter/max_summary_lines/max_lines_is_1.args create mode 100644 tests/data/format/summary_splitter/max_summary_lines/max_lines_is_1.py create mode 100644 tests/data/format/summary_splitter/max_summary_lines/max_lines_is_1.py.out create mode 100644 tests/data/format/summary_splitter/max_summary_lines/max_lines_is_2.args create mode 100644 tests/data/format/summary_splitter/max_summary_lines/max_lines_is_2.py create mode 100644 tests/data/format/summary_splitter/max_summary_lines/max_lines_is_2.py.out create mode 100644 tests/data/format/summary_splitter/max_summary_lines/max_lines_is_3.args create mode 100644 tests/data/format/summary_splitter/max_summary_lines/max_lines_is_3.py create mode 100644 tests/data/format/summary_splitter/max_summary_lines/max_lines_is_3.py.out create mode 100644 tests/data/format/summary_splitter/max_summary_lines/max_lines_is_default.args create mode 100644 tests/data/format/summary_splitter/max_summary_lines/max_lines_is_default.py create mode 100644 tests/data/format/summary_splitter/max_summary_lines/max_lines_is_default.py.out create mode 100644 tests/data/format/summary_splitter/max_summary_lines/max_lines_with_dot.args create mode 100644 tests/data/format/summary_splitter/max_summary_lines/max_lines_with_dot.py create mode 100644 tests/data/format/summary_splitter/max_summary_lines/max_lines_with_dot.py.out diff --git a/docs/usage.rst b/docs/usage.rst index 38e48bd4..a143f9b0 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -6,6 +6,7 @@ Current usage of ``pydocstringformatter``: .. code-block:: shell usage: pydocstringformatter [-h] [-w] [--quiet] [-v] [--exclude EXCLUDE] + [--max-summary-lines MAX_SUMMARY_LINES] [--summary-quotes-same-line] [--split-summary-body --no-split-summary-body] [--strip-whitespaces --no-strip-whitespaces] @@ -29,6 +30,9 @@ Current usage of ``pydocstringformatter``: configuration: --exclude EXCLUDE A comma separated list of glob patterns of file path names not to be formatted. + --max-summary-lines MAX_SUMMARY_LINES + The maximum numbers of lines a summary can span. The + default value is 1. --summary-quotes-same-line Force the start of a multi-line docstring to be on the same line as the opening quotes. Similar to how this @@ -65,8 +69,9 @@ Current usage of ``pydocstringformatter``: --split-summary-body, --no-split-summary-body Activate or deactivate split-summary-body: Split the - summary and body of a docstring based on a period in - between them. This formatter is currently optional as - its considered somwehat opinionated and might require - major refactoring for existing projects. (default: - False) + summary and body of a docstring based on a period and + max length. The maximum length of a summary can be set + with the --max-summary-lines option. This formatter is + currently optional as its considered somwehat + opinionated and might require major refactoring for + existing projects. (default: False) diff --git a/pydocstringformatter/configuration/arguments_manager.py b/pydocstringformatter/configuration/arguments_manager.py index abfa4680..f81b22fc 100644 --- a/pydocstringformatter/configuration/arguments_manager.py +++ b/pydocstringformatter/configuration/arguments_manager.py @@ -76,6 +76,17 @@ def _register_arguments(self, version: str) -> None: ), ) + self.configuration_group.add_argument( + "--max-summary-lines", + action="store", + default=1, + type=int, + help=( + "The maximum numbers of lines a summary can span. " + "The default value is 1." + ), + ) + self.configuration_group.add_argument( "--summary-quotes-same-line", action="store_true", diff --git a/pydocstringformatter/formatting/formatter.py b/pydocstringformatter/formatting/formatter.py index f10554e3..38197dcf 100644 --- a/pydocstringformatter/formatting/formatter.py +++ b/pydocstringformatter/formatting/formatter.py @@ -115,42 +115,92 @@ def _treat_string( return tokeninfo.string -class SplitSummaryAndDocstringFormatter(StringFormatter): - """Split the summary and body of a docstring based on a period in between them. +class SplitSummaryAndDocstringFormatter(StringAndQuotesFormatter): + """Split the summary and body of a docstring based on a period and max length. + + The maximum length of a summary can be set with the --max-summary-lines option. This formatter is currently optional as its considered somwehat opinionated and might require major refactoring for existing projects. """ name = "split-summary-body" + # TODO(#68): Make this non-optional optional = True - def _treat_string(self, tokeninfo: tokenize.TokenInfo, indent_length: int) -> str: + end_of_sentence_period = re.compile( + r""" + (? str: """Split a summary and body if there is a period after the summary.""" - if index := tokeninfo.string.find("."): - if ( - index not in (-1, len(tokeninfo.string) - 4) - and "\n" not in tokeninfo.string[:index] # Skip multi-line summaries - ): - # Handle summary with part of docstring body on same line - if tokeninfo.string[index + 1] == " ": - return ( - tokeninfo.string[:index] - + f".\n\n{' ' * indent_length}" - + tokeninfo.string[index + 2 :] - ) + if "\n\n" in tokeninfo.string: + summary, description = tokeninfo.string.split("\n\n", maxsplit=1) + else: + summary, description = tokeninfo.string, None + + new_summary = None - # Handle summary with part of docstring body on same line - if ( - tokeninfo.string[index + 1] == "\n" - and tokeninfo.string[index + 2] != "\n" - ): - return ( - tokeninfo.string[:index] - + ".\n\n" - + tokeninfo.string[index + 2 :] + # Try to split on period + if match := re.search(self.end_of_sentence_period, summary): + index = match.start() + + if summary[: index - 1].count("\n") < self.config.max_summary_lines: + if len(summary) == index + 1: + new_summary = summary + + # Handle summaries with more text on same line after the period + elif summary[index + 1] == " ": + new_summary = ( + summary[:index] + + f"\n\n{' ' * indent_length}" + + summary[index + 2 :] ) - return tokeninfo.string + + # Handle summaries that end with a period and a direct new line + # but not a double new line. + elif summary[index + 1] == "\n": + # If this is the end of the docstring, don't do anything + if summary[index + 2 :] == indent_length * " " + quotes: + new_summary = summary + # Split between period and rest of docstring + else: + new_summary = summary[:index] + ".\n\n" + summary[index + 2 :] + + # Try to split on max length + if not new_summary and summary.count("\n") > self.config.max_summary_lines - 1: + lines = summary.splitlines() + new_summary = "\n".join(lines[: self.config.max_summary_lines]) + + # Handle summaries without any additional text beyond max lines + if lines[self.config.max_summary_lines] == indent_length * " " + quotes: + new_summary += "\n" + lines[self.config.max_summary_lines] + + # Split between max lines and rest of docstring + else: + new_summary += "\n\n" + "\n".join( + lines[self.config.max_summary_lines :] + ) + + # Re-concatenate summary and description + # TODO(#67): Create 'SummaryFormatter' class + docstring = new_summary or summary + if description: + docstring += "\n\n" + description + return docstring class StripWhitespacesFormatter(StringAndQuotesFormatter): diff --git a/tests/data/format/summary_splitter/class_docstring.py.out b/tests/data/format/summary_splitter/class_docstring.py.out index 9acbc824..4de324ea 100644 --- a/tests/data/format/summary_splitter/class_docstring.py.out +++ b/tests/data/format/summary_splitter/class_docstring.py.out @@ -45,6 +45,7 @@ class MyClass: class MyClass: - """Summary over multiple + """Summary over multiple. + lines. """ diff --git a/tests/data/format/summary_splitter/dots.args b/tests/data/format/summary_splitter/dots.args new file mode 100644 index 00000000..b10603fd --- /dev/null +++ b/tests/data/format/summary_splitter/dots.args @@ -0,0 +1,2 @@ +--split-summary-body +--max-summary-lines=2 diff --git a/tests/data/format/summary_splitter/dots.py b/tests/data/format/summary_splitter/dots.py new file mode 100644 index 00000000..5b85153c --- /dev/null +++ b/tests/data/format/summary_splitter/dots.py @@ -0,0 +1,23 @@ +class MyClass: + """We shouldn't split sys.path.""" + + +def func(): + """We should not add extra lines after the dot + to it. + """ + + +def func(): + """We should not split for e.g. here.""" + + +def func(): + """We should not split for i.e. here.""" + + +def func(): + """We should not add line after i.e., sys.path, e.g., etc. but etc. in particular + + is harder right ? + """ diff --git a/tests/data/format/summary_splitter/dots.py.out b/tests/data/format/summary_splitter/dots.py.out new file mode 100644 index 00000000..179efcda --- /dev/null +++ b/tests/data/format/summary_splitter/dots.py.out @@ -0,0 +1,23 @@ +class MyClass: + """We shouldn't split sys.path.""" + + +def func(): + """We should not add extra lines after the dot + to it. + """ + + +def func(): + """We should not split for e.g. here.""" + + +def func(): + """We should not split for i.e. here.""" + + +def func(): + """We should not add line after i.e., sys.path, e.g., etc. but etc. in particular. + + is harder right ? + """ diff --git a/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_1.args b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_1.args new file mode 100644 index 00000000..042f786f --- /dev/null +++ b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_1.args @@ -0,0 +1,2 @@ +--split-summary-body +--max-summary-lines=1 diff --git a/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_1.py b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_1.py new file mode 100644 index 00000000..133763ca --- /dev/null +++ b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_1.py @@ -0,0 +1,8 @@ +def func(): + """My long + summary + is way + too long. + + Description + """ diff --git a/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_1.py.out b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_1.py.out new file mode 100644 index 00000000..604de3ee --- /dev/null +++ b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_1.py.out @@ -0,0 +1,9 @@ +def func(): + """My long. + + summary + is way + too long. + + Description + """ diff --git a/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_2.args b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_2.args new file mode 100644 index 00000000..b10603fd --- /dev/null +++ b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_2.args @@ -0,0 +1,2 @@ +--split-summary-body +--max-summary-lines=2 diff --git a/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_2.py b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_2.py new file mode 100644 index 00000000..91719f40 --- /dev/null +++ b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_2.py @@ -0,0 +1,21 @@ +def func(): + """My long + summary + is way + too long. + + Description + """ + + +def func(): + """A long summary + without a period + """ + + +def func(): + """A summary. + + A body + """ diff --git a/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_2.py.out b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_2.py.out new file mode 100644 index 00000000..a33dd28c --- /dev/null +++ b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_2.py.out @@ -0,0 +1,22 @@ +def func(): + """My long + summary + + is way + too long. + + Description + """ + + +def func(): + """A long summary + without a period + """ + + +def func(): + """A summary. + + A body + """ diff --git a/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_3.args b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_3.args new file mode 100644 index 00000000..71170d97 --- /dev/null +++ b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_3.args @@ -0,0 +1,2 @@ +--split-summary-body +--max-summary-lines=3 diff --git a/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_3.py b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_3.py new file mode 100644 index 00000000..133763ca --- /dev/null +++ b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_3.py @@ -0,0 +1,8 @@ +def func(): + """My long + summary + is way + too long. + + Description + """ diff --git a/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_3.py.out b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_3.py.out new file mode 100644 index 00000000..4095b418 --- /dev/null +++ b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_3.py.out @@ -0,0 +1,9 @@ +def func(): + """My long + summary + is way + + too long. + + Description + """ diff --git a/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_default.args b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_default.args new file mode 100644 index 00000000..86899aef --- /dev/null +++ b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_default.args @@ -0,0 +1 @@ +--split-summary-body diff --git a/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_default.py b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_default.py new file mode 100644 index 00000000..133763ca --- /dev/null +++ b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_default.py @@ -0,0 +1,8 @@ +def func(): + """My long + summary + is way + too long. + + Description + """ diff --git a/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_default.py.out b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_default.py.out new file mode 100644 index 00000000..604de3ee --- /dev/null +++ b/tests/data/format/summary_splitter/max_summary_lines/max_lines_is_default.py.out @@ -0,0 +1,9 @@ +def func(): + """My long. + + summary + is way + too long. + + Description + """ diff --git a/tests/data/format/summary_splitter/max_summary_lines/max_lines_with_dot.args b/tests/data/format/summary_splitter/max_summary_lines/max_lines_with_dot.args new file mode 100644 index 00000000..86899aef --- /dev/null +++ b/tests/data/format/summary_splitter/max_summary_lines/max_lines_with_dot.args @@ -0,0 +1 @@ +--split-summary-body diff --git a/tests/data/format/summary_splitter/max_summary_lines/max_lines_with_dot.py b/tests/data/format/summary_splitter/max_summary_lines/max_lines_with_dot.py new file mode 100644 index 00000000..671498b9 --- /dev/null +++ b/tests/data/format/summary_splitter/max_summary_lines/max_lines_with_dot.py @@ -0,0 +1,7 @@ +def func(): + """My long. summary + is way + too long. + + Description + """ diff --git a/tests/data/format/summary_splitter/max_summary_lines/max_lines_with_dot.py.out b/tests/data/format/summary_splitter/max_summary_lines/max_lines_with_dot.py.out new file mode 100644 index 00000000..604de3ee --- /dev/null +++ b/tests/data/format/summary_splitter/max_summary_lines/max_lines_with_dot.py.out @@ -0,0 +1,9 @@ +def func(): + """My long. + + summary + is way + too long. + + Description + """ diff --git a/tests/test_formatting.py b/tests/test_formatting.py index 17c412f6..0ef0bf6e 100644 --- a/tests/test_formatting.py +++ b/tests/test_formatting.py @@ -50,7 +50,7 @@ def test_formatting( additional_args: List[str] = [] if os.path.exists(test_file.replace(".py", ".args")): with open(test_file.replace(".py", ".args"), encoding="utf-8") as args_file: - additional_args = args_file.readlines()[0].split() + additional_args = [i.rstrip("\n") for i in args_file.readlines()] pydocstringformatter.run_docstring_formatter( [temp_file_name, "--write"] + additional_args