From 904287a01e5c2e51c38dbdc7564d1b0eac07d66f Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Mon, 22 Aug 2022 11:23:38 +0000 Subject: [PATCH 01/14] Fix test for missing arg before -- It was throwing an error due to the options: section occurring in usage:, but the test was passing because the message of the exception was not checked. --- tests/test_docopt.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_docopt.py b/tests/test_docopt.py index 4d662f6..6420698 100644 --- a/tests/test_docopt.py +++ b/tests/test_docopt.py @@ -679,12 +679,13 @@ def test_issue_65_evaluate_argv_when_called_not_when_imported(): def test_issue_71_double_dash_is_not_a_valid_option_argument(): - with raises(DocoptExit): + with raises(DocoptExit, match=r"--log requires argument"): docopt("usage: prog [--log=LEVEL] [--] ...", "--log -- 1 2") - with raises(DocoptExit): + with raises(DocoptExit, match=r"-l requires argument"): docopt( - """usage: prog [-l LEVEL] [--] ... - options: -l LEVEL""", + """\ +usage: prog [-l LEVEL] [--] ... +options: -l LEVEL""", "-l -- 1 2", ) From d36d69f6593eb00fbe35049d415df6af1e9d6803 Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Mon, 22 Aug 2022 10:25:45 +0000 Subject: [PATCH 02/14] Create a single definition of the docstring sections parse_docstring_sections() divides the docstring into the main non-overlapping sections that are to be further analysed. These are text before the usage, the usage header ("usage:" or similar), the usage body (" proc [options]") and text following the usage. The intention of this partitioning is to facilitate restoring the option description parsing behaviour of the last stable docopt release (0.6.2), while retaining the current improved behaviour of not requiring a blank line after the usage: section; and also removing a small quirk in the 0.6.2 parser, which is that option-defaults can occur before the usage: section. 
(But this partitioning provides access to the text before the usage section, so this behaviour could be retained if desired.) --- docopt/__init__.py | 45 ++++++++++++++++++- tests/test_docopt.py | 105 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+), 1 deletion(-) diff --git a/docopt/__init__.py b/docopt/__init__.py index 2ead853..89cd27f 100644 --- a/docopt/__init__.py +++ b/docopt/__init__.py @@ -27,7 +27,7 @@ import re import inspect -from typing import Any, Callable, cast, Type, Tuple, Union +from typing import Any, Callable, NamedTuple, cast, Type, Tuple, Union __all__ = ["docopt", "magic_docopt", "magic", "DocoptExit"] __version__ = "0.8.1" @@ -709,6 +709,49 @@ def isanumber(x): return parsed +class DocSections(NamedTuple): + before_usage: str + usage_header: str + usage_body: str + after_usage: str + + +def parse_docstring_sections(docstring: str) -> DocSections: + """Partition the docstring into the main sections. + + The docstring is returned, split into a tuple of 4 pieces: text before the + usage section, the usage section header, the usage section body and text + following the usage section. + """ + usage_pattern = r""" + # Any number of lines precede the usage section + \A(?P(?:.*\n)*?) + # The `usage:` section header. + ^(?P.*usage:) + (?P + # The first line of the body may follow the header without a line break: + (?: + # Some non-whitespace content + [ \t]*\S.*(?:\n|\Z) + # Or after a newline, followed by indentation + |(?:[ \t]*\n[ \t].*(?:\n|\Z)) + ) + # Any number of additional indented lines + (?:[ \t].*(?:\n|\Z))* + ) + # Everything else + (?P(?:.|\n)*)\Z + """ + match = re.match(usage_pattern, docstring, flags=re.M | re.I | re.VERBOSE) + if not match: + raise DocoptLanguageError( + 'Failed to parse doc: "usage:" section (case-insensitive) not found. ' + "Check http://docopt.org/ for examples of how your doc should look." 
+ ) + before, header, body, after = match.groups() + return DocSections(before, header, body, after) + + def parse_defaults(docstring: str) -> list[Option]: defaults = [] for s in parse_section("options:", docstring): diff --git a/tests/test_docopt.py b/tests/test_docopt.py index 6420698..e548fc6 100644 --- a/tests/test_docopt.py +++ b/tests/test_docopt.py @@ -1,4 +1,7 @@ from __future__ import with_statement +import re +from textwrap import dedent + from docopt import ( docopt, DocoptExit, @@ -12,6 +15,7 @@ Either, OneOrMore, parse_argv, + parse_docstring_sections, parse_pattern, parse_section, parse_defaults, @@ -19,6 +23,7 @@ Tokens, transform, ) +import pytest from pytest import raises @@ -728,3 +733,103 @@ def test_parse_section(): def test_issue_126_defaults_not_parsed_correctly_when_tabs(): section = "Options:\n\t--foo= [default: bar]" assert parse_defaults(section) == [Option(None, "--foo", 1, "bar")] + + +@pytest.mark.parametrize( + "before", + [ + pytest.param("", id="empty"), + pytest.param("This is a prog\n", id="1line"), + pytest.param("This is a prog\n\nInfo:\n Blah blah\n", id="preceding_sections"), + ], +) +@pytest.mark.parametrize( + "header", + [ + pytest.param("usage:", id="simple"), + pytest.param("uSaGe:", id="odd_case"), + pytest.param("My Program's Usage:", id="long"), + pytest.param(" Indented Usage:", id="indented"), + ], +) +@pytest.mark.parametrize( + "body", + [ + pytest.param("prog [options]", id="simple"), + pytest.param(" prog [options]", id="space_simple"), + pytest.param("\tprog [options]", id="tab_simple"), + pytest.param(" \t prog [options]", id="WS_simple"), + pytest.param("\n prog [options]", id="LF_simple"), + pytest.param("\n prog [options]\n", id="LF_simple_LF"), + pytest.param("prog [options] cmd1\n prog [options] cmd2\n", id="multiple_LF"), + pytest.param("\n prog [options] cmd1\n prog [options] cmd2", id="LF_multiple"), + pytest.param( + "\n prog [options] cmd1\n prog [options] cmd2\n", id="LF_multiple_LF" + ), + 
pytest.param( + """\ + prog [options] cmd1 + [--foo --bar] + [--baz --boz] + prog [options] cmd2 +""", + id="wrapped_arguments", + ), + ], +) +@pytest.mark.parametrize( + "after", + [ + pytest.param("", id="empty"), + pytest.param("This can be\nany content.\n", id="text"), + pytest.param("Options: -a All", id="single_line"), + ], +) +def test_parse_docstring_sections(before: str, header: str, body: str, after: str): + if after and not body.endswith("\n"): + body = body + "\n" + assert parse_docstring_sections(before + header + body + after) == ( + (before, header, body, after) + ) + + +@pytest.mark.parametrize( + "invalid_docstring", + [ + pytest.param("", id="empty"), + pytest.param( + """\ + This doc has no usage heading + + myprog [options] + + Options: + --foo + --bar + """, + id="no_usage_heading", + ), + pytest.param( + """\ + This doc has a blank line after the usage heading + + Usage: + + myprog [options] + + Options: + --foo + --bar + """, + id="blank_line_after_usage_heading", + ), + ], +) +def test_parse_docstring_sections__reports_invalid_docstrings(invalid_docstring: str): + with pytest.raises( + DocoptLanguageError, + match=re.escape( + 'Failed to parse doc: "usage:" section (case-insensitive) not found' + ), + ): + parse_docstring_sections(dedent(invalid_docstring)) From bb13708f50ebd23e4f023962b4f194a8a33ab9c3 Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Thu, 25 Aug 2022 14:31:34 +0000 Subject: [PATCH 03/14] Implement backwards compatible option parsing parse_options() parses option descriptions in a way that is compatible with docopt 0.6.2, while also compatible with docopt-ng's current behaviour. The differences are: - docopt-ng requires option descriptions to be in an "options:" section, docopt allows them to be anywhere outside the "usage:" section. - docopt-ng requires options descriptions have leading whitespace, docopt allows them to start at column 0. 
- docopt-ng allows options descriptions to begin on the same line as a section heading, docopt does not. e.g. `options: --foo` is OK with docopt-ng. parse_options() parses options following either docopt or docopt-ng's behaviour. Although it expects the `docstring` argument to be a sub-section of the overall docstring, so the caller is in control of where in the docstring options are parsed from. --- docopt/__init__.py | 34 ++++++++++++++++ tests/test_docopt.py | 96 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 126 insertions(+), 4 deletions(-) diff --git a/docopt/__init__.py b/docopt/__init__.py index 89cd27f..33a77c6 100644 --- a/docopt/__init__.py +++ b/docopt/__init__.py @@ -752,6 +752,40 @@ def parse_docstring_sections(docstring: str) -> DocSections: return DocSections(before, header, body, after) +def parse_options(docstring: str) -> list[Option]: + """Parse the option descriptions from the help text. + + `docstring` is the sub-section of the overall docstring that option + descriptions should be parsed from. It must not contain the "usage:" + section, as wrapped lines in the usage pattern can be misinterpreted as + option descriptions. + + Option descriptions appear below the usage patterns, They define synonymous + long and short options, options that have arguments, and the default values + of options' arguments. They look like this: + + ``` + -v, --verbose Be more verbose + -n COUNT, --number COUNT The number of times to + do the thing [default: 42] + ``` + """ + option_start = r""" + # Option descriptions begin on a new line + ^ + # They may be occur on the same line as an options: section heading + (?:.*options:)? + # They can be indented with whitespace + [ \t]* + # The description itself starts with the short or long flag (-x or --xxx) + (-\S+?) 
+ """ + parts = re.split(option_start, docstring, flags=re.M | re.I | re.VERBOSE)[1:] + return [ + Option.parse(start + rest) for (start, rest) in zip(parts[0::2], parts[1::2]) + ] + + def parse_defaults(docstring: str) -> list[Option]: defaults = [] for s in parse_section("options:", docstring): diff --git a/tests/test_docopt.py b/tests/test_docopt.py index e548fc6..4b3c0bf 100644 --- a/tests/test_docopt.py +++ b/tests/test_docopt.py @@ -1,4 +1,5 @@ -from __future__ import with_statement +from __future__ import annotations, with_statement +from typing import Sequence import re from textwrap import dedent @@ -16,6 +17,7 @@ OneOrMore, parse_argv, parse_docstring_sections, + parse_options, parse_pattern, parse_section, parse_defaults, @@ -730,9 +732,95 @@ def test_parse_section(): ] -def test_issue_126_defaults_not_parsed_correctly_when_tabs(): - section = "Options:\n\t--foo= [default: bar]" - assert parse_defaults(section) == [Option(None, "--foo", 1, "bar")] +option_examples: Sequence[tuple[str, Sequence[Option]]] = [ + ("", []), + ("Some content\nbefore the first option.", []), + ("-f", [Option("-f", None, 0, False)]), + ("-f Description.", [Option("-f", None, 0, False)]), + ("-f ARG Description.", [Option("-f", None, 1, None)]), + ("-f ARG Description. [default: 42]", [Option("-f", None, 1, "42")]), + ("--foo", [Option(None, "--foo", 0, False)]), + ("--foo Description.", [Option(None, "--foo", 0, False)]), + ("--foo ARG Description.", [Option(None, "--foo", 1, None)]), + ("--foo ARG Description. [default: 42]", [Option(None, "--foo", 1, "42")]), + # Options can wrap over multiple lines + ( + """\ + \t --foo ARG, -f ARG With a long + + wrapped description + \t [default: 42] + """, + [Option("-f", "--foo", 1, "42")], + ), + # Options can start after whitespace + ( + "\t--foo= [default: bar]", + [Option(None, "--foo", 1, "bar")], + ), + ( + " \t -f ARG, --foo ARG Description. 
[default: 42]", + [Option("-f", "--foo", 1, "42")], + ), + # Options can start on the same line as an "options:" heading + ( + "options:-f ARG, --foo ARG Description. [default: 42]", + [Option("-f", "--foo", 1, "42")], + ), + ( + " Special oPtioNs: --foo ARG Description. [default: 42]", + [Option(None, "--foo", 1, "42")], + ), + ( + " other options: --foo ARG Description. [default: 42]", + [Option(None, "--foo", 1, "42")], + ), + ( + """\ + -a This is the first option + + -b= Options don't have to be in an options section + + Options: + -c, --charlie This describes the option. + --delta, -d + This option has the desc on another line. + + --echo This option starts after a blank line. + + -f --foxtrot This option has no comma + + Other Options: + -g VAL This option is after another section heading. + [default: gval] + options:-h This option is on the same line as a heading + oPtioNs:--india + oPtIons: -j X + + [default: jval] + and more Options: --k X [default: kval] + """, + [ + Option("-a", None, 0, False), + Option("-b", None, 1, None), + Option("-c", "--charlie", 0, False), + Option("-d", "--delta", 0, False), + Option(None, "--echo", 0, False), + Option("-f", "--foxtrot", 0, False), + Option("-g", None, 1, "gval"), + Option("-h", None, 0, False), + Option(None, "--india", 0, False), + Option("-j", None, 1, "jval"), + Option(None, "--k", 1, "kval"), + ], + ), +] +option_examples = [(dedent(doc), options) for (doc, options) in option_examples] + + +@pytest.mark.parametrize("descriptions, options", option_examples) +def test_parse_options(descriptions, options): + assert parse_options(descriptions) == options @pytest.mark.parametrize( From e131f0a6530ca9f9848915e7c52b44f07f4a2d0a Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Mon, 22 Aug 2022 16:12:01 +0000 Subject: [PATCH 04/14] Restore compatibility with docopt 0.6.2 docstrings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit uses parse_docstring_sections() 
and parse_options() to parse docstrings accepted by docopt 0.6.2, while retaining docopt-ng's improvements to supported syntax. Currently, docopt-ng parses option-defaults using a strategy that was in docopt's master branch, but considered unstable by the author, and was not released in docopt. It looks for option descriptions in an "options:" section, which is ended on the first blank line. This has the side-effect that options defined in a man-page style — with blank lines in-between — are not found. Neither are options outside an options: section (docopt allows options to follow the usage with no section heading). parse_docstring_sections() is used to separate the usage section from the rest of the docstring. The text before the usage is ignored. The usage body (without its header) is parsed for the argument pattern and the usage header with its body is used to print the usage summary help. The text following the usage is parsed for option descriptions, using parse_options(), which supports the option description syntax of both docopt and the current docopt-ng. Note that docopt 0.6.2 recognises option descriptions in the text prior to the usage section, but this change does not, as it seems like an unintended side-effect of the previous parser's implementation, and seems unlikely to be used in practice. The testcases have two cases added for docopt 0.6.2 compatibility. 
This fixes https://github.com/jazzband/docopt-ng/issues/33 --- docopt/__init__.py | 72 +++++++++++------------------------- tests/test_docopt.py | 83 ++++++++++++++++++++++-------------------- tests/testcases.docopt | 30 ++++++++++++++- 3 files changed, 93 insertions(+), 92 deletions(-) diff --git a/docopt/__init__.py b/docopt/__init__.py index 33a77c6..9645c5e 100644 --- a/docopt/__init__.py +++ b/docopt/__init__.py @@ -786,39 +786,24 @@ def parse_options(docstring: str) -> list[Option]: ] -def parse_defaults(docstring: str) -> list[Option]: - defaults = [] - for s in parse_section("options:", docstring): - options_literal, _, s = s.partition(":") - if " " in options_literal: - _, _, options_literal = options_literal.partition(" ") - assert options_literal.lower().strip() == "options" - split = re.split(r"\n[ \t]*(-\S+?)", "\n" + s)[1:] - split = [s1 + s2 for s1, s2 in zip(split[::2], split[1::2])] - for s in split: - if s.startswith("-"): - arg, _, description = s.partition(" ") - flag, _, var = arg.replace("=", " ").partition(" ") - option = Option.parse(s) - defaults.append(option) - return defaults - - -def parse_section(name: str, source: str) -> list[str]: - pattern = re.compile( - "^([^\n]*" + name + "[^\n]*\n?(?:[ \t].*?(?:\n|$))*)", - re.IGNORECASE | re.MULTILINE, - ) - r = [ - s.strip() for s in pattern.findall(source) if s.strip().lower() != name.lower() - ] - return r +def lint_docstring(sections: DocSections): + """Report apparent mistakes in the docstring format.""" + if re.search("options:", sections.usage_body, flags=re.I): + raise DocoptLanguageError( + 'Failed to parse docstring: "options:" (case-insensitive) was ' + 'found in "usage:" section. Use a blank line after the usage, or ' + "start the next section without leading whitespace." + ) + if re.search("usage:", sections.usage_body + sections.after_usage, flags=re.I): + raise DocoptLanguageError( + 'Failed to parse docstring: More than one "usage:" ' + "(case-insensitive) section found." 
+ ) -def formal_usage(section: str) -> str: - _, _, section = section.partition(":") # drop "usage:" - pu = section.split() - return "( " + " ".join(") | (" if s == pu[0] else s for s in pu[1:]) + " )" +def formal_usage(usage: str) -> str: + program_name, *tokens = usage.split() + return "( " + " ".join(") | (" if s == program_name else s for s in tokens) + " )" def extras( @@ -958,28 +943,15 @@ def docopt( or MAYBE_STORE.opname.startswith("RETURN") ): output_value_assigned = True - usage_sections = parse_section("usage:", docstring) - if len(usage_sections) == 0: - raise DocoptLanguageError( - '"usage:" section (case-insensitive) not found. ' - "Perhaps missing indentation?" - ) - if len(usage_sections) > 1: - raise DocoptLanguageError('More than one "usage:" (case-insensitive).') - options_pattern = re.compile(r"\n\s*?options:", re.IGNORECASE) - if options_pattern.search(usage_sections[0]): - raise DocoptExit( - "Warning: options (case-insensitive) was found in usage." - "Use a blank line between each section.." 
- ) - DocoptExit.usage = usage_sections[0] - options = parse_defaults(docstring) - pattern = parse_pattern(formal_usage(DocoptExit.usage), options) + sections = parse_docstring_sections(docstring) + lint_docstring(sections) + DocoptExit.usage = sections.usage_header + sections.usage_body + options = parse_options(sections.after_usage) + pattern = parse_pattern(formal_usage(sections.usage_body), options) pattern_options = set(pattern.flat(Option)) for options_shortcut in pattern.flat(OptionsShortcut): - doc_options = parse_defaults(docstring) options_shortcut.children = [ - opt for opt in doc_options if opt not in pattern_options + opt for opt in options if opt not in pattern_options ] parsed_arg_vector = parse_argv( Tokens(argv), list(options), options_first, more_magic diff --git a/tests/test_docopt.py b/tests/test_docopt.py index 4b3c0bf..67b0df8 100644 --- a/tests/test_docopt.py +++ b/tests/test_docopt.py @@ -15,12 +15,11 @@ NotRequired, Either, OneOrMore, + lint_docstring, parse_argv, parse_docstring_sections, parse_options, parse_pattern, - parse_section, - parse_defaults, formal_usage, Tokens, transform, @@ -98,9 +97,9 @@ def test_formal_usage(): prog N M prog is a program.""" - (usage,) = parse_section("usage:", doc) - assert usage == "Usage: prog [-hv] ARG\n prog N M" - assert formal_usage(usage) == "( [-hv] ARG ) | ( N M )" + _, _, usage_body, _ = parse_docstring_sections(doc) + assert usage_body == " prog [-hv] ARG\n prog N M\n" + assert formal_usage(usage_body) == "( [-hv] ARG ) | ( N M )" def test_parse_argv(): @@ -697,41 +696,6 @@ def test_issue_71_double_dash_is_not_a_valid_option_argument(): ) -usage = """usage: this - -usage:hai -usage: this that - -usage: foo - bar - -PROGRAM USAGE: - foo - bar -usage: -\ttoo -\ttar -Usage: eggs spam -BAZZ -usage: pit stop""" - - -def test_parse_section(): - assert parse_section("usage:", "foo bar fizz buzz") == [] - assert parse_section("usage:", "usage: prog") == ["usage: prog"] - assert parse_section("usage:", 
"usage: -x\n -y") == ["usage: -x\n -y"] - assert parse_section("usage:", usage) == [ - "usage: this", - "usage:hai", - "usage: this that", - "usage: foo\n bar", - "PROGRAM USAGE:\n foo\n bar", - "usage:\n\ttoo\n\ttar", - "Usage: eggs spam", - "usage: pit stop", - ] - - option_examples: Sequence[tuple[str, Sequence[Option]]] = [ ("", []), ("Some content\nbefore the first option.", []), @@ -921,3 +885,42 @@ def test_parse_docstring_sections__reports_invalid_docstrings(invalid_docstring: ), ): parse_docstring_sections(dedent(invalid_docstring)) + + +@pytest.mark.parametrize( + "doc, error_message", + [ + pytest.param( + """\ + My prog. + + Usage: + myprog [options] + Options: + --foo + --bar + """, + 'Failed to parse docstring: "options:" (case-insensitive) was ' + 'found in "usage:" section.', + id="options_in_usage", + ), + pytest.param( + """\ + My prog. + + Usage: + myprog [options] + + More Usage: + Blah blah. + """, + 'Failed to parse docstring: More than one "usage:" ' + "(case-insensitive) section found.", + id="multiple_usage_sections", + ), + ], +) +def test_lint_docstring(doc: str, error_message: str): + doc_sections = parse_docstring_sections(dedent(doc)) + with pytest.raises(DocoptLanguageError, match=re.escape(error_message)): + lint_docstring(doc_sections) diff --git a/tests/testcases.docopt b/tests/testcases.docopt index efe9a07..87ca7c3 100644 --- a/tests/testcases.docopt +++ b/tests/testcases.docopt @@ -950,8 +950,34 @@ local options: --baz other options: --egg --spam --not-an-option- - """ $ prog --baz --egg {"--foo": false, "--baz": true, "--bar": false, "--egg": true, "--spam": false} + +# +# docopt 0.6.2 compatibility: Blank line in options section +# https://github.com/jazzband/docopt-ng/issues/33 +# +r"""Usage: prog [options] + + -h, --help + -v, --verbose be verbose + + -i, --interactive interactive picking + -p, --patch select hunks interactively +""" +$ prog --interactive +{"--help": false, "--verbose": false, "--interactive": true, 
"--patch": false} + +# +# docopt 0.6.2 compatibility: Options without leading whitespace +# +r"""Usage: prog [options] + +--alpha +-b, --bravo +-c ARG, --charlie ARG Something [default: foo] +""" + +$ prog +{"--alpha": false, "--bravo": false, "--charlie": "foo"} From da4d736e9aee7ab5e0e61720d5f85c593bd2f408 Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Wed, 7 Sep 2022 09:54:17 +0000 Subject: [PATCH 05/14] test: fail if testcases.docopt examples have no tests This implements Nick's suggestion from #36. When generating tests from testcases.docopt, we now raise an error if an example doc has no testcases following it to actually exercise the example. Also, tests with invalid JSON testcases are reported using the same error reporting mechanism. --- tests/conftest.py | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index c51d368..24d3a66 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,14 +16,37 @@ def parse_test(raw: str): if raw.startswith('"""'): raw = raw[3:] - for fixture in raw.split('r"""'): - doc, _, body = fixture.partition('"""') - cases = [] - for case in body.split("$")[1:]: - argv, _, expect = case.strip().partition("\n") - expect = json.loads(expect) - prog, _, argv = argv.strip().partition(" ") - cases.append((prog, argv, expect)) + for i, fixture in enumerate(raw.split('r"""')): + if i == 0: + if not fixture.strip() == "": + raise DocoptTestException( + f"Unexpected content before first testcase: {fixture}" + ) + continue + + try: + doc, _, body = fixture.partition('"""') + cases = [] + for case in body.split("$")[1:]: + argv, _, expect = case.strip().partition("\n") + try: + expect = json.loads(expect) + except json.JSONDecodeError as e: + raise DocoptTestException( + f"The test case JSON is invalid: {expect!r} - {e}." 
+ ) + prog, _, argv = argv.strip().partition(" ") + cases.append((prog, argv, expect)) + if len(cases) == 0: + raise DocoptTestException( + "No test cases follow the doc. Each example must have at " + "least one test case starting with '$'" + ) + except Exception as e: + raise DocoptTestException( + f"Failed to parse test case {i}. {e}\n" + f'The test\'s definition is:\nr"""{fixture}' + ) from None yield doc, cases From 9f850d148d3b36360c498d2fb3f241967a8cc3e6 Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Wed, 7 Sep 2022 10:55:10 +0000 Subject: [PATCH 06/14] fix: handle options with descriptions on new line This fixes a pre-existing bug in docopt-ng that wasn't previously triggered by a test. Options in the options: section without a description on the same line and with content on the following line (e.g. a line-wrapped description) would be parsed as if the content on the following line was part of the option name & arguments. Option.parse() now ends the option name & arguments section on the end of the line, not just on two consecutive spaces. 
--- docopt/__init__.py | 4 +++- tests/test_docopt.py | 20 ++++++++++++++++++++ tests/testcases.docopt | 5 +++-- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/docopt/__init__.py b/docopt/__init__.py index 9645c5e..d9f7803 100644 --- a/docopt/__init__.py +++ b/docopt/__init__.py @@ -303,7 +303,9 @@ def __init__( @classmethod def parse(class_, option_description: str) -> Option: short, longer, argcount, value = None, None, 0, False - options, _, description = option_description.strip().partition(" ") + options, description = re.split( + r"(?: )|$", option_description.strip(), flags=re.M, maxsplit=1 + ) options = options.replace(",", " ").replace("=", " ") for s in options.split(): if s.startswith("--"): diff --git a/tests/test_docopt.py b/tests/test_docopt.py index 67b0df8..6374098 100644 --- a/tests/test_docopt.py +++ b/tests/test_docopt.py @@ -778,6 +778,26 @@ def test_issue_71_double_dash_is_not_a_valid_option_argument(): Option(None, "--k", 1, "kval"), ], ), + # Option with description (or other content) on following line. + ( + """ + Options: + -a + -b + description of b + -c + Other Options: + -d + Other Options:-e + """, + [ + Option("-a", None, 0, False), + Option("-b", None, 0, False), + Option("-c", None, 0, False), + Option("-d", None, 0, False), + Option("-e", None, 0, False), + ], + ), ] option_examples = [(dedent(doc), options) for (doc, options) in option_examples] diff --git a/tests/testcases.docopt b/tests/testcases.docopt index 87ca7c3..599983c 100644 --- a/tests/testcases.docopt +++ b/tests/testcases.docopt @@ -949,10 +949,11 @@ local options: --baz --bar other options: --egg + wrapped description of egg. 
--spam """ -$ prog --baz --egg -{"--foo": false, "--baz": true, "--bar": false, "--egg": true, "--spam": false} +$ prog --baz --spam +{"--foo": false, "--baz": true, "--bar": false, "--egg": false, "--spam": true} # # docopt 0.6.2 compatibility: Blank line in options section From 13f2c9daf9c93ea53337cd6e82f201f2b0dd658e Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Tue, 6 Sep 2022 17:06:31 +0000 Subject: [PATCH 07/14] refactor: avoid non-greedy matching in parse_docstring_sections It's a bit of an obscure and non-obvious regex feature. The regex uses a negative lookahead assertion now, which is still a little odd, but it's clearer I think. --- docopt/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docopt/__init__.py b/docopt/__init__.py index d9f7803..cd3596d 100644 --- a/docopt/__init__.py +++ b/docopt/__init__.py @@ -726,8 +726,8 @@ def parse_docstring_sections(docstring: str) -> DocSections: following the usage section. """ usage_pattern = r""" - # Any number of lines precede the usage section - \A(?P(?:.*\n)*?) + # Any number of lines (that don't include usage:) precede the usage section + \A(?P(?:(?!.*\busage:).*\n)*) # The `usage:` section header. ^(?P.*usage:) (?P From 33c85287e85a1eb4ddc54577954dd9f329da1a54 Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Tue, 6 Sep 2022 17:14:58 +0000 Subject: [PATCH 08/14] fix: don't match usage-suffix words as usage sections The usage section heading now needs to have a word break before usage:, so e.g sausage: doesn't match as a usage: section. 
--- docopt/__init__.py | 2 +- tests/test_docopt.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/docopt/__init__.py b/docopt/__init__.py index cd3596d..41423ee 100644 --- a/docopt/__init__.py +++ b/docopt/__init__.py @@ -729,7 +729,7 @@ def parse_docstring_sections(docstring: str) -> DocSections: # Any number of lines (that don't include usage:) precede the usage section \A(?P(?:(?!.*\busage:).*\n)*) # The `usage:` section header. - ^(?P.*usage:) + ^(?P.*\busage:) (?P # The first line of the body may follow the header without a line break: (?: diff --git a/tests/test_docopt.py b/tests/test_docopt.py index 6374098..86b4231 100644 --- a/tests/test_docopt.py +++ b/tests/test_docopt.py @@ -812,7 +812,12 @@ def test_parse_options(descriptions, options): [ pytest.param("", id="empty"), pytest.param("This is a prog\n", id="1line"), - pytest.param("This is a prog\n\nInfo:\n Blah blah\n", id="preceding_sections"), + pytest.param( + "This is a prog\n\nInfo:\n Blah blah\n\n" + # contains usage: but not a usage section + "Ingredients in pork sausage:\nBlah blah\n", + id="preceding_sections", + ), ], ) @pytest.mark.parametrize( From 428cda50a6b7ecb8642f3a9ac4d6b401700b23d8 Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Tue, 6 Sep 2022 17:17:16 +0000 Subject: [PATCH 09/14] chore: fix typo --- docopt/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docopt/__init__.py b/docopt/__init__.py index 41423ee..c39d64f 100644 --- a/docopt/__init__.py +++ b/docopt/__init__.py @@ -775,7 +775,7 @@ def parse_options(docstring: str) -> list[Option]: option_start = r""" # Option descriptions begin on a new line ^ - # They may be occur on the same line as an options: section heading + # They may occur on the same line as an options: section heading (?:.*options:)? 
# They can be indented with whitespace [ \t]* From 7e3117b4cd6b2f27bcd6d4ea5c9ada3160693468 Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Tue, 6 Sep 2022 17:19:17 +0000 Subject: [PATCH 10/14] refactor: remove unnecessary non-greedy regex modifier --- docopt/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docopt/__init__.py b/docopt/__init__.py index c39d64f..1535007 100644 --- a/docopt/__init__.py +++ b/docopt/__init__.py @@ -780,7 +780,7 @@ def parse_options(docstring: str) -> list[Option]: # They can be indented with whitespace [ \t]* # The description itself starts with the short or long flag (-x or --xxx) - (-\S+?) + (-\S) """ parts = re.split(option_start, docstring, flags=re.M | re.I | re.VERBOSE)[1:] return [ From efd60176999d4333f2653be4a5f306b8df16dd1b Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Wed, 7 Sep 2022 08:12:44 +0000 Subject: [PATCH 11/14] refactor: reject empty usage sections in lint_docstring lint_docstring() now checks for the usage_body being empty, and fails with a message indicating this. --- docopt/__init__.py | 5 +++++ tests/test_docopt.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/docopt/__init__.py b/docopt/__init__.py index 1535007..80ae7c6 100644 --- a/docopt/__init__.py +++ b/docopt/__init__.py @@ -801,6 +801,11 @@ def lint_docstring(sections: DocSections): 'Failed to parse docstring: More than one "usage:" ' "(case-insensitive) section found." ) + if sections.usage_body.strip() == "": + raise DocoptLanguageError( + 'Failed to parse docstring: "usage:" section is empty.' + "Check http://docopt.org/ for examples of how your doc should look." 
+ ) def formal_usage(usage: str) -> str: diff --git a/tests/test_docopt.py b/tests/test_docopt.py index 86b4231..3c2afc3 100644 --- a/tests/test_docopt.py +++ b/tests/test_docopt.py @@ -943,6 +943,24 @@ def test_parse_docstring_sections__reports_invalid_docstrings(invalid_docstring: "(case-insensitive) section found.", id="multiple_usage_sections", ), + pytest.param( + """\ + This docstring has nothing in its usage. + + Usage:""", + 'Failed to parse docstring: "usage:" section is empty.', + id="empty_usage_section", + ), + pytest.param( + """\ + This docstring has only whitespace in its usage. + + Usage: + + Options:""", + 'Failed to parse docstring: "usage:" section is empty.', + id="whitespace_usage_section", + ), ], ) def test_lint_docstring(doc: str, error_message: str): From d8c22774335ad62774c90cab3f46a7ebe8aaef86 Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Wed, 7 Sep 2022 06:28:14 +0000 Subject: [PATCH 12/14] refactor: simplify usage section regex parse_docstring_sections() is no longer responsible for rejecting docs with empty usage sections. Previously it did so to some extent, but failed to reject whitespace-only usage bodies. lint_docstring() now rejects empty usage bodies, which allows the regex's usage_body group to be simplified. The removed test for empty usage bodies is tested via test_lint_docstring. 
--- docopt/__init__.py | 7 +------ tests/test_docopt.py | 14 -------------- 2 files changed, 1 insertion(+), 20 deletions(-) diff --git a/docopt/__init__.py b/docopt/__init__.py index 80ae7c6..d248eec 100644 --- a/docopt/__init__.py +++ b/docopt/__init__.py @@ -732,12 +732,7 @@ def parse_docstring_sections(docstring: str) -> DocSections: ^(?P.*\busage:) (?P # The first line of the body may follow the header without a line break: - (?: - # Some non-whitespace content - [ \t]*\S.*(?:\n|\Z) - # Or after a newline, followed by indentation - |(?:[ \t]*\n[ \t].*(?:\n|\Z)) - ) + (?:.*(?:\n|\Z)) # Any number of additional indented lines (?:[ \t].*(?:\n|\Z))* ) diff --git a/tests/test_docopt.py b/tests/test_docopt.py index 3c2afc3..129c280 100644 --- a/tests/test_docopt.py +++ b/tests/test_docopt.py @@ -886,20 +886,6 @@ def test_parse_docstring_sections(before: str, header: str, body: str, after: st """, id="no_usage_heading", ), - pytest.param( - """\ - This doc has a blank line after the usage heading - - Usage: - - myprog [options] - - Options: - --foo - --bar - """, - id="blank_line_after_usage_heading", - ), ], ) def test_parse_docstring_sections__reports_invalid_docstrings(invalid_docstring: str): From 046c3278968a9ce254c4ea9d23951718feec8c2d Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Wed, 7 Sep 2022 12:11:56 +0000 Subject: [PATCH 13/14] fix: allow option descriptions before the usage: section Docopt 0.6.2 allows option descriptions anywhere in the doc, not just after the usage: section. With this change, we parse both the section prior to the usage: and the section after for option descriptions. 
--- docopt/__init__.py | 5 ++++- tests/test_docopt.py | 29 +++++++++++++++++++++++++++++ tests/testcases.docopt | 16 ++++++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/docopt/__init__.py b/docopt/__init__.py index d248eec..f92b1f2 100644 --- a/docopt/__init__.py +++ b/docopt/__init__.py @@ -948,7 +948,10 @@ def docopt( sections = parse_docstring_sections(docstring) lint_docstring(sections) DocoptExit.usage = sections.usage_header + sections.usage_body - options = parse_options(sections.after_usage) + options = [ + *parse_options(sections.before_usage), + *parse_options(sections.after_usage), + ] pattern = parse_pattern(formal_usage(sections.usage_body), options) pattern_options = set(pattern.flat(Option)) for options_shortcut in pattern.flat(OptionsShortcut): diff --git a/tests/test_docopt.py b/tests/test_docopt.py index 129c280..adc0eee 100644 --- a/tests/test_docopt.py +++ b/tests/test_docopt.py @@ -563,6 +563,35 @@ def test_docopt(): docopt(doc, "--hel") +@pytest.mark.parametrize( + "args, before_usage_val", [("", None), ("--before-usage=2", "2")] +) +def test_docopt__usage_descriptions_cant_bridge_usage_section( + args: str, before_usage_val: str | None +): + # For compatibility with docopt 0.6.2 we support option descriptions + # before the usage and after (but not inside usage). However, a + # description cannot start in one part and continue in the next. + # i.e. 
the default value after Usage does not apply to + # --before-usage + usage = """\ +My prog + +--before-usage VAL + +Usage: + prog [options] + +[default: 42] +Options: + --after-usage +""" + assert docopt(usage, args) == { + "--before-usage": before_usage_val, + "--after-usage": False, + } + + def test_language_errors(): with raises(DocoptLanguageError): docopt("no usage with colon here") diff --git a/tests/testcases.docopt b/tests/testcases.docopt index 599983c..4336dd4 100644 --- a/tests/testcases.docopt +++ b/tests/testcases.docopt @@ -982,3 +982,19 @@ r"""Usage: prog [options] $ prog {"--alpha": false, "--bravo": false, "--charlie": "foo"} + +# +# docopt 0.6.2 compatibility: Options anywhere in doc +# +r"""My CLI program + +--speed Is allowed to be defined here in docopt 0.6.2 + [default: 9000] +-e, --extra-speed + +usage: prog [options] +options: + --direction +""" +$ prog --direction -e +{"--direction": true, "--extra-speed": true, "--speed": "9000"} From abf13dda5c07a79dc6ee2624f70848039b2e0a1c Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Thu, 8 Sep 2022 08:53:48 +0000 Subject: [PATCH 14/14] test: add examples of options not being parsed This adds another case for test_parse_options demonstrating how parse_options() parses docs which contain both options and text that mentions options that should not be interpreted as options. This includes an example (the '-b' option) where we do currently parse an option mentioned in prose as a real option. In principle we could fix this, but it's here for now to document current behaviour. It is also not hard to work around when writing a usage doc: just adjust where the line is wrapped.
--- tests/test_docopt.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/test_docopt.py b/tests/test_docopt.py index adc0eee..df78572 100644 --- a/tests/test_docopt.py +++ b/tests/test_docopt.py @@ -827,6 +827,41 @@ def test_issue_71_double_dash_is_not_a_valid_option_argument(): Option("-e", None, 0, False), ], ), + # Option-like things which aren't actually options + ( + """ + --option1 This really is an option. + And it has a default [default: 42] + + Talking about options: + Here we're talking about options and defaults, like [default: 3] and + options such as --foo, but we're not intending to define them. And + although the default of 3 I just mentioned does not get picked up as + the default of --option1, defined above. + + But if we happen to start a line of our prose with an option, like + -b then we are unfortunately defining an option. And "then" acts as + an argument for -b, so it accepts an argument. + + Options are also allowed to start on the same line as an option + heading, so this is an option: + options: --option2 + + And this also works after several words, so options: --option3 is + also an option. But options after other heading-like things aren't + picked up, so this isn't an option: + things: --not-an-option + + -o, --option4 This is also a real option + """, + [ + Option(None, "--option1", 1, "42"), + Option("-b", None, 1, None), + Option(None, "--option2", 0, False), + Option(None, "--option3", 0, False), + Option("-o", "--option4", 1, None), + ], + ), ] option_examples = [(dedent(doc), options) for (doc, options) in option_examples]