diff --git a/docopt/__init__.py b/docopt/__init__.py index 2ead853..f92b1f2 100644 --- a/docopt/__init__.py +++ b/docopt/__init__.py @@ -27,7 +27,7 @@ import re import inspect -from typing import Any, Callable, cast, Type, Tuple, Union +from typing import Any, Callable, NamedTuple, cast, Type, Tuple, Union __all__ = ["docopt", "magic_docopt", "magic", "DocoptExit"] __version__ = "0.8.1" @@ -303,7 +303,9 @@ def __init__( @classmethod def parse(class_, option_description: str) -> Option: short, longer, argcount, value = None, None, 0, False - options, _, description = option_description.strip().partition(" ") + options, description = re.split( + r"(?: )|$", option_description.strip(), flags=re.M, maxsplit=1 + ) options = options.replace(",", " ").replace("=", " ") for s in options.split(): if s.startswith("--"): @@ -709,39 +711,101 @@ def isanumber(x): return parsed -def parse_defaults(docstring: str) -> list[Option]: - defaults = [] - for s in parse_section("options:", docstring): - options_literal, _, s = s.partition(":") - if " " in options_literal: - _, _, options_literal = options_literal.partition(" ") - assert options_literal.lower().strip() == "options" - split = re.split(r"\n[ \t]*(-\S+?)", "\n" + s)[1:] - split = [s1 + s2 for s1, s2 in zip(split[::2], split[1::2])] - for s in split: - if s.startswith("-"): - arg, _, description = s.partition(" ") - flag, _, var = arg.replace("=", " ").partition(" ") - option = Option.parse(s) - defaults.append(option) - return defaults - - -def parse_section(name: str, source: str) -> list[str]: - pattern = re.compile( - "^([^\n]*" + name + "[^\n]*\n?(?:[ \t].*?(?:\n|$))*)", - re.IGNORECASE | re.MULTILINE, +class DocSections(NamedTuple): + before_usage: str + usage_header: str + usage_body: str + after_usage: str + + +def parse_docstring_sections(docstring: str) -> DocSections: + """Partition the docstring into the main sections. 
+ + The docstring is returned, split into a tuple of 4 pieces: text before the + usage section, the usage section header, the usage section body and text + following the usage section. + """ + usage_pattern = r""" + # Any number of lines (that don't include usage:) precede the usage section + \A(?P<before_usage>(?:(?!.*\busage:).*\n)*) + # The `usage:` section header. + ^(?P<usage_header>.*\busage:) + (?P<usage_body> + # The first line of the body may follow the header without a line break: + (?:.*(?:\n|\Z)) + # Any number of additional indented lines + (?:[ \t].*(?:\n|\Z))* ) - r = [ - s.strip() for s in pattern.findall(source) if s.strip().lower() != name.lower() + # Everything else + (?P<after_usage>(?:.|\n)*)\Z + """ + match = re.match(usage_pattern, docstring, flags=re.M | re.I | re.VERBOSE) + if not match: + raise DocoptLanguageError( + 'Failed to parse doc: "usage:" section (case-insensitive) not found. ' + "Check http://docopt.org/ for examples of how your doc should look." + ) + before, header, body, after = match.groups() + return DocSections(before, header, body, after) + + +def parse_options(docstring: str) -> list[Option]: + """Parse the option descriptions from the help text. + + `docstring` is the sub-section of the overall docstring that option + descriptions should be parsed from. It must not contain the "usage:" + section, as wrapped lines in the usage pattern can be misinterpreted as + option descriptions. + + Option descriptions appear below the usage patterns. They define synonymous + long and short options, options that have arguments, and the default values + of options' arguments. They look like this: + + ``` + -v, --verbose Be more verbose + -n COUNT, --number COUNT The number of times to + do the thing [default: 42] + ``` + """ + option_start = r""" + # Option descriptions begin on a new line + ^ + # They may occur on the same line as an options: section heading + (?:.*options:)? 
+ # They can be indented with whitespace + [ \t]* + # The description itself starts with the short or long flag (-x or --xxx) + (-\S) + """ + parts = re.split(option_start, docstring, flags=re.M | re.I | re.VERBOSE)[1:] + return [ + Option.parse(start + rest) for (start, rest) in zip(parts[0::2], parts[1::2]) ] - return r -def formal_usage(section: str) -> str: - _, _, section = section.partition(":") # drop "usage:" - pu = section.split() - return "( " + " ".join(") | (" if s == pu[0] else s for s in pu[1:]) + " )" +def lint_docstring(sections: DocSections): + """Report apparent mistakes in the docstring format.""" + if re.search("options:", sections.usage_body, flags=re.I): + raise DocoptLanguageError( + 'Failed to parse docstring: "options:" (case-insensitive) was ' + 'found in "usage:" section. Use a blank line after the usage, or ' + "start the next section without leading whitespace." + ) + if re.search("usage:", sections.usage_body + sections.after_usage, flags=re.I): + raise DocoptLanguageError( + 'Failed to parse docstring: More than one "usage:" ' + "(case-insensitive) section found." + ) + if sections.usage_body.strip() == "": + raise DocoptLanguageError( + 'Failed to parse docstring: "usage:" section is empty. ' + "Check http://docopt.org/ for examples of how your doc should look." + ) + + +def formal_usage(usage: str) -> str: + program_name, *tokens = usage.split() + return "( " + " ".join(") | (" if s == program_name else s for s in tokens) + " )" def extras( @@ -881,28 +945,18 @@ def docopt( or MAYBE_STORE.opname.startswith("RETURN") ): output_value_assigned = True - usage_sections = parse_section("usage:", docstring) - if len(usage_sections) == 0: - raise DocoptLanguageError( - '"usage:" section (case-insensitive) not found. ' - "Perhaps missing indentation?" 
- ) - if len(usage_sections) > 1: - raise DocoptLanguageError('More than one "usage:" (case-insensitive).') - options_pattern = re.compile(r"\n\s*?options:", re.IGNORECASE) - if options_pattern.search(usage_sections[0]): - raise DocoptExit( - "Warning: options (case-insensitive) was found in usage." - "Use a blank line between each section.." - ) - DocoptExit.usage = usage_sections[0] - options = parse_defaults(docstring) - pattern = parse_pattern(formal_usage(DocoptExit.usage), options) + sections = parse_docstring_sections(docstring) + lint_docstring(sections) + DocoptExit.usage = sections.usage_header + sections.usage_body + options = [ + *parse_options(sections.before_usage), + *parse_options(sections.after_usage), + ] + pattern = parse_pattern(formal_usage(sections.usage_body), options) pattern_options = set(pattern.flat(Option)) for options_shortcut in pattern.flat(OptionsShortcut): - doc_options = parse_defaults(docstring) options_shortcut.children = [ - opt for opt in doc_options if opt not in pattern_options + opt for opt in options if opt not in pattern_options ] parsed_arg_vector = parse_argv( Tokens(argv), list(options), options_first, more_magic diff --git a/tests/conftest.py b/tests/conftest.py index c51d368..24d3a66 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,14 +16,37 @@ def parse_test(raw: str): if raw.startswith('"""'): raw = raw[3:] - for fixture in raw.split('r"""'): - doc, _, body = fixture.partition('"""') - cases = [] - for case in body.split("$")[1:]: - argv, _, expect = case.strip().partition("\n") - expect = json.loads(expect) - prog, _, argv = argv.strip().partition(" ") - cases.append((prog, argv, expect)) + for i, fixture in enumerate(raw.split('r"""')): + if i == 0: + if not fixture.strip() == "": + raise DocoptTestException( + f"Unexpected content before first testcase: {fixture}" + ) + continue + + try: + doc, _, body = fixture.partition('"""') + cases = [] + for case in body.split("$")[1:]: + argv, _, expect = 
case.strip().partition("\n") + try: + expect = json.loads(expect) + except json.JSONDecodeError as e: + raise DocoptTestException( + f"The test case JSON is invalid: {expect!r} - {e}." + ) + prog, _, argv = argv.strip().partition(" ") + cases.append((prog, argv, expect)) + if len(cases) == 0: + raise DocoptTestException( + "No test cases follow the doc. Each example must have at " + "least one test case starting with '$'" + ) + except Exception as e: + raise DocoptTestException( + f"Failed to parse test case {i}. {e}\n" + f'The test\'s definition is:\nr"""{fixture}' + ) from None yield doc, cases diff --git a/tests/test_docopt.py b/tests/test_docopt.py index 4d662f6..df78572 100644 --- a/tests/test_docopt.py +++ b/tests/test_docopt.py @@ -1,4 +1,8 @@ -from __future__ import with_statement +from __future__ import annotations, with_statement +from typing import Sequence +import re +from textwrap import dedent + from docopt import ( docopt, DocoptExit, @@ -11,14 +15,16 @@ NotRequired, Either, OneOrMore, + lint_docstring, parse_argv, + parse_docstring_sections, + parse_options, parse_pattern, - parse_section, - parse_defaults, formal_usage, Tokens, transform, ) +import pytest from pytest import raises @@ -91,9 +97,9 @@ def test_formal_usage(): prog N M prog is a program.""" - (usage,) = parse_section("usage:", doc) - assert usage == "Usage: prog [-hv] ARG\n prog N M" - assert formal_usage(usage) == "( [-hv] ARG ) | ( N M )" + _, _, usage_body, _ = parse_docstring_sections(doc) + assert usage_body == " prog [-hv] ARG\n prog N M\n" + assert formal_usage(usage_body) == "( [-hv] ARG ) | ( N M )" def test_parse_argv(): @@ -557,6 +563,35 @@ def test_docopt(): docopt(doc, "--hel") +@pytest.mark.parametrize( + "args, before_usage_val", [("", None), ("--before-usage=2", "2")] +) +def test_docopt__usage_descriptions_cant_bridge_usage_section( + args: str, before_usage_val: str | None +): + # For compatibility with docopt 0.6.2 we support option descriptions + # before the usage 
and after (but not inside usage). However, a + # description cannot start in one part and continue in the next. + # i.e. the default value after Usage does not apply to + # --before-usage + usage = """\ +My prog + +--before-usage VAL + +Usage: + prog [options] + +[default: 42] +Options: + --after-usage +""" + assert docopt(usage, args) == { + "--before-usage": before_usage_val, + "--after-usage": False, + } + + def test_language_errors(): with raises(DocoptLanguageError): docopt("no usage with colon here") @@ -679,51 +714,306 @@ def test_issue_65_evaluate_argv_when_called_not_when_imported(): def test_issue_71_double_dash_is_not_a_valid_option_argument(): - with raises(DocoptExit): + with raises(DocoptExit, match=r"--log requires argument"): docopt("usage: prog [--log=LEVEL] [--] ...", "--log -- 1 2") - with raises(DocoptExit): + with raises(DocoptExit, match=r"-l requires argument"): docopt( - """usage: prog [-l LEVEL] [--] ... - options: -l LEVEL""", + """\ +usage: prog [-l LEVEL] [--] ... 
+options: -l LEVEL""", "-l -- 1 2", ) -usage = """usage: this - -usage:hai -usage: this that - -usage: foo - bar - -PROGRAM USAGE: - foo - bar -usage: -\ttoo -\ttar -Usage: eggs spam -BAZZ -usage: pit stop""" - - -def test_parse_section(): - assert parse_section("usage:", "foo bar fizz buzz") == [] - assert parse_section("usage:", "usage: prog") == ["usage: prog"] - assert parse_section("usage:", "usage: -x\n -y") == ["usage: -x\n -y"] - assert parse_section("usage:", usage) == [ - "usage: this", - "usage:hai", - "usage: this that", - "usage: foo\n bar", - "PROGRAM USAGE:\n foo\n bar", - "usage:\n\ttoo\n\ttar", - "Usage: eggs spam", - "usage: pit stop", - ] - - -def test_issue_126_defaults_not_parsed_correctly_when_tabs(): - section = "Options:\n\t--foo= [default: bar]" - assert parse_defaults(section) == [Option(None, "--foo", 1, "bar")] +option_examples: Sequence[tuple[str, Sequence[Option]]] = [ + ("", []), + ("Some content\nbefore the first option.", []), + ("-f", [Option("-f", None, 0, False)]), + ("-f Description.", [Option("-f", None, 0, False)]), + ("-f ARG Description.", [Option("-f", None, 1, None)]), + ("-f ARG Description. [default: 42]", [Option("-f", None, 1, "42")]), + ("--foo", [Option(None, "--foo", 0, False)]), + ("--foo Description.", [Option(None, "--foo", 0, False)]), + ("--foo ARG Description.", [Option(None, "--foo", 1, None)]), + ("--foo ARG Description. [default: 42]", [Option(None, "--foo", 1, "42")]), + # Options can wrap over multiple lines + ( + """\ + \t --foo ARG, -f ARG With a long + + wrapped description + \t [default: 42] + """, + [Option("-f", "--foo", 1, "42")], + ), + # Options can start after whitespace + ( + "\t--foo= [default: bar]", + [Option(None, "--foo", 1, "bar")], + ), + ( + " \t -f ARG, --foo ARG Description. [default: 42]", + [Option("-f", "--foo", 1, "42")], + ), + # Options can start on the same line as an "options:" heading + ( + "options:-f ARG, --foo ARG Description. 
[default: 42]", + [Option("-f", "--foo", 1, "42")], + ), + ( + " Special oPtioNs: --foo ARG Description. [default: 42]", + [Option(None, "--foo", 1, "42")], + ), + ( + " other options: --foo ARG Description. [default: 42]", + [Option(None, "--foo", 1, "42")], + ), + ( + """\ + -a This is the first option + + -b= Options don't have to be in an options section + + Options: + -c, --charlie This describes the option. + --delta, -d + This option has the desc on another line. + + --echo This option starts after a blank line. + + -f --foxtrot This option has no comma + + Other Options: + -g VAL This option is after another section heading. + [default: gval] + options:-h This option is on the same line as a heading + oPtioNs:--india + oPtIons: -j X + + [default: jval] + and more Options: --k X [default: kval] + """, + [ + Option("-a", None, 0, False), + Option("-b", None, 1, None), + Option("-c", "--charlie", 0, False), + Option("-d", "--delta", 0, False), + Option(None, "--echo", 0, False), + Option("-f", "--foxtrot", 0, False), + Option("-g", None, 1, "gval"), + Option("-h", None, 0, False), + Option(None, "--india", 0, False), + Option("-j", None, 1, "jval"), + Option(None, "--k", 1, "kval"), + ], + ), + # Option with description (or other content) on following line. + ( + """ + Options: + -a + -b + description of b + -c + Other Options: + -d + Other Options:-e + """, + [ + Option("-a", None, 0, False), + Option("-b", None, 0, False), + Option("-c", None, 0, False), + Option("-d", None, 0, False), + Option("-e", None, 0, False), + ], + ), + # Option-like things which aren't actually options + ( + """ + --option1 This really is an option. + And it has a default [default: 42] + + Talking about options: + Here we're talking about options and defaults, like [default: 3] and + options such as --foo, but we're not intending to define them. And + although the default of 3 I just mentioned does not get picked up as + the default of --option1, defined above. 
+ + But if we happen to start a line of our prose with an option, like + -b then we are unfortunately defining an option. And "then" acts as + an argument for -b, so it accepts an argument. + + Options are also allowed to start on the same line as an option + heading, so this is an option: + options: --option2 + + And this also works after several words, so options: --option3 is + also an option. But options after other heading-like things aren't + picked up, so this isn't an option: + things: --not-an-option + + -o, --option4 This is also a real option + """, + [ + Option(None, "--option1", 1, "42"), + Option("-b", None, 1, None), + Option(None, "--option2", 0, False), + Option(None, "--option3", 0, False), + Option("-o", "--option4", 1, None), + ], + ), +] +option_examples = [(dedent(doc), options) for (doc, options) in option_examples] + + +@pytest.mark.parametrize("descriptions, options", option_examples) +def test_parse_options(descriptions, options): + assert parse_options(descriptions) == options + + +@pytest.mark.parametrize( + "before", + [ + pytest.param("", id="empty"), + pytest.param("This is a prog\n", id="1line"), + pytest.param( + "This is a prog\n\nInfo:\n Blah blah\n\n" + # contains usage: but not a usage section + "Ingredients in pork sausage:\nBlah blah\n", + id="preceding_sections", + ), + ], +) +@pytest.mark.parametrize( + "header", + [ + pytest.param("usage:", id="simple"), + pytest.param("uSaGe:", id="odd_case"), + pytest.param("My Program's Usage:", id="long"), + pytest.param(" Indented Usage:", id="indented"), + ], +) +@pytest.mark.parametrize( + "body", + [ + pytest.param("prog [options]", id="simple"), + pytest.param(" prog [options]", id="space_simple"), + pytest.param("\tprog [options]", id="tab_simple"), + pytest.param(" \t prog [options]", id="WS_simple"), + pytest.param("\n prog [options]", id="LF_simple"), + pytest.param("\n prog [options]\n", id="LF_simple_LF"), + pytest.param("prog [options] cmd1\n prog [options] cmd2\n", 
id="multiple_LF"), + pytest.param("\n prog [options] cmd1\n prog [options] cmd2", id="LF_multiple"), + pytest.param( + "\n prog [options] cmd1\n prog [options] cmd2\n", id="LF_multiple_LF" + ), + pytest.param( + """\ + prog [options] cmd1 + [--foo --bar] + [--baz --boz] + prog [options] cmd2 +""", + id="wrapped_arguments", + ), + ], +) +@pytest.mark.parametrize( + "after", + [ + pytest.param("", id="empty"), + pytest.param("This can be\nany content.\n", id="text"), + pytest.param("Options: -a All", id="single_line"), + ], +) +def test_parse_docstring_sections(before: str, header: str, body: str, after: str): + if after and not body.endswith("\n"): + body = body + "\n" + assert parse_docstring_sections(before + header + body + after) == ( + (before, header, body, after) + ) + + +@pytest.mark.parametrize( + "invalid_docstring", + [ + pytest.param("", id="empty"), + pytest.param( + """\ + This doc has no usage heading + + myprog [options] + + Options: + --foo + --bar + """, + id="no_usage_heading", + ), + ], +) +def test_parse_docstring_sections__reports_invalid_docstrings(invalid_docstring: str): + with pytest.raises( + DocoptLanguageError, + match=re.escape( + 'Failed to parse doc: "usage:" section (case-insensitive) not found' + ), + ): + parse_docstring_sections(dedent(invalid_docstring)) + + +@pytest.mark.parametrize( + "doc, error_message", + [ + pytest.param( + """\ + My prog. + + Usage: + myprog [options] + Options: + --foo + --bar + """, + 'Failed to parse docstring: "options:" (case-insensitive) was ' + 'found in "usage:" section.', + id="options_in_usage", + ), + pytest.param( + """\ + My prog. + + Usage: + myprog [options] + + More Usage: + Blah blah. + """, + 'Failed to parse docstring: More than one "usage:" ' + "(case-insensitive) section found.", + id="multiple_usage_sections", + ), + pytest.param( + """\ + This docstring has nothing in its usage. 
+ + Usage:""", + 'Failed to parse docstring: "usage:" section is empty.', + id="empty_usage_section", + ), + pytest.param( + """\ + This docstring has only whitespace in its usage. + + Usage: + + Options:""", + 'Failed to parse docstring: "usage:" section is empty.', + id="whitespace_usage_section", + ), + ], +) +def test_lint_docstring(doc: str, error_message: str): + doc_sections = parse_docstring_sections(dedent(doc)) + with pytest.raises(DocoptLanguageError, match=re.escape(error_message)): + lint_docstring(doc_sections) diff --git a/tests/testcases.docopt b/tests/testcases.docopt index efe9a07..4336dd4 100644 --- a/tests/testcases.docopt +++ b/tests/testcases.docopt @@ -949,9 +949,52 @@ local options: --baz --bar other options: --egg + wrapped description of egg. --spam --not-an-option- +""" +$ prog --baz --spam +{"--foo": false, "--baz": true, "--bar": false, "--egg": false, "--spam": true} + +# +# docopt 0.6.2 compatibility: Blank line in options section +# https://github.com/jazzband/docopt-ng/issues/33 +# +r"""Usage: prog [options] + + -h, --help + -v, --verbose be verbose + -i, --interactive interactive picking + -p, --patch select hunks interactively +""" +$ prog --interactive +{"--help": false, "--verbose": false, "--interactive": true, "--patch": false} + +# +# docopt 0.6.2 compatibility: Options without leading whitespace +# +r"""Usage: prog [options] + +--alpha +-b, --bravo +-c ARG, --charlie ARG Something [default: foo] +""" + +$ prog +{"--alpha": false, "--bravo": false, "--charlie": "foo"} + +# +# docopt 0.6.2 compatibility: Options anywhere in doc +# +r"""My CLI program + +--speed Is allowed to be defined here in docopt 0.6.2 + [default: 9000] +-e, --extra-speed + +usage: prog [options] +options: + --direction """ -$ prog --baz --egg -{"--foo": false, "--baz": true, "--bar": false, "--egg": true, "--spam": false} +$ prog --direction -e +{"--direction": true, "--extra-speed": true, "--speed": "9000"}