jazzband · NickCrews · Sep 8, 2022 · Aug 22, 2022 · Aug 22, 2022 · Aug 25, 2022
diff --git a/docopt/__init__.py b/docopt/__init__.py
@@ -27,7 +27,7 @@
 import re
 import inspect
 
-from typing import Any, Callable, cast, Type, Tuple, Union
+from typing import Any, Callable, NamedTuple, cast, Type, Tuple, Union
 
 __all__ = ["docopt", "magic_docopt", "magic", "DocoptExit"]
 __version__ = "0.8.1"
@@ -303,7 +303,9 @@ def __init__(
     @classmethod
     def parse(class_, option_description: str) -> Option:
         short, longer, argcount, value = None, None, 0, False
-        options, _, description = option_description.strip().partition("  ")
+        options, description = re.split(
+            r"(?:  )|$", option_description.strip(), flags=re.M, maxsplit=1
+        )
         options = options.replace(",", " ").replace("=", " ")
         for s in options.split():
             if s.startswith("--"):
@@ -709,39 +711,101 @@ def isanumber(x):
     return parsed
 
 
-def parse_defaults(docstring: str) -> list[Option]:
-    defaults = []
-    for s in parse_section("options:", docstring):
-        options_literal, _, s = s.partition(":")
-        if " " in options_literal:
-            _, _, options_literal = options_literal.partition(" ")
-        assert options_literal.lower().strip() == "options"
-        split = re.split(r"\n[ \t]*(-\S+?)", "\n" + s)[1:]
-        split = [s1 + s2 for s1, s2 in zip(split[::2], split[1::2])]
-        for s in split:
-            if s.startswith("-"):
-                arg, _, description = s.partition("  ")
-                flag, _, var = arg.replace("=", " ").partition(" ")
-                option = Option.parse(s)
-                defaults.append(option)
-    return defaults
-
-
-def parse_section(name: str, source: str) -> list[str]:
-    pattern = re.compile(
-        "^([^\n]*" + name + "[^\n]*\n?(?:[ \t].*?(?:\n|$))*)",
-        re.IGNORECASE | re.MULTILINE,
+class DocSections(NamedTuple):
+    before_usage: str  # whole lines preceding the "usage:" line (may be empty)
+    usage_header: str  # start of that line up to and including "usage:"
+    usage_body: str  # rest of the header line plus the indented lines after it
+    after_usage: str  # all remaining text following the usage section
+
+
+def parse_docstring_sections(docstring: str) -> DocSections:
+    """Partition the docstring into the main sections.
+
+    Returns a DocSections tuple of 4 pieces: the text before the usage section,
+    the usage section header, the usage section body and the text following it.
+    Raises DocoptLanguageError if no "usage:" header (case-insensitive) matches.
+    """
+    usage_pattern = r"""
+    # Any number of lines (that don't include usage:) precede the usage section
+    \A(?P<before_usage>(?:(?!.*\busage:).*\n)*)
+    # The `usage:` section header.
+    ^(?P<usage_header>.*\busage:)
+    (?P<usage_body>
+        # The first line of the body may follow the header without a line break:
+        (?:.*(?:\n|\Z))
+        # Any number of additional indented lines
+        (?:[ \t].*(?:\n|\Z))*
     )
-    r = [
-        s.strip() for s in pattern.findall(source) if s.strip().lower() != name.lower()
+    # Everything else
+    (?P<after_usage>(?:.|\n)*)\Z
+    """
+    match = re.match(usage_pattern, docstring, flags=re.M | re.I | re.VERBOSE)
+    if not match:
+        raise DocoptLanguageError(
+            'Failed to parse doc: "usage:" section (case-insensitive) not found. '
+            "Check http://docopt.org/ for examples of how your doc should look."
+        )
+    before, header, body, after = match.groups()  # same order as DocSections
+    return DocSections(before, header, body, after)
+
+
+def parse_options(docstring: str) -> list[Option]:
+    """Parse the option descriptions from the help text.
+
+    `docstring` is the sub-section of the overall docstring that option
+    descriptions should be parsed from. It must not contain the "usage:"
+    section, as wrapped lines in the usage pattern can be misinterpreted as
+    option descriptions.
+
+    Option descriptions appear below the usage patterns. They define synonymous
+    long and short options, options that have arguments, and the default values
+    of options' arguments. They look like this:
+
+    ```
+        -v, --verbose             Be more verbose
+        -n COUNT, --number COUNT  The number of times to
+                                  do the thing  [default: 42]
+    ```
+    """
+    option_start = r"""
+    # Option descriptions begin on a new line
+    ^
+    # They may occur on the same line as an options: section heading
+    (?:.*options:)?
+    # They can be indented with whitespace
+    [ \t]*
+    # The description itself starts with the short or long flag (-x or --xxx)
+    (-\S)
+    """
+    parts = re.split(option_start, docstring, flags=re.M | re.I | re.VERBOSE)[1:]
+    return [  # parts alternates: captured flag prefix, then the text after it
+        Option.parse(start + rest) for (start, rest) in zip(parts[0::2], parts[1::2])
     ]
-    return r
 
 
-def formal_usage(section: str) -> str:
-    _, _, section = section.partition(":")  # drop "usage:"
-    pu = section.split()
-    return "( " + " ".join(") | (" if s == pu[0] else s for s in pu[1:]) + " )"
+def lint_docstring(sections: DocSections):
+    """Raise DocoptLanguageError on apparent mistakes in the docstring format."""
+    if re.search("options:", sections.usage_body, flags=re.I):  # sections ran on
+        raise DocoptLanguageError(
+            'Failed to parse docstring: "options:" (case-insensitive) was '
+            'found in "usage:" section. Use a blank line after the usage, or '
+            "start the next section without leading whitespace."
+        )
+    if re.search("usage:", sections.usage_body + sections.after_usage, flags=re.I):
+        raise DocoptLanguageError(
+            'Failed to parse docstring: More than one "usage:" '
+            "(case-insensitive) section found."
+        )
+    if not sections.usage_body.strip():  # header present but no usage patterns
+        raise DocoptLanguageError(
+            'Failed to parse docstring: "usage:" section is empty. '
+            "Check http://docopt.org/ for examples of how your doc should look."
+        )
+
+
+def formal_usage(usage: str) -> str:
+    prog, *rest = usage.split()  # program name also delimits alternatives
+    return "( " + " ".join(") | (" if tok == prog else tok for tok in rest) + " )"
 
 
 def extras(
@@ -881,28 +945,18 @@ def docopt(
             or MAYBE_STORE.opname.startswith("RETURN")
         ):
             output_value_assigned = True
-    usage_sections = parse_section("usage:", docstring)
-    if len(usage_sections) == 0:
-        raise DocoptLanguageError(
-            '"usage:" section (case-insensitive) not found. '
-            "Perhaps missing indentation?"
-        )
-    if len(usage_sections) > 1:
-        raise DocoptLanguageError('More than one "usage:" (case-insensitive).')
-    options_pattern = re.compile(r"\n\s*?options:", re.IGNORECASE)
-    if options_pattern.search(usage_sections[0]):
-        raise DocoptExit(
-            "Warning: options (case-insensitive) was found in usage."
-            "Use a blank line between each section.."
-        )
-    DocoptExit.usage = usage_sections[0]
-    options = parse_defaults(docstring)
-    pattern = parse_pattern(formal_usage(DocoptExit.usage), options)
+    sections = parse_docstring_sections(docstring)
+    lint_docstring(sections)
+    DocoptExit.usage = sections.usage_header + sections.usage_body
+    options = [
+        *parse_options(sections.before_usage),
+        *parse_options(sections.after_usage),
+    ]
+    pattern = parse_pattern(formal_usage(sections.usage_body), options)
     pattern_options = set(pattern.flat(Option))
     for options_shortcut in pattern.flat(OptionsShortcut):
-        doc_options = parse_defaults(docstring)
         options_shortcut.children = [
-            opt for opt in doc_options if opt not in pattern_options
+            opt for opt in options if opt not in pattern_options
         ]
     parsed_arg_vector = parse_argv(
         Tokens(argv), list(options), options_first, more_magic

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -16,14 +16,37 @@ def parse_test(raw: str):
     if raw.startswith('"""'):
         raw = raw[3:]
 
-    for fixture in raw.split('r"""'):
-        doc, _, body = fixture.partition('"""')
-        cases = []
-        for case in body.split("$")[1:]:
-            argv, _, expect = case.strip().partition("\n")
-            expect = json.loads(expect)
-            prog, _, argv = argv.strip().partition(" ")
-            cases.append((prog, argv, expect))
+    for i, fixture in enumerate(raw.split('r"""')):
+        if i == 0:
+            if not fixture.strip() == "":
+                raise DocoptTestException(
+                    f"Unexpected content before first testcase: {fixture}"
+                )
+            continue
+
+        try:
+            doc, _, body = fixture.partition('"""')
+            cases = []
+            for case in body.split("$")[1:]:
+                argv, _, expect = case.strip().partition("\n")
+                try:
+                    expect = json.loads(expect)
+                except json.JSONDecodeError as e:
+                    raise DocoptTestException(
+                        f"The test case JSON is invalid: {expect!r} - {e}."
+                    )
+                prog, _, argv = argv.strip().partition(" ")
+                cases.append((prog, argv, expect))
+            if len(cases) == 0:
+                raise DocoptTestException(
+                    "No test cases follow the doc. Each example must have at "
+                    "least one test case starting with '$'"
+                )
+        except Exception as e:
+            raise DocoptTestException(
+                f"Failed to parse test case {i}. {e}\n"
+                f'The test\'s definition is:\nr"""{fixture}'
+            ) from None
         yield doc, cases