From 788a5eb9fd9118698246f13a53c5327df7a3f354 Mon Sep 17 00:00:00 2001 From: Felix Fontein Date: Thu, 16 Mar 2023 07:27:05 +0100 Subject: [PATCH] Move parser impl code out. --- src/antsibull_docs/markup/_parser_impl.py | 86 +++++++++++++++++++++ src/antsibull_docs/markup/parser.py | 91 +++-------------------- tests/units/test_markup.py | 58 +++++++++++++++ 3 files changed, 154 insertions(+), 81 deletions(-) create mode 100644 src/antsibull_docs/markup/_parser_impl.py create mode 100644 tests/units/test_markup.py diff --git a/src/antsibull_docs/markup/_parser_impl.py b/src/antsibull_docs/markup/_parser_impl.py new file mode 100644 index 00000000..d7fdcdb1 --- /dev/null +++ b/src/antsibull_docs/markup/_parser_impl.py @@ -0,0 +1,86 @@ +# Author: Felix Fontein +# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or +# https://www.gnu.org/licenses/gpl-3.0.txt) +# SPDX-License-Identifier: GPL-3.0-or-later +# SPDX-FileCopyrightText: 2022, Ansible Project +""" +Internal parsing code. +""" + +import re +import typing as t + +_ESCAPE_OR_COMMA = re.compile(r'\\(.)| *(,) *') +_ESCAPE_OR_CLOSING = re.compile(r'\\(.)|([)])') + + +def parse_parameters_escaped(text: str, index: int, parameter_count: int, + ) -> t.Tuple[int, t.List[str], t.Optional[str]]: + result: t.List[str] = [] + parameters_left = parameter_count + while parameters_left > 1: + parameters_left -= 1 + value: t.List[str] = [] + while True: + match = _ESCAPE_OR_COMMA.search(text, pos=index) + if not match: + result.append(''.join(value)) + return ( + len(text), + result, + 'Cannot find comma separating parameter' + f' {parameter_count - parameters_left} from the next one' + ) + value.append(text[index:match.start(0)]) + index = match.end(0) + if match.group(1): + value.append(match.group(1)) + else: + break + result.append(''.join(value)) + value = [] + while True: + match = _ESCAPE_OR_CLOSING.search(text, pos=index) + if not match: + return len(text), result, 'Cannot find ")" closing after the last parameter' + value.append(text[index:match.start(0)]) + index = match.end(0) + if match.group(1): + value.append(match.group(1)) + else: + break + result.append(''.join(value)) + return index, result, None + + +def parse_parameters_unescaped(text: str, index: int, parameter_count: int, + ) -> t.Tuple[int, t.List[str], t.Optional[str]]: + result: t.List[str] = [] + first = True + parameters_left = parameter_count + while parameters_left > 1: + parameters_left -= 1 + next_index = text.find(',', index) + if next_index < 0: + return ( + len(text), + result, + 'Cannot find comma separating parameter' + f' {parameter_count - parameters_left} from the next one' + ) + parameter = text[index:next_index].rstrip(' ') + if not first: + parameter = parameter.lstrip(' ') + else: + first = False + result.append(parameter) + index = next_index + 1 + next_index = text.find(')', index) + if next_index < 0: + return len(text), result, 'Cannot find ")" closing after the last parameter' + parameter = text[index:next_index] + if not first: + parameter = parameter.lstrip(' ') + result.append(parameter) + index = next_index + 1 + return index, result, None diff --git a/src/antsibull_docs/markup/parser.py b/src/antsibull_docs/markup/parser.py index 7231592b..5887fc2c 100644 --- a/src/antsibull_docs/markup/parser.py +++ b/src/antsibull_docs/markup/parser.py @@ -11,8 +11,7 @@ import re import typing as t -_ESCAPE_OR_COMMA = re.compile(r'\\(.)| *(,) *') -_ESCAPE_OR_CLOSING = re.compile(r'\\(.)|([)])') +from ._parser_impl import parse_parameters_escaped, parse_parameters_unescaped class ParsingException(Exception): @@ -72,83 +71,6 @@ class CommandData(t.NamedTuple): Part = t.Union[str, CommandData] -def parse_parameters_escaped(text: str, index: int, command: Command, - command_start: int) -> t.Tuple[int, t.List[str]]: - result = [] - parameter_count = command.parameter_count - while parameter_count > 1: - parameter_count -= 1 - value = [] - while True: - match = _ESCAPE_OR_COMMA.search(text, pos=index) - if not match: - raise ParsingException( - f'Cannot find comma separating ' - f'parameter {command.parameter_count - parameter_count}' - f' from the next one for command "{command.command}"' - f' starting at index {command_start} in {text!r}' - ) - value.append(text[index:match.start(0)]) - index = match.end(0) - if match.group(1): - value.append(match.group(1)) - else: - break - result.append(''.join(value)) - value = [] - while True: - match = _ESCAPE_OR_CLOSING.search(text, pos=index) - if not match: - raise ParsingException( - f'Cannot find ")" closing after the last parameter for' - f' command "{command.command}" starting at index {command_start} in {text!r}' - ) - value.append(text[index:match.start(0)]) - index = match.end(0) - if match.group(1): - value.append(match.group(1)) - else: - break - result.append(''.join(value)) - return index, result - - -def parse_parameters_unescaped(text: str, index: int, command: Command, - command_start: int) -> t.Tuple[int, t.List[str]]: - result = [] - first = True - parameter_count = command.parameter_count - while parameter_count > 1: - parameter_count -= 1 - next_index = text.find(',', index) - if next_index < 0: - raise ParsingException( - f'Cannot find comma separating ' - f'parameter {command.parameter_count - parameter_count}' - f' from the next one for command "{command.command}"' - f' starting at index {command_start} in {text!r}' - ) - parameter = text[index:next_index].rstrip(' ') - if not first: - parameter = parameter.lstrip(' ') - else: - first = False - result.append(parameter) - index = next_index + 1 - next_index = text.find(')', index) - if next_index < 0: - raise ParsingException( - f'Cannot find ")" closing after the last parameter for' - f' command "{command.command}" starting at index {command_start} in {text!r}' - ) - parameter = text[index:next_index] - if not first: - parameter = parameter.lstrip(' ') - result.append(parameter) - index = next_index + 1 - return index, result - - def parse_text(text: str, commands: CommandSet) -> t.List[Part]: result: t.List[Part] = [] index = 0 @@ -167,9 +89,16 @@ def parse_text(text: str, commands: CommandSet) -> t.List[Part]: continue index += 1 if command.escaped_content: - index, parameters = parse_parameters_escaped(text, index, command, command_start) + index, parameters, error = parse_parameters_escaped( + text, index, command.parameter_count) else: - index, parameters = parse_parameters_unescaped(text, index, command, command_start) + index, parameters, error = parse_parameters_unescaped( + text, index, command.parameter_count) + if error is not None: + raise ParsingException( + error + + f' for command "{command.command}" starting at index {command_start} in {text!r}' + ) result.append(CommandData(command=command, parameters=parameters)) return result diff --git a/tests/units/test_markup.py b/tests/units/test_markup.py new file mode 100644 index 00000000..c8b90fba --- /dev/null +++ b/tests/units/test_markup.py @@ -0,0 +1,58 @@ +# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt) +# SPDX-License-Identifier: GPL-3.0-or-later +# SPDX-FileCopyrightText: 2020, Ansible Project + +import pytest + +from antsibull_docs.markup.rstify import rst_escape, rst_ify + +RST_IFY_DATA = { + # No substitutions + 'no-op': 'no-op', + 'no-op Z(test)': 'no-op Z(test)', + # Simple cases of all substitutions + 'I(italic)': r'\ :emphasis:`italic`\ ', + 'B(bold)': r'\ :strong:`bold`\ ', + 'M(ansible.builtin.yum)': r'\ :ref:`ansible.builtin.yum' + r' `\ ', + 'U(https://docs.ansible.com)': r'\ https://docs.ansible.com\ ', + 'L(the user guide,https://docs.ansible.com/user-guide.html)': r'\ `the user guide' + r' `__\ ', + 'R(the user guide,user-guide)': r'\ :ref:`the user guide `\ ', + 'C(/usr/bin/file)': r'\ :literal:`/usr/bin/file`\ ', + 'HORIZONTALLINE': '\n\n.. raw:: html\n\n
\n\n', + # Multiple substitutions + 'The M(ansible.builtin.yum) module B(MUST) be given the C(package) parameter. See the R(looping docs,using-loops) for more info': + r'The \ :ref:`ansible.builtin.yum `\ module \ :strong:`MUST`\ be given the \ :literal:`package`\ parameter. See the \ :ref:`looping docs `\ for more info', + # Problem cases + 'IBM(International Business Machines)': 'IBM(International Business Machines)', + 'L(the user guide, https://docs.ansible.com/)': r'\ `the user guide `__\ ', + 'R(the user guide, user-guide)': r'\ :ref:`the user guide `\ ', +} + + +@pytest.mark.parametrize('text, expected', RST_IFY_DATA.items()) +def test_rst_ify(text, expected): + assert rst_ify(text, plugin_name='foo.bar.baz', plugin_type='module') == expected + + +RST_ESCAPE_DATA = { + '': '', + 'no-op': 'no-op', + None: 'None', + 1: '1', + '*': '\\*', + '_': '\\_', + '<': '\\<', + '>': '\\>', + '`': '\\`', + '\\': '\\\\', + '\\*': '\\\\\\*', + '*\\': '\\*\\\\', + ':role:`test`': ':role:\\`test\\`', +} + + +@pytest.mark.parametrize('value, expected', RST_ESCAPE_DATA.items()) +def test_escape_ify(value, expected): + assert rst_escape(value) == expected