Skip to content

Commit

Permalink
Move parser impl code out.
Browse files Browse the repository at this point in the history
  • Loading branch information
felixfontein committed Mar 16, 2023
1 parent 98d3cf0 commit 788a5eb
Show file tree
Hide file tree
Showing 3 changed files with 154 additions and 81 deletions.
86 changes: 86 additions & 0 deletions src/antsibull_docs/markup/_parser_impl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Author: Felix Fontein <felix@fontein.de>
# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or
# https://www.gnu.org/licenses/gpl-3.0.txt)
# SPDX-License-Identifier: GPL-3.0-or-later
# SPDX-FileCopyrightText: 2022, Ansible Project
"""
Internal parsing code.
"""

import re
import typing as t

# Matches either a backslash followed by one character (group 1 captures that
# character), or a comma together with any spaces around it (group 2).
_ESCAPE_OR_COMMA = re.compile(r'\\(.)| *(,) *')
# Matches either a backslash followed by one character (group 1 captures that
# character), or a closing parenthesis (group 2).
_ESCAPE_OR_CLOSING = re.compile(r'\\(.)|([)])')


def _read_escaped_value(text: str, index: int, terminator: 're.Pattern[str]',
                        ) -> t.Tuple[int, str, bool]:
    """
    Collect one parameter value starting at ``index``, resolving backslash escapes.

    ``terminator`` must be a pattern whose group 1 is set for an escape sequence
    and unset for the actual terminator.

    Returns ``(new_index, value, found)``. If the terminator was found, ``new_index``
    points directly behind it. Otherwise ``found`` is ``False``, ``new_index`` is
    ``len(text)`` and ``value`` only contains the text up to and including the
    last escape sequence that was processed.
    """
    pieces: t.List[str] = []
    while True:
        match = terminator.search(text, pos=index)
        if match is None:
            return len(text), ''.join(pieces), False
        pieces.append(text[index:match.start(0)])
        index = match.end(0)
        escaped = match.group(1)
        if not escaped:
            # The terminator itself matched: the value is complete.
            return index, ''.join(pieces), True
        # An escape sequence matched: keep the escaped character literally.
        pieces.append(escaped)


def parse_parameters_escaped(text: str, index: int, parameter_count: int,
                             ) -> t.Tuple[int, t.List[str], t.Optional[str]]:
    """
    Parse ``parameter_count`` comma-separated parameters with backslash escapes.

    Parsing starts at ``index`` in ``text``. All parameters but the last are
    terminated by a comma (spaces around the comma are dropped), the last one by
    ``)``. A backslash makes the following character part of the value.

    Returns a tuple ``(index, parameters, error)``:

    * On success, ``index`` points behind the closing ``)`` and ``error`` is ``None``.
    * If a separating comma is missing, ``index`` is ``len(text)``, ``parameters``
      contains the values read so far plus the partially read one, and ``error``
      describes the problem.
    * If the closing ``)`` is missing, ``index`` is ``len(text)``, ``parameters``
      contains only the completely read values, and ``error`` describes the problem.
    """
    result: t.List[str] = []
    for parameter_number in range(1, parameter_count):
        index, value, found = _read_escaped_value(text, index, _ESCAPE_OR_COMMA)
        result.append(value)
        if not found:
            return (
                len(text),
                result,
                'Cannot find comma separating parameter'
                f' {parameter_number} from the next one'
            )
    index, value, found = _read_escaped_value(text, index, _ESCAPE_OR_CLOSING)
    if not found:
        return len(text), result, 'Cannot find ")" closing after the last parameter'
    result.append(value)
    return index, result, None


def parse_parameters_unescaped(text: str, index: int, parameter_count: int,
                               ) -> t.Tuple[int, t.List[str], t.Optional[str]]:
    """
    Parse ``parameter_count`` comma-separated parameters without escape handling.

    Parsing starts at ``index`` in ``text``. All parameters but the last end at
    the next ``,``, the last one at the next ``)``. Spaces directly around the
    separating commas are stripped; leading spaces of the first parameter and
    trailing spaces of the last parameter are kept.

    Returns a tuple ``(index, parameters, error)``. On success, ``index`` points
    behind the closing ``)`` and ``error`` is ``None``. If a comma or the closing
    ``)`` cannot be found, ``index`` is ``len(text)``, ``parameters`` contains the
    completely read values, and ``error`` describes the problem.
    """
    result: t.List[str] = []
    for parameter_number in range(1, parameter_count):
        comma_index = text.find(',', index)
        if comma_index < 0:
            return (
                len(text),
                result,
                'Cannot find comma separating parameter'
                f' {parameter_number} from the next one'
            )
        parameter = text[index:comma_index].rstrip(' ')
        if parameter_number > 1:
            # Only parameters following a comma lose their leading spaces.
            parameter = parameter.lstrip(' ')
        result.append(parameter)
        index = comma_index + 1
    closing_index = text.find(')', index)
    if closing_index < 0:
        return len(text), result, 'Cannot find ")" closing after the last parameter'
    parameter = text[index:closing_index]
    if result:
        # A non-empty result means this parameter follows a comma.
        parameter = parameter.lstrip(' ')
    result.append(parameter)
    return closing_index + 1, result, None
91 changes: 10 additions & 81 deletions src/antsibull_docs/markup/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
import re
import typing as t

# Matches either a backslash followed by one character (group 1 captures that
# character), or a comma together with any spaces around it (group 2).
_ESCAPE_OR_COMMA = re.compile(r'\\(.)| *(,) *')
# Matches either a backslash followed by one character (group 1 captures that
# character), or a closing parenthesis (group 2).
_ESCAPE_OR_CLOSING = re.compile(r'\\(.)|([)])')
from ._parser_impl import parse_parameters_escaped, parse_parameters_unescaped


class ParsingException(Exception):
Expand Down Expand Up @@ -72,83 +71,6 @@ class CommandData(t.NamedTuple):
# One element of a parsed text: either a plain string or a CommandData entry.
Part = t.Union[str, CommandData]


def parse_parameters_escaped(text: str, index: int, command: Command,
                             command_start: int) -> t.Tuple[int, t.List[str]]:
    """
    Parse the parameters of ``command`` from ``text``, resolving backslash escapes.

    Parsing starts at ``index``. ``command.parameter_count`` determines how many
    parameters are read: all but the last end at a comma (spaces around the comma
    are dropped), the last one ends at ``)``. A backslash makes the following
    character part of the value.

    ``command_start`` is only used for error messages.

    Returns the index behind the closing ``)`` and the list of parameters.

    Raises ``ParsingException`` if a separating comma or the closing ``)`` is missing.
    """
    result: t.List[str] = []
    for parameter_number in range(1, command.parameter_count):
        value: t.List[str] = []
        while True:
            match = _ESCAPE_OR_COMMA.search(text, pos=index)
            if not match:
                raise ParsingException(
                    f'Cannot find comma separating '
                    f'parameter {parameter_number}'
                    f' from the next one for command "{command.command}"'
                    f' starting at index {command_start} in {text!r}'
                )
            value.append(text[index:match.start(0)])
            index = match.end(0)
            if match.group(1):
                # Escape sequence: keep the escaped character and continue scanning.
                value.append(match.group(1))
            else:
                # Separating comma: this parameter is complete.
                break
        result.append(''.join(value))
    value = []
    while True:
        match = _ESCAPE_OR_CLOSING.search(text, pos=index)
        if not match:
            raise ParsingException(
                f'Cannot find ")" closing after the last parameter for'
                f' command "{command.command}" starting at index {command_start} in {text!r}'
            )
        value.append(text[index:match.start(0)])
        index = match.end(0)
        if match.group(1):
            # Escape sequence: keep the escaped character and continue scanning.
            value.append(match.group(1))
        else:
            # Closing parenthesis: the last parameter is complete.
            break
    result.append(''.join(value))
    return index, result


def parse_parameters_unescaped(text: str, index: int, command: Command,
                               command_start: int) -> t.Tuple[int, t.List[str]]:
    """
    Parse the parameters of ``command`` from ``text`` without escape handling.

    Parsing starts at ``index``. ``command.parameter_count`` determines how many
    parameters are read: all but the last end at the next ``,``, the last one at
    the next ``)``. Spaces directly around the separating commas are stripped;
    leading spaces of the first parameter and trailing spaces of the last
    parameter are kept.

    ``command_start`` is only used for error messages.

    Returns the index behind the closing ``)`` and the list of parameters.

    Raises ``ParsingException`` if a separating comma or the closing ``)`` is missing.
    """
    result: t.List[str] = []
    for parameter_number in range(1, command.parameter_count):
        comma_index = text.find(',', index)
        if comma_index < 0:
            raise ParsingException(
                f'Cannot find comma separating '
                f'parameter {parameter_number}'
                f' from the next one for command "{command.command}"'
                f' starting at index {command_start} in {text!r}'
            )
        parameter = text[index:comma_index].rstrip(' ')
        if parameter_number > 1:
            # Only parameters following a comma lose their leading spaces.
            parameter = parameter.lstrip(' ')
        result.append(parameter)
        index = comma_index + 1
    closing_index = text.find(')', index)
    if closing_index < 0:
        raise ParsingException(
            f'Cannot find ")" closing after the last parameter for'
            f' command "{command.command}" starting at index {command_start} in {text!r}'
        )
    parameter = text[index:closing_index]
    if result:
        # A non-empty result means this parameter follows a comma.
        parameter = parameter.lstrip(' ')
    result.append(parameter)
    return closing_index + 1, result


def parse_text(text: str, commands: CommandSet) -> t.List[Part]:
result: t.List[Part] = []
index = 0
Expand All @@ -167,9 +89,16 @@ def parse_text(text: str, commands: CommandSet) -> t.List[Part]:
continue
index += 1
if command.escaped_content:
index, parameters = parse_parameters_escaped(text, index, command, command_start)
index, parameters, error = parse_parameters_escaped(
text, index, command.parameter_count)
else:
index, parameters = parse_parameters_unescaped(text, index, command, command_start)
index, parameters, error = parse_parameters_unescaped(
text, index, command.parameter_count)
if error is not None:
raise ParsingException(
error +
f' for command "{command.command}" starting at index {command_start} in {text!r}'
)
result.append(CommandData(command=command, parameters=parameters))
return result

Expand Down
58 changes: 58 additions & 0 deletions tests/units/test_markup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt)
# SPDX-License-Identifier: GPL-3.0-or-later
# SPDX-FileCopyrightText: 2020, Ansible Project

import pytest

from antsibull_docs.markup.rstify import rst_escape, rst_ify

# Maps input markup text to the RST output that test_rst_ify expects from rst_ify().
RST_IFY_DATA = {
# No substitutions
'no-op': 'no-op',
'no-op Z(test)': 'no-op Z(test)',
# Simple cases of all substitutions
'I(italic)': r'\ :emphasis:`italic`\ ',
'B(bold)': r'\ :strong:`bold`\ ',
'M(ansible.builtin.yum)': r'\ :ref:`ansible.builtin.yum'
r' <ansible_collections.ansible.builtin.yum_module>`\ ',
'U(https://docs.ansible.com)': r'\ https://docs.ansible.com\ ',
'L(the user guide,https://docs.ansible.com/user-guide.html)': r'\ `the user guide'
r' <https://docs.ansible.com/user-guide.html>`__\ ',
'R(the user guide,user-guide)': r'\ :ref:`the user guide <user-guide>`\ ',
'C(/usr/bin/file)': r'\ :literal:`/usr/bin/file`\ ',
'HORIZONTALLINE': '\n\n.. raw:: html\n\n <hr>\n\n',
# Multiple substitutions
'The M(ansible.builtin.yum) module B(MUST) be given the C(package) parameter. See the R(looping docs,using-loops) for more info':
r'The \ :ref:`ansible.builtin.yum <ansible_collections.ansible.builtin.yum_module>`\ module \ :strong:`MUST`\ be given the \ :literal:`package`\ parameter. See the \ :ref:`looping docs <using-loops>`\ for more info',
# Problem cases
# (a command letter inside a longer word; a space after the separating comma)
'IBM(International Business Machines)': 'IBM(International Business Machines)',
'L(the user guide, https://docs.ansible.com/)': r'\ `the user guide <https://docs.ansible.com/>`__\ ',
'R(the user guide, user-guide)': r'\ :ref:`the user guide <user-guide>`\ ',
}


@pytest.mark.parametrize('text, expected', RST_IFY_DATA.items())
def test_rst_ify(text, expected):
    """Check that rst_ify() renders every RST_IFY_DATA input to its expected RST."""
    assert rst_ify(text, plugin_name='foo.bar.baz', plugin_type='module') == expected


# Maps values passed to rst_escape() to the escaped string that test_escape_ify
# expects. The keys None and 1 are non-string inputs; their expected output is
# their string form.
RST_ESCAPE_DATA = {
'': '',
'no-op': 'no-op',
None: 'None',
1: '1',
'*': '\\*',
'_': '\\_',
'<': '\\<',
'>': '\\>',
'`': '\\`',
'\\': '\\\\',
'\\*': '\\\\\\*',
'*\\': '\\*\\\\',
':role:`test`': ':role:\\`test\\`',
}


@pytest.mark.parametrize('value, expected', RST_ESCAPE_DATA.items())
def test_escape_ify(value, expected):
    """Check that rst_escape() escapes every RST_ESCAPE_DATA input as expected."""
    assert rst_escape(value) == expected

0 comments on commit 788a5eb

Please sign in to comment.