Skip to content

Commit

Permalink
pep440: replace lark in favour of regex (#152)
Browse files Browse the repository at this point in the history
* tests: fix incorrect parameters

* pep440: replace lark in favour of regex

The Earley-based lark implementation of PEP 440 that was introduced was not
performant. On average, it added around 7 seconds to the test suite
execution time.

This change drops the lark parser and uses the regex provided by
`packaging.version` for version string parsing. The new dataclass interface
for version instances remains unaffected.
  • Loading branch information
abn authored Mar 31, 2021
1 parent 4a2ccc3 commit 8996496
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 113 deletions.
2 changes: 0 additions & 2 deletions poetry/core/version/grammars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@

# Directory that holds the grammar files: the directory containing this module.
GRAMMAR_DIR = Path(__file__).parent

# Path to the PEP 440 grammar file.
GRAMMAR_PEP_440 = GRAMMAR_DIR / "pep440.lark"

# Path to the PEP 508 constraints grammar file.
GRAMMAR_PEP_508_CONSTRAINTS = GRAMMAR_DIR / "pep508.lark"

# Path to the PEP 508 markers grammar file.
GRAMMAR_PEP_508_MARKERS = GRAMMAR_DIR / "markers.lark"
32 changes: 0 additions & 32 deletions poetry/core/version/grammars/pep440.lark

This file was deleted.

145 changes: 69 additions & 76 deletions poetry/core/version/pep440/parser.py
Original file line number Diff line number Diff line change
@@ -1,95 +1,88 @@
import re

from typing import TYPE_CHECKING
from typing import List
from typing import AnyStr
from typing import Match
from typing import Optional
from typing import Type

from lark import LarkError
from lark import Transformer
from packaging.version import VERSION_PATTERN

from poetry.core.version.exceptions import InvalidVersion
from poetry.core.version.grammars import GRAMMAR_PEP_440
from poetry.core.version.parser import Parser
from poetry.core.version.pep440 import LocalSegmentType
from poetry.core.version.pep440 import Release
from poetry.core.version.pep440 import ReleaseTag


if TYPE_CHECKING:
from poetry.core.version.pep440.version import PEP440Version

# Parser: PEP 440
# The Earley algorithm is used because the grammar is ambiguous.
# NOTE(review): the third positional argument (False) is not explained here;
# confirm its meaning against poetry.core.version.parser.Parser.
PARSER_PEP_440 = Parser(GRAMMAR_PEP_440, "earley", False)


class _Transformer(Transformer):
    """Convert a lark parse tree for a PEP 440 version into its components.

    The upper-case methods receive tokens and return plain Python values;
    ``version`` and ``start`` receive the children of the matching rules.
    """

    def NUMERIC_IDENTIFIER(self, data: "Token"):  # noqa
        """Return the token text as an integer."""
        text = data.value
        return int(text)

    def LOCAL_IDENTIFIER(self, data: "Token"):  # noqa
        """Return the token as an integer when possible, else its raw text."""
        text = data.value
        try:
            return int(text)
        except ValueError:
            return text

    def POST_RELEASE_TAG(self, data: "Token"):  # noqa
        """Return the raw text of the post-release tag."""
        text = data.value
        return text

    def PRE_RELEASE_TAG(self, data: "Token"):  # noqa
        """Return the raw text of the pre-release tag."""
        text = data.value
        return text

    def DEV_RELEASE_TAG(self, data: "Token"):  # noqa
        """Return the raw text of the dev-release tag."""
        text = data.value
        return text

    def LOCAL(self, data: "Token"):  # noqa
        """Return the raw text of the local token."""
        text = data.value
        return text

    def INT(self, data: "Token"):  # noqa
        """Return the token text as an integer."""
        text = data.value
        return int(text)

    def version(self, children: List["Tree"]):  # noqa
        """Collect the version segments from the child trees.

        Returns a tuple ``(epoch, release, pre, post, dev, local)``. The epoch
        defaults to ``0``; every other segment defaults to ``None``.
        """
        parts = {
            "epoch": 0,
            "release": None,
            "pre": None,
            "post": None,
            "dev": None,
            "local": None,
        }

        for node in children:
            kind = node.data
            values = node.children

            if kind == "epoch":
                # epoch is always a single numeric value
                parts["epoch"] = values[0]
            elif kind == "release":
                # release segment is of the form N(.N)*
                parts["release"] = Release.from_parts(*values)
            elif kind == "pre_release":
                # pre-release tag is of the form (a|b|rc)N
                parts["pre"] = ReleaseTag(*values)
            elif kind == "post_release":
                # post-release tags are of the form N (shortened) or post(N)*
                shortened = len(values) == 1 and isinstance(values[0], int)
                if shortened:
                    parts["post"] = ReleaseTag("post", values[0])
                else:
                    parts["post"] = ReleaseTag(*values)
            elif kind == "dev_release":
                # dev-release tag is of the form dev(N)*
                parts["dev"] = ReleaseTag(*values)
            elif kind == "local":
                parts["local"] = tuple(values)

        return (
            parts["epoch"],
            parts["release"],
            parts["pre"],
            parts["post"],
            parts["dev"],
            parts["local"],
        )

    def start(self, children: List["Tree"]):  # noqa
        """Return the first (and only relevant) child of the start rule."""
        first = children[0]
        return first


# Shared transformer instance, created once at import time.
_TRANSFORMER = _Transformer()
class PEP440Parser:
    """Regex-based parser for PEP 440 version strings.

    The pattern is ``packaging.version.VERSION_PATTERN`` anchored to the whole
    input, tolerating surrounding whitespace and matching case-insensitively.
    Each ``_get_*`` helper converts the named groups of a match into one
    component of the version.
    """

    _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
    _local_version_separators = re.compile(r"[._-]")

    @classmethod
    def _get_release(cls, match: Optional[Match[AnyStr]]) -> Release:
        """Return the release segment, or ``Release(0)`` when there is none."""
        if not match or match.group("release") is None:
            return Release(0)
        return Release.from_parts(*(int(i) for i in match.group("release").split(".")))

    @classmethod
    def _get_prerelease(cls, match: Optional[Match[AnyStr]]) -> Optional[ReleaseTag]:
        """Return the pre-release tag, or ``None`` when there is none.

        A missing number defaults to ``0``.
        """
        if not match or match.group("pre") is None:
            return None
        return ReleaseTag(match.group("pre_l"), int(match.group("pre_n") or 0))

    @classmethod
    def _get_postrelease(cls, match: Optional[Match[AnyStr]]) -> Optional[ReleaseTag]:
        """Return the post-release tag, or ``None`` when there is none.

        The label falls back to ``"post"`` when the ``post_l`` group is empty,
        and the number is taken from ``post_n1`` or ``post_n2`` (``0`` when
        neither is present).
        """
        if not match or match.group("post") is None:
            return None

        return ReleaseTag(
            match.group("post_l") or "post",
            int(match.group("post_n1") or match.group("post_n2") or 0),
        )

    @classmethod
    def _get_devrelease(cls, match: Optional[Match[AnyStr]]) -> Optional[ReleaseTag]:
        """Return the dev-release tag, or ``None`` when there is none.

        A missing number defaults to ``0``.
        """
        if not match or match.group("dev") is None:
            return None
        return ReleaseTag(match.group("dev_l"), int(match.group("dev_n") or 0))

    @classmethod
    def _get_local(cls, match: Optional[Match[AnyStr]]) -> Optional[LocalSegmentType]:
        """Return the local version segment as a tuple, or ``None``.

        The segment is split on ``.``, ``_`` and ``-``; all-digit parts become
        integers and every other part is lower-cased.
        """
        if not match or match.group("local") is None:
            return None

        return tuple(
            part.lower() if not part.isdigit() else int(part)
            for part in cls._local_version_separators.split(match.group("local"))
        )

    @classmethod
    def parse(
        cls, value: str, version_class: Optional[Type["PEP440Version"]] = None
    ) -> "PEP440Version":
        """Parse ``value`` into an instance of ``version_class``.

        :param value: The version string to parse.
        :param version_class: Class to instantiate; defaults to
            ``PEP440Version`` (imported lazily).
        :return: The parsed version, with ``text`` set to the original input.
        :raises InvalidVersion: If ``value`` is empty, is not a string, or does
            not match the PEP 440 pattern.
        """
        try:
            match = cls._regex.search(value) if value else None
        except TypeError:
            # A truthy non-string (e.g. an int or bytes) makes the regex engine
            # raise TypeError; report it as an invalid version instead of
            # leaking the low-level error to callers.
            match = None

        if not match:
            raise InvalidVersion(f"Invalid PEP 440 version: '{value}'")

        if version_class is None:
            from poetry.core.version.pep440.version import PEP440Version

            version_class = PEP440Version

        return version_class(
            epoch=int(match.group("epoch")) if match.group("epoch") else 0,
            release=cls._get_release(match),
            pre=cls._get_prerelease(match),
            post=cls._get_postrelease(match),
            dev=cls._get_devrelease(match),
            local=cls._get_local(match),
            text=value,
        )


# Parse `value` as a PEP 440 version and return an instance of `version_class`.
# NOTE(review): this span appears to interleave two versions of the body from
# the diff: the lark-based path (default `version_class`, PARSER_PEP_440 and
# _TRANSFORMER, TypeError/LarkError converted to InvalidVersion) and the
# one-line delegation to PEP440Parser.parse on the last line. Confirm which
# lines survive the commit before relying on this text as a function body.
def parse_pep440(
value: str, version_class: Optional[Type["PEP440Version"]] = None
) -> "PEP440Version":
if version_class is None:
from poetry.core.version.pep440.version import PEP440Version

version_class = PEP440Version

try:
tree = PARSER_PEP_440.parse(text=value)
return version_class(*_TRANSFORMER.transform(tree), text=value)
except (TypeError, LarkError):
raise InvalidVersion(f"Invalid PEP 440 version: '{value}'")
return PEP440Parser.parse(value, version_class)
6 changes: 3 additions & 3 deletions tests/semver/test_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ def test_parse_valid(text, version):
assert parsed.text == text


# NOTE(review): this span appears to show both sides of the diff for
# test_parse_invalid interleaved; confirm which lines belong to each version.
# With a single argument name, [(None, "example")] yields one case whose value
# is the tuple itself, so None and "example" are never passed individually.
@pytest.mark.parametrize("input", [(None, "example")])
def test_parse_invalid(input):
# Here None and "example" are two separate cases; the name `value` also
# avoids shadowing the builtin `input`.
@pytest.mark.parametrize("value", [None, "example"])
def test_parse_invalid(value):
with pytest.raises(InvalidVersion):
Version.parse(input)
Version.parse(value)


@pytest.mark.parametrize(
Expand Down

0 comments on commit 8996496

Please sign in to comment.