-
Notifications
You must be signed in to change notification settings - Fork 254
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
pep440: replace lark in favour of regex (#152)
* tests: fix incorrect parameters * pep440: replace lark in favour of regex The Lark Earley implementation of PEP 440 that was introduced was not performant. The implementation added on average around 7 seconds to the test suite execution time. This change drops the lark parser and uses the regex provided by `packaging.version` for version string parsing. The new dataclass interface for version instances remains unaffected.
- Loading branch information
Showing
4 changed files
with
72 additions
and
113 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,95 +1,88 @@ | ||
import re | ||
|
||
from typing import TYPE_CHECKING | ||
from typing import List | ||
from typing import AnyStr | ||
from typing import Match | ||
from typing import Optional | ||
from typing import Type | ||
|
||
from lark import LarkError | ||
from lark import Transformer | ||
from packaging.version import VERSION_PATTERN | ||
|
||
from poetry.core.version.exceptions import InvalidVersion | ||
from poetry.core.version.grammars import GRAMMAR_PEP_440 | ||
from poetry.core.version.parser import Parser | ||
from poetry.core.version.pep440 import LocalSegmentType | ||
from poetry.core.version.pep440 import Release | ||
from poetry.core.version.pep440 import ReleaseTag | ||
|
||
|
||
if TYPE_CHECKING: | ||
from poetry.core.version.pep440.version import PEP440Version | ||
|
||
# Parser: PEP 440 | ||
# we use earley because the grammar is ambiguous | ||
PARSER_PEP_440 = Parser(GRAMMAR_PEP_440, "earley", False) | ||
|
||
|
||
class _Transformer(Transformer):
    """Lark transformer that reduces a PEP 440 parse tree to its version parts.

    Terminal callbacks convert tokens to plain ``int``/``str`` values; the
    ``version`` rule callback assembles those values into the tuple
    ``(epoch, release, pre, post, dev, local)``.
    """

    def NUMERIC_IDENTIFIER(self, data: "Token"):  # noqa
        """Convert a numeric identifier token to ``int``."""
        return int(data.value)

    def LOCAL_IDENTIFIER(self, data: "Token"):  # noqa
        """Return a local identifier as ``int`` when numeric, else unchanged."""
        raw = data.value
        try:
            return int(raw)
        except ValueError:
            return raw

    def POST_RELEASE_TAG(self, data: "Token"):  # noqa
        """Unwrap the post-release tag token to its text."""
        return data.value

    def PRE_RELEASE_TAG(self, data: "Token"):  # noqa
        """Unwrap the pre-release tag token to its text."""
        return data.value

    def DEV_RELEASE_TAG(self, data: "Token"):  # noqa
        """Unwrap the dev-release tag token to its text."""
        return data.value

    def LOCAL(self, data: "Token"):  # noqa
        """Unwrap the local token to its text."""
        return data.value

    def INT(self, data: "Token"):  # noqa
        """Convert an integer token to ``int``."""
        return int(data.value)

    def version(self, children: List["Tree"]):  # noqa
        """Collect the segments of a version node.

        Returns the tuple ``(epoch, release, pre, post, dev, local)``. The
        epoch defaults to ``0``; every other segment defaults to ``None``
        when the corresponding child is absent.
        """
        epoch = 0
        release = pre = post = dev = local = None

        for node in children:
            kind = node.data

            if kind == "epoch":
                # The epoch is always a single numeric value.
                epoch = node.children[0]
            elif kind == "release":
                # Release segment has the form N(.N)*.
                release = Release.from_parts(*node.children)
            elif kind == "pre_release":
                # Pre-release tag has the form (a|b|rc)N.
                pre = ReleaseTag(*node.children)
            elif kind == "post_release":
                # Post-release is either the shortened form N (a lone int)
                # or the explicit form post(N)*.
                items = node.children
                is_shortened = len(items) == 1 and isinstance(items[0], int)
                post = (
                    ReleaseTag("post", items[0])
                    if is_shortened
                    else ReleaseTag(*items)
                )
            elif kind == "dev_release":
                # Dev-release tag has the form dev(N)*.
                dev = ReleaseTag(*node.children)
            elif kind == "local":
                local = tuple(node.children)

        return epoch, release, pre, post, dev, local

    def start(self, children: List["Tree"]):  # noqa
        """Return the first (and only relevant) child of the start rule."""
        return children[0]
|
||
|
||
_TRANSFORMER = _Transformer() | ||
class PEP440Parser:
    """Regex-based parser for PEP 440 version strings.

    The heavy lifting is done by the ``VERSION_PATTERN`` regular expression
    from ``packaging.version``; the helper classmethods translate the named
    groups of a successful match into the segment objects expected by the
    version class.
    """

    # The pattern is anchored and padded with ``\s*`` so that surrounding
    # whitespace is tolerated while anything else around the version is not.
    _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
    # Characters that separate the parts of a local version label.
    _local_version_separators = re.compile(r"[._-]")

    @classmethod
    def _get_release(cls, match: Optional[Match[AnyStr]]) -> Release:
        """Build the release segment; falls back to ``Release(0)`` when absent."""
        if not match or match.group("release") is None:
            return Release(0)
        return Release.from_parts(*(int(i) for i in match.group("release").split(".")))

    @classmethod
    def _get_prerelease(cls, match: Optional[Match[AnyStr]]) -> Optional[ReleaseTag]:
        """Build the pre-release tag, or ``None`` when there is none.

        A missing number defaults to ``0``.
        """
        if not match or match.group("pre") is None:
            return None
        return ReleaseTag(match.group("pre_l"), int(match.group("pre_n") or 0))

    @classmethod
    def _get_postrelease(cls, match: Optional[Match[AnyStr]]) -> Optional[ReleaseTag]:
        """Build the post-release tag, or ``None`` when there is none.

        The label defaults to ``"post"`` when the match carries no explicit
        label; the number comes from whichever of ``post_n1``/``post_n2``
        matched, defaulting to ``0``.
        """
        if not match or match.group("post") is None:
            return None

        return ReleaseTag(
            match.group("post_l") or "post",
            int(match.group("post_n1") or match.group("post_n2") or 0),
        )

    @classmethod
    def _get_devrelease(cls, match: Optional[Match[AnyStr]]) -> Optional[ReleaseTag]:
        """Build the dev-release tag, or ``None`` when there is none.

        A missing number defaults to ``0``.
        """
        if not match or match.group("dev") is None:
            return None
        return ReleaseTag(match.group("dev_l"), int(match.group("dev_n") or 0))

    @classmethod
    def _get_local(cls, match: Optional[Match[AnyStr]]) -> Optional[LocalSegmentType]:
        """Split the local label into a tuple of parts, or return ``None``.

        All-digit parts become ``int``; every other part is lower-cased.
        """
        if not match or match.group("local") is None:
            return None

        return tuple(
            part.lower() if not part.isdigit() else int(part)
            for part in cls._local_version_separators.split(match.group("local"))
        )

    @classmethod
    def parse(
        cls, value: str, version_class: Optional[Type["PEP440Version"]] = None
    ) -> "PEP440Version":
        """Parse ``value`` into an instance of ``version_class``.

        :param value: The version string to parse.
        :param version_class: Class to instantiate; defaults to
            ``PEP440Version`` (imported lazily).
        :return: The version instance built from the parsed segments, with
            the original ``value`` passed through as ``text``.
        :raises InvalidVersion: If ``value`` is empty, is not a string, or
            does not match the version pattern.
        """
        # Anything that is not a ``str`` can never be a valid version. Reject
        # it with InvalidVersion (as empty/None input already is) instead of
        # letting ``re`` surface a bare TypeError.
        match = cls._regex.search(value) if isinstance(value, str) else None
        if not match:
            raise InvalidVersion(f"Invalid PEP 440 version: '{value}'")

        if version_class is None:
            from poetry.core.version.pep440.version import PEP440Version

            version_class = PEP440Version

        return version_class(
            epoch=int(match.group("epoch")) if match.group("epoch") else 0,
            release=cls._get_release(match),
            pre=cls._get_prerelease(match),
            post=cls._get_postrelease(match),
            dev=cls._get_devrelease(match),
            local=cls._get_local(match),
            text=value,
        )
|
||
|
||
def parse_pep440(
    value: str, version_class: Optional[Type["PEP440Version"]] = None
) -> "PEP440Version":
    """Parse a PEP 440 version string.

    Thin functional wrapper that delegates to ``PEP440Parser.parse``.

    :param value: The version string to parse.
    :param version_class: Optional class to instantiate; forwarded unchanged
        to ``PEP440Parser.parse``.
    :return: The parsed version instance.
    :raises InvalidVersion: If ``value`` does not match the version pattern.
    """
    return PEP440Parser.parse(value, version_class)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters