From 89ee17074435402fbbfcbaedf338f12083dfc209 Mon Sep 17 00:00:00 2001 From: Jan Kaliszewski Date: Sun, 24 Oct 2021 01:37:23 +0200 Subject: [PATCH] Fix erroneous non-ASCII in local version (#469) * Fix validation in the packaging.version.Version's constructor * Fix validation in the packaging.specifiers.Specifier's constructor * Complement relevant tests, including adding cases related to presence of non-ASCII whitespace characters in input strings * Fix docs of packaging.version.VERSION_PATTERN by mentioning necessity of the re.ASCII flag --- CHANGELOG.rst | 5 ++++- docs/version.rst | 2 +- packaging/specifiers.py | 24 ++++++++++++++++++++++++ packaging/version.py | 12 ++++++++++-- tests/test_specifiers.py | 5 +++++ tests/test_version.py | 4 ++++ 6 files changed, 48 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 97148a51..a0ad0e77 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,7 +4,10 @@ Changelog *unreleased* ~~~~~~~~~~~~ -No unreleased changes. +* Fix parsing of ``Version`` and ``Specifier``, to prevent certain + non-ASCII letters from being accepted as a part of the local version + segment (:issue:`469`); also, fix the docs of ``VERSION_PATTERN``, to + mention necessity of the ``re.ASCII`` flag 21.0 - 2021-07-03 ~~~~~~~~~~~~~~~~~ diff --git a/docs/version.rst b/docs/version.rst index a43cf786..33e14647 100644 --- a/docs/version.rst +++ b/docs/version.rst @@ -284,7 +284,7 @@ Reference The pattern is not anchored at either end, and is intended for embedding in larger expressions (for example, matching a version number as part of a file name). The regular expression should be compiled with the - ``re.VERBOSE`` and ``re.IGNORECASE`` flags set. + ``re.VERBOSE``, ``re.IGNORECASE`` and ``re.ASCII`` flags set. .. _PEP 440: https://www.python.org/dev/peps/pep-0440/ diff --git a/packaging/specifiers.py b/packaging/specifiers.py index ce66bd4a..5925aa14 100644 --- a/packaging/specifiers.py +++ b/packaging/specifiers.py @@ -411,6 +411,23 @@ class Specifier(_IndividualSpecifier): _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) + # Note: an additional check, based of the following regular + # expression, is necessary because without it the 'a-z' + # character ranges in the above regular expression, in + # conjunction with re.IGNORECASE, would cause erroneous + # acceptance of non-ASCII letters in the local version segment + # (see: https://docs.python.org/library/re.html#re.IGNORECASE). + _supplementary_restriction_regex = re.compile( + r""" + \s*===.* # No restriction in the identity operator case. + | + [\s\0-\177]* # In all other cases only whitespace characters + # and ASCII-only non-whitespace characters are + # allowed. + """, + re.VERBOSE, + ) + _operators = { "~=": "compatible", "==": "equal", @@ -422,6 +439,13 @@ class Specifier(_IndividualSpecifier): "===": "arbitrary", } + def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: + super().__init__(spec, prereleases) + + match = self._supplementary_restriction_regex.fullmatch(spec) + if not match: + raise InvalidSpecifier(f"Invalid specifier: '{spec}'") + @_require_version_compare def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool: diff --git a/packaging/version.py b/packaging/version.py index de9a09a4..ec2fce25 100644 --- a/packaging/version.py +++ b/packaging/version.py @@ -256,12 +256,20 @@ def _legacy_cmpkey(version: str) -> LegacyCmpKey: class Version(_BaseVersion): - _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE) + _regex = re.compile( + VERSION_PATTERN, + # Note: the re.ASCII flag is necessary because without it the + # 'a-z' character ranges in VERSION_PATTERN, in conjunction + # with re.IGNORECASE, would cause erroneous acceptance of + # non-ASCII letters in the local version segment (see: + # https://docs.python.org/library/re.html#re.IGNORECASE). + re.VERBOSE | re.IGNORECASE | re.ASCII, + ) def __init__(self, version: str) -> None: # Validate the version and parse it into pieces - match = self._regex.search(version) + match = self._regex.fullmatch(version.strip()) if not match: raise InvalidVersion(f"Invalid version: '{version}'") diff --git a/tests/test_specifiers.py b/tests/test_specifiers.py index 0b8e742c..81d9eff1 100644 --- a/tests/test_specifiers.py +++ b/tests/test_specifiers.py @@ -81,6 +81,9 @@ def test_specifiers_valid(self, specifier): # Cannot use a prefix matching after a .devN version "==1.0.dev1.*", "!=1.0.dev1.*", + # Local version which includes a non-ASCII letter that matches + # regex '[a-z]' when re.IGNORECASE is in force and re.ASCII is not + "==1.0+\u0130", ], ) def test_specifiers_invalid(self, specifier): @@ -197,6 +200,7 @@ def test_specifiers_invalid(self, specifier): # Various other normalizations "v1.0", " \r \f \v v1.0\t\n", + " \r\N{NARROW NO-BREAK SPACE}\v v1.0\N{PARAGRAPH SEPARATOR}", ], ) def test_specifiers_normalized(self, version): @@ -221,6 +225,7 @@ def test_specifiers_normalized(self, version): ("~=2.0", "~=2.0"), # Spaces should be removed ("< 2", "<2"), + ("<\N{HAIR SPACE}2", "<2"), ], ) def test_specifiers_str_and_repr(self, specifier, expected): diff --git a/tests/test_version.py b/tests/test_version.py index 8fe56329..7a43fd35 100644 --- a/tests/test_version.py +++ b/tests/test_version.py @@ -96,6 +96,9 @@ def test_valid_versions(self, version): "1.0+_foobar", "1.0+foo&asd", "1.0+1+1", + # Local version which includes a non-ASCII letter that matches + # regex '[a-z]' when re.IGNORECASE is in force and re.ASCII is not + "1.0+\u0130", ], ) def test_invalid_versions(self, version): @@ -218,6 +221,7 @@ def test_invalid_versions(self, version): # Various other normalizations ("v1.0", "1.0"), (" v1.0\t\n", "1.0"), + ("\N{NARROW NO-BREAK SPACE}1.0\t\N{PARAGRAPH SEPARATOR}\n ", "1.0"), ], ) def test_normalized_versions(self, version, normalized):