From d99f274320c006324ea0ac16955947b96d36f0f6 Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Sat, 15 Jun 2024 12:44:26 -0400 Subject: [PATCH] Started annotating IRIReference and URIReference. - Substituted namedtuple inheritance with a common base `typing.NamedTuple` subclass in misc.py, since these classes share almost the exact same interface. - Added a _typing_compat.py module to be able to import typing.Self, or a placeholder for it, in multiple other modules without bloating their code. - Added basic method annotations to the two reference classes. - Not annotations-related: - Moved the __hash__ implementation over to IRIReference from URIMixin to be congruent with URIReference. - Made the __eq__ implementations more similar to avoid different behavior in cases of inheritance (rare as that might be). - Added overloads to `normalizers.normalize_query` and `normalizers.normalize_fragment` to clearly indicate that None will get passed through. This behavior is relied upon by the library currently. - Note: The runtime-related changes can be reverted and reattempted later if need be. Still passing all the tests currently. --- src/rfc3986/_mixin.py | 2 -- src/rfc3986/_typing_compat.py | 19 +++++++++++++++++ src/rfc3986/builder.py | 6 +++--- src/rfc3986/iri.py | 39 +++++++++++++++++++++++------------ src/rfc3986/misc.py | 13 +++++++++--- src/rfc3986/normalizers.py | 24 +++++++++++++++++++-- src/rfc3986/uri.py | 28 +++++++++++++++---------- 7 files changed, 97 insertions(+), 34 deletions(-) create mode 100644 src/rfc3986/_typing_compat.py diff --git a/src/rfc3986/_mixin.py b/src/rfc3986/_mixin.py index a6278c5..f55d13f 100644 --- a/src/rfc3986/_mixin.py +++ b/src/rfc3986/_mixin.py @@ -10,8 +10,6 @@ class URIMixin: """Mixin with all shared methods for URIs and IRIs.""" - __hash__ = tuple.__hash__ - def authority_info(self): """Return a dictionary with the ``userinfo``, ``host``, and ``port``. 
diff --git a/src/rfc3986/_typing_compat.py b/src/rfc3986/_typing_compat.py new file mode 100644 index 0000000..4822ecc --- /dev/null +++ b/src/rfc3986/_typing_compat.py @@ -0,0 +1,19 @@ +import sys +import typing as t + +__all__ = ("Self",) + +if sys.version_info >= (3, 11): + from typing import Self +elif t.TYPE_CHECKING: + from typing_extensions import Self +else: + + class _PlaceholderMeta(type): + # This is meant to make it easier to debug the presence of placeholder + # classes. + def __repr__(self): + return f"placeholder for typing.{self.__name__}" + + class Self(metaclass=_PlaceholderMeta): + """Placeholder for "typing.Self".""" diff --git a/src/rfc3986/builder.py b/src/rfc3986/builder.py index 9fc2ef8..42d4763 100644 --- a/src/rfc3986/builder.py +++ b/src/rfc3986/builder.py @@ -47,7 +47,7 @@ def __init__( scheme: t.Optional[str] = None, userinfo: t.Optional[str] = None, host: t.Optional[str] = None, - port: t.Optional[str] = None, + port: t.Optional[t.Union[int, str]] = None, path: t.Optional[str] = None, query: t.Optional[str] = None, fragment: t.Optional[str] = None, @@ -60,7 +60,7 @@ def __init__( (optional) :param str host: (optional) - :param int port: + :param int | str port: (optional) :param str path: (optional) @@ -72,7 +72,7 @@ def __init__( self.scheme = scheme self.userinfo = userinfo self.host = host - self.port = port + self.port = str(port) if port is not None else port self.path = path self.query = query self.fragment = fragment diff --git a/src/rfc3986/iri.py b/src/rfc3986/iri.py index 01f51ef..6578bc0 100644 --- a/src/rfc3986/iri.py +++ b/src/rfc3986/iri.py @@ -14,13 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. import typing as t -from collections import namedtuple from . import compat from . import exceptions from . import misc from . import normalizers from . 
import uri +from ._typing_compat import Self try: @@ -29,9 +29,7 @@ idna = None -class IRIReference( - namedtuple("IRIReference", misc.URI_COMPONENTS), uri.URIMixin -): +class IRIReference(misc.URIReferenceBase, uri.URIMixin): """Immutable object representing a parsed IRI Reference. Can be encoded into an URIReference object via the procedure @@ -42,10 +40,16 @@ class IRIReference( the future. Check for changes to the interface when upgrading. """ - slots = () + encoding: str def __new__( - cls, scheme, authority, path, query, fragment, encoding="utf-8" + cls, + scheme: t.Optional[str], + authority: t.Optional[str], + path: t.Optional[str], + query: t.Optional[str], + fragment: t.Optional[str], + encoding: str = "utf-8", ): """Create a new IRIReference.""" ref = super().__new__( @@ -59,14 +63,16 @@ def __new__( ref.encoding = encoding return ref - def __eq__(self, other): + __hash__ = tuple.__hash__ + + def __eq__(self, other: object): """Compare this reference to another.""" other_ref = other if isinstance(other, tuple): - other_ref = self.__class__(*other) + other_ref = type(self)(*other) elif not isinstance(other, IRIReference): try: - other_ref = self.__class__.from_string(other) + other_ref = self.from_string(other) except TypeError: raise TypeError( "Unable to compare {}() to {}()".format( @@ -77,7 +83,7 @@ def __eq__(self, other): # See http://tools.ietf.org/html/rfc3986#section-6.2 return tuple(self) == tuple(other_ref) - def _match_subauthority(self): + def _match_subauthority(self) -> t.Optional[t.Match[str]]: return misc.ISUBAUTHORITY_MATCHER.match(self.authority) @classmethod @@ -85,7 +91,7 @@ def from_string( cls, iri_string: t.Union[str, bytes, bytearray], encoding: str = "utf-8", - ): + ) -> Self: """Parse a IRI reference from the given unicode IRI string. :param str iri_string: Unicode IRI to be parsed into a reference. 
@@ -104,7 +110,12 @@ def from_string( encoding, ) - def encode(self, idna_encoder=None): # noqa: C901 + def encode( # noqa: C901 + self, + idna_encoder: t.Optional[ # pyright: ignore[reportRedeclaration] + t.Callable[[str], t.Union[str, bytes]] + ] = None, + ) -> "uri.URIReference": """Encode an IRIReference into a URIReference instance. If the ``idna`` module is installed or the ``rfc3986[idna]`` @@ -127,7 +138,9 @@ def encode(self, idna_encoder=None): # noqa: C901 "and the IRI hostname requires encoding" ) - def idna_encoder(name): + def idna_encoder(name: str) -> t.Union[str, bytes]: + assert idna # Known to not be None at this point. + if any(ord(c) > 128 for c in name): try: return idna.encode( diff --git a/src/rfc3986/misc.py b/src/rfc3986/misc.py index 5e0d925..7489d8f 100644 --- a/src/rfc3986/misc.py +++ b/src/rfc3986/misc.py @@ -26,9 +26,16 @@ # Break an import loop. from . import uri -# These are enumerated for the named tuple used as a superclass of -# URIReference -URI_COMPONENTS = ["scheme", "authority", "path", "query", "fragment"] + +class URIReferenceBase(t.NamedTuple): + """The namedtuple used as a superclass of URIReference and IRIReference.""" + + scheme: t.Optional[str] + authority: t.Optional[str] + path: t.Optional[str] + query: t.Optional[str] + fragment: t.Optional[str] + important_characters = { "generic_delimiters": abnf_regexp.GENERIC_DELIMITERS, diff --git a/src/rfc3986/normalizers.py b/src/rfc3986/normalizers.py index 902c23c..532bfaf 100644 --- a/src/rfc3986/normalizers.py +++ b/src/rfc3986/normalizers.py @@ -82,14 +82,34 @@ def normalize_path(path: str) -> str: return remove_dot_segments(path) -def normalize_query(query: str) -> str: +@t.overload +def normalize_query(query: str) -> str: # noqa: D103 + ... + + +@t.overload +def normalize_query(query: None) -> None: # noqa: D103 + ... 
+ + +def normalize_query(query: t.Optional[str]) -> t.Optional[str]: """Normalize the query string.""" if not query: return query return normalize_percent_characters(query) -def normalize_fragment(fragment: str) -> str: +@t.overload +def normalize_fragment(fragment: str) -> str: # noqa: D103 + ... + + +@t.overload +def normalize_fragment(fragment: None) -> None: # noqa: D103 + ... + + +def normalize_fragment(fragment: t.Optional[str]) -> t.Optional[str]: """Normalize the fragment string.""" if not fragment: return fragment diff --git a/src/rfc3986/uri.py b/src/rfc3986/uri.py index f5448d3..6747447 100644 --- a/src/rfc3986/uri.py +++ b/src/rfc3986/uri.py @@ -14,15 +14,15 @@ # See the License for the specific language governing permissions and # limitations under the License. import typing as t -from collections import namedtuple from . import compat from . import misc from . import normalizers from ._mixin import URIMixin +from ._typing_compat import Self -class URIReference(namedtuple("URIReference", misc.URI_COMPONENTS), URIMixin): +class URIReference(misc.URIReferenceBase, URIMixin): """Immutable object representing a parsed URI Reference. .. note:: @@ -80,10 +80,16 @@ class URIReference(namedtuple("URIReference", misc.URI_COMPONENTS), URIMixin): The port parsed from the authority. 
""" - slots = () + encoding: str def __new__( - cls, scheme, authority, path, query, fragment, encoding="utf-8" + cls, + scheme: t.Optional[str], + authority: t.Optional[str], + path: t.Optional[str], + query: t.Optional[str], + fragment: t.Optional[str], + encoding: str = "utf-8", ): """Create a new URIReference.""" ref = super().__new__( @@ -99,18 +105,18 @@ def __new__( __hash__ = tuple.__hash__ - def __eq__(self, other): + def __eq__(self, other: object): """Compare this reference to another.""" other_ref = other if isinstance(other, tuple): - other_ref = URIReference(*other) + other_ref = type(self)(*other) elif not isinstance(other, URIReference): try: - other_ref = URIReference.from_string(other) + other_ref = self.from_string(other) except TypeError: raise TypeError( - "Unable to compare URIReference() to {}()".format( - type(other).__name__ + "Unable to compare {}() to {}()".format( + type(self).__name__, type(other).__name__ ) ) @@ -118,7 +124,7 @@ def __eq__(self, other): naive_equality = tuple(self) == tuple(other_ref) return naive_equality or self.normalized_equality(other_ref) - def normalize(self): + def normalize(self) -> "URIReference": """Normalize this reference as described in Section 6.2.2. This is not an in-place normalization. Instead this creates a new @@ -145,7 +151,7 @@ def from_string( cls, uri_string: t.Union[str, bytes, bytearray], encoding: str = "utf-8", - ): + ) -> Self: """Parse a URI reference from the given unicode URI string. :param str uri_string: Unicode URI to be parsed into a reference.