diff --git a/src/hyperlink/_url.py b/src/hyperlink/_url.py index 4fb84133..d32829c0 100644 --- a/src/hyperlink/_url.py +++ b/src/hyperlink/_url.py @@ -467,9 +467,11 @@ def _encode_userinfo_part(text, maximal=True): ) # As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc +NO_QUERY_PLUS_SCHEMES = set() -def register_scheme(text, uses_netloc=True, default_port=None): - # type: (Text, bool, Optional[int]) -> None + +def register_scheme(text, uses_netloc=True, default_port=None, query_plus_is_space=True): + # type: (Text, bool, Optional[int], bool) -> None """Registers new scheme information, resulting in correct port and slash behavior from the URL object. There are dozens of standard schemes preregistered, so this function is mostly meant for @@ -485,6 +487,8 @@ def register_scheme(text, uses_netloc=True, default_port=None): not. Defaults to True. default_port: The default port, if any, for netloc-using schemes. + query_plus_is_space: If true, a "+" in the query string should be + decoded as a space by DecodedURL. .. _file an issue: https://github.com/mahmoud/hyperlink/issues """ @@ -510,6 +514,9 @@ def register_scheme(text, uses_netloc=True, default_port=None): else: raise ValueError("uses_netloc expected bool, not: %r" % uses_netloc) + if not query_plus_is_space: + NO_QUERY_PLUS_SCHEMES.add(text) + return @@ -1998,6 +2005,9 @@ class DecodedURL(object): lazy: Set to True to avoid pre-decode all parts of the URL to check for validity. Defaults to False. + query_plus_is_space: Whether "+" characters in the query string should + be treated as spaces when decoding. If unspecified, the default is + taken from the scheme. .. note:: @@ -2012,9 +2022,12 @@ class DecodedURL(object): ..
versionadded:: 18.0.0 """ - def __init__(self, url=_EMPTY_URL, lazy=False): - # type: (URL, bool) -> None + def __init__(self, url=_EMPTY_URL, lazy=False, query_plus_is_space=None): + # type: (URL, bool, Optional[bool]) -> None self._url = url + if query_plus_is_space is None: + query_plus_is_space = url.scheme not in NO_QUERY_PLUS_SCHEMES + self._query_plus_is_space = query_plus_is_space if not lazy: # cache the following, while triggering any decoding # issues with decodable fields @@ -2022,7 +2035,7 @@ def __init__(self, url=_EMPTY_URL, lazy=False): return @classmethod - def from_text(cls, text, lazy=False): - # type: (Text, bool) -> DecodedURL + def from_text(cls, text, lazy=False, query_plus_is_space=None): + # type: (Text, bool, Optional[bool]) -> DecodedURL """\ Make a `DecodedURL` instance from any text string containing a URL. @@ -2034,7 +2047,7 @@ def from_text(cls, text, lazy=False): Defaults to True. """ _url = URL.from_text(text) - return cls(_url, lazy=lazy) + return cls(_url, lazy=lazy, query_plus_is_space=query_plus_is_space) @property def encoded_url(self): @@ -2059,6 +2072,14 @@ def to_iri(self): "Passthrough to :meth:`~hyperlink.URL.to_iri()`" return self._url.to_iri() + def _clone(self, url): + # type: (URL) -> DecodedURL + return self.__class__( + url, + # TODO: propagate laziness?
+ query_plus_is_space=self._query_plus_is_space, + ) + def click(self, href=u""): # type: (Union[Text, URL, DecodedURL]) -> DecodedURL """Return a new DecodedURL wrapping the result of @@ -2066,7 +2087,9 @@ def click(self, href=u""): """ if isinstance(href, DecodedURL): href = href._url - return self.__class__(self._url.click(href=href)) + return self._clone( + self._url.click(href=href), + ) def sibling(self, segment): # type: (Text) -> DecodedURL @@ -2074,7 +2097,9 @@ def sibling(self, segment): return a new `DecodedURL` wrapping the result of :meth:`~hyperlink.URL.sibling()` """ - return self.__class__(self._url.sibling(_encode_reserved(segment))) + return self._clone( + self._url.sibling(_encode_reserved(segment)), + ) def child(self, *segments): # type: (Text) -> DecodedURL @@ -2085,7 +2110,7 @@ def child(self, *segments): if not segments: return self new_segs = [_encode_reserved(s) for s in segments] - return self.__class__(self._url.child(*new_segs)) + return self._clone(self._url.child(*new_segs)) def normalize( self, @@ -2101,7 +2126,7 @@ def normalize( """Return a new `DecodedURL` wrapping the result of :meth:`~hyperlink.URL.normalize()` """ - return self.__class__( + return self._clone( self._url.normalize( scheme, host, path, query, fragment, userinfo, percents ) @@ -2148,11 +2173,16 @@ def path(self): def query(self): # type: () -> QueryPairs if not hasattr(self, "_query"): + if self._query_plus_is_space: + predecode = lambda x: x.replace("+", "%20") + else: + predecode = lambda x: x + self._query = cast( QueryPairs, tuple( tuple( - _percent_decode(x, raise_subencoding_exc=True) + _percent_decode(predecode(x), raise_subencoding_exc=True) if x is not None else None for x in (k, v) @@ -2248,7 +2278,7 @@ def replace( userinfo=userinfo_text, uses_netloc=uses_netloc, ) - return self.__class__(url=new_url) + return self._clone(url=new_url) def get(self, name): # type: (Text) -> List[Optional[Text]] diff --git a/src/hyperlink/test/test_decoded_url.py 
b/src/hyperlink/test/test_decoded_url.py index 7104bea5..91dce44f 100644 --- a/src/hyperlink/test/test_decoded_url.py +++ b/src/hyperlink/test/test_decoded_url.py @@ -210,3 +210,17 @@ def test_click_decoded_url(self): assert clicked.host == durl.host assert clicked.path == durl_dest.path assert clicked.path == ("tëst",) + + def test_decode_plus(self): + # type: () -> None + durl = DecodedURL.from_text("/x+y%2B?a=b+c%2B") + assert durl.path == ("x+y+",) + assert durl.get("a") == ["b c+"] + assert durl.query == (("a", "b c+"),) + + def test_decode_nonplussed(self): + # type: () -> None + durl = DecodedURL.from_text("/x+y%2B?a=b+c%2B", query_plus_is_space=False) + assert durl.path == ("x+y+",) + assert durl.get("a") == ["b+c+"] + assert durl.query == (("a", "b+c+"),) diff --git a/src/hyperlink/test/test_scheme_registration.py b/src/hyperlink/test/test_scheme_registration.py index f98109a3..35a80f69 100644 --- a/src/hyperlink/test/test_scheme_registration.py +++ b/src/hyperlink/test/test_scheme_registration.py @@ -5,7 +5,7 @@ from .. import _url from .common import HyperlinkTestCase -from .._url import register_scheme, URL +from .._url import register_scheme, URL, DecodedURL class TestSchemeRegistration(HyperlinkTestCase): @@ -70,3 +70,13 @@ def test_register_invalid_port(self): # type: () -> None with self.assertRaises(ValueError): register_scheme("nope", default_port=cast(bool, object())) + + def test_register_no_quote_plus_scheme(self): + # type: () -> None + # Neither URL passes query_plus_is_space, so each picks up its scheme's default. + register_scheme("keepplus", query_plus_is_space=False) + plus_is_not_space = DecodedURL.from_text("keepplus://heyoo/?q=a+b") + plus_is_space = DecodedURL.from_text("https://abc.xyz/x+y%2B?a=b+c%2B") + # "keepplus" was registered to keep "+" literal; "https" was not, so "+" decodes to a space. + assert plus_is_not_space.get("q") == ["a+b"] + assert plus_is_space.get("a") == ["b c+"]