Skip to content

Commit

Permalink
Merge pull request #145 from python-hyper/ne-plus-ultra
Browse files Browse the repository at this point in the history
make hyperlink handle + like an HTML form post by default
  • Loading branch information
glyph authored Dec 30, 2020
2 parents e362c53 + b06994b commit 1949b07
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 20 deletions.
82 changes: 63 additions & 19 deletions src/hyperlink/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def __nonzero__(self):
_SCHEMELESS_PATH_DELIMS = _ALL_DELIMS - _SCHEMELESS_PATH_SAFE
_FRAGMENT_SAFE = _UNRESERVED_CHARS | _PATH_SAFE | set(u"/?")
_FRAGMENT_DELIMS = _ALL_DELIMS - _FRAGMENT_SAFE
_QUERY_VALUE_SAFE = _UNRESERVED_CHARS | _FRAGMENT_SAFE - set(u"&+")
_QUERY_VALUE_SAFE = _UNRESERVED_CHARS | _FRAGMENT_SAFE - set(u"&")
_QUERY_VALUE_DELIMS = _ALL_DELIMS - _QUERY_VALUE_SAFE
_QUERY_KEY_SAFE = _UNRESERVED_CHARS | _QUERY_VALUE_SAFE - set(u"=")
_QUERY_KEY_DELIMS = _ALL_DELIMS - _QUERY_KEY_SAFE
Expand Down Expand Up @@ -467,9 +467,13 @@ def _encode_userinfo_part(text, maximal=True):
)
# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc

# Schemes registered with ``query_plus_is_space=False``.  DecodedURL consults
# this set to pick its default "+" handling when the caller does not say.
NO_QUERY_PLUS_SCHEMES = set()

def register_scheme(text, uses_netloc=True, default_port=None):
# type: (Text, bool, Optional[int]) -> None

def register_scheme(
text, uses_netloc=True, default_port=None, query_plus_is_space=True
):
# type: (Text, bool, Optional[int], bool) -> None
"""Registers new scheme information, resulting in correct port and
slash behavior from the URL object. There are dozens of standard
schemes preregistered, so this function is mostly meant for
Expand All @@ -485,6 +489,8 @@ def register_scheme(text, uses_netloc=True, default_port=None):
not. Defaults to True.
default_port: The default port, if any, for
netloc-using schemes.
query_plus_is_space: If true, a "+" in the query string should be
decoded as a space by DecodedURL.
.. _file an issue: https://github.com/mahmoud/hyperlink/issues
"""
Expand All @@ -510,6 +516,9 @@ def register_scheme(text, uses_netloc=True, default_port=None):
else:
raise ValueError("uses_netloc expected bool, not: %r" % uses_netloc)

if not query_plus_is_space:
NO_QUERY_PLUS_SCHEMES.add(text)

return


Expand Down Expand Up @@ -922,9 +931,9 @@ class URL(object):
https://example.com/hello/world
The constructor runs basic type checks. All strings are expected
to be decoded (:class:`unicode` in Python 2). All arguments are
optional, defaulting to appropriately empty values. A full list of
constructor arguments is below.
to be text (:class:`str` in Python 3, :class:`unicode` in Python 2). All
arguments are optional, defaulting to appropriately empty values. A full
list of constructor arguments is below.
Args:
scheme: The text name of the scheme.
Expand All @@ -934,9 +943,9 @@ class URL(object):
it is known. See the ``SCHEME_PORT_MAP`` and
:func:`register_default_port` for more info.
path: A tuple of strings representing the slash-separated parts of the
path.
path, each percent-encoded.
query: The query parameters, as a dictionary or as a sequence of
key-value pairs.
percent-encoded key-value pairs.
fragment: The fragment part of the URL.
rooted: A rooted URL is one which indicates an absolute path.
This is True on any URL that includes a host, or any relative URL
Expand Down Expand Up @@ -1969,6 +1978,16 @@ def remove(
_EMPTY_URL = URL()


def _replace_plus(text):
# type: (Text) -> Text
return text.replace("+", "%20")


def _no_op(text):
# type: (Text) -> Text
return text


class DecodedURL(object):
"""
:class:`DecodedURL` is a type designed to act as a higher-level
Expand Down Expand Up @@ -1998,6 +2017,9 @@ class DecodedURL(object):
lazy: Set to True to avoid pre-decoding all parts of the URL to check for
validity.
Defaults to False.
query_plus_is_space: Whether ``+`` characters in the query string should
be treated as spaces when decoding. If unspecified, the default is taken
from the scheme.
.. note::
Expand All @@ -2012,18 +2034,21 @@ class DecodedURL(object):
.. versionadded:: 18.0.0
"""

def __init__(self, url=_EMPTY_URL, lazy=False):
# type: (URL, bool) -> None
def __init__(self, url=_EMPTY_URL, lazy=False, query_plus_is_space=None):
    # type: (URL, bool, Optional[bool]) -> None
    """Wrap *url*, settle the "+" policy, and optionally pre-decode.

    When *query_plus_is_space* is None it is enabled unless the URL's
    scheme is listed in ``NO_QUERY_PLUS_SCHEMES``.  Unless *lazy* is true,
    the host, userinfo, path, query and fragment attributes are each read
    once here.
    """
    self._url = url
    self._query_plus_is_space = (
        url.scheme not in NO_QUERY_PLUS_SCHEMES
        if query_plus_is_space is None
        else query_plus_is_space
    )
    if lazy:
        return
    # Touch every decoded attribute, in the original order, so the results
    # are cached and any decoding issue surfaces at construction time.
    for attr_name in ("host", "userinfo", "path", "query", "fragment"):
        getattr(self, attr_name)

@classmethod
def from_text(cls, text, lazy=False):
# type: (Text, bool) -> DecodedURL
def from_text(cls, text, lazy=False, query_plus_is_space=None):
# type: (Text, bool, Optional[bool]) -> DecodedURL
"""\
Make a `DecodedURL` instance from any text string containing a URL.
Expand All @@ -2034,7 +2059,7 @@ def from_text(cls, text, lazy=False):
Defaults to True.
"""
_url = URL.from_text(text)
return cls(_url, lazy=lazy)
return cls(_url, lazy=lazy, query_plus_is_space=query_plus_is_space)

@property
def encoded_url(self):
Expand All @@ -2059,22 +2084,34 @@ def to_iri(self):
"Passthrough to :meth:`~hyperlink.URL.to_iri()`"
return self._url.to_iri()

def _clone(self, url):
# type: (URL) -> DecodedURL
return self.__class__(
url,
# TODO: propagate laziness?
query_plus_is_space=self._query_plus_is_space,
)

def click(self, href=u""):
    # type: (Union[Text, URL, DecodedURL]) -> DecodedURL
    """Return a new DecodedURL wrapping the result of
    :meth:`~hyperlink.URL.click()`

    Args:
        href: The link to follow.  A `DecodedURL` is unwrapped to its
            underlying URL before being handed to ``URL.click()``.

    Returns:
        A new instance built through ``_clone()``, so it keeps this
        instance's ``query_plus_is_space`` setting.
    """
    if isinstance(href, DecodedURL):
        href = href._url
    # Build the result only through _clone(); constructing the class
    # directly would discard the "+" handling chosen for this instance.
    return self._clone(self._url.click(href=href))

def sibling(self, segment):
    # type: (Text) -> DecodedURL
    """Automatically encode any reserved characters in *segment* and
    return a new `DecodedURL` wrapping the result of
    :meth:`~hyperlink.URL.sibling()`

    Args:
        segment: The decoded text of the replacement path segment.

    Returns:
        A new instance built through ``_clone()``, so it keeps this
        instance's ``query_plus_is_space`` setting.
    """
    encoded_segment = _encode_reserved(segment)
    # Build the result only through _clone(); constructing the class
    # directly would discard the "+" handling chosen for this instance.
    return self._clone(self._url.sibling(encoded_segment))

def child(self, *segments):
# type: (Text) -> DecodedURL
Expand All @@ -2085,7 +2122,7 @@ def child(self, *segments):
if not segments:
return self
new_segs = [_encode_reserved(s) for s in segments]
return self.__class__(self._url.child(*new_segs))
return self._clone(self._url.child(*new_segs))

def normalize(
self,
Expand All @@ -2101,7 +2138,7 @@ def normalize(
"""Return a new `DecodedURL` wrapping the result of
:meth:`~hyperlink.URL.normalize()`
"""
return self.__class__(
return self._clone(
self._url.normalize(
scheme, host, path, query, fragment, userinfo, percents
)
Expand Down Expand Up @@ -2148,11 +2185,18 @@ def path(self):
def query(self):
# type: () -> QueryPairs
if not hasattr(self, "_query"):
if self._query_plus_is_space:
predecode = _replace_plus
else:
predecode = _no_op

self._query = cast(
QueryPairs,
tuple(
tuple(
_percent_decode(x, raise_subencoding_exc=True)
_percent_decode(
predecode(x), raise_subencoding_exc=True
)
if x is not None
else None
for x in (k, v)
Expand Down Expand Up @@ -2248,7 +2292,7 @@ def replace(
userinfo=userinfo_text,
uses_netloc=uses_netloc,
)
return self.__class__(url=new_url)
return self._clone(url=new_url)

def get(self, name):
# type: (Text) -> List[Optional[Text]]
Expand Down
16 changes: 16 additions & 0 deletions src/hyperlink/test/test_decoded_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,19 @@ def test_click_decoded_url(self):
assert clicked.host == durl.host
assert clicked.path == durl_dest.path
assert clicked.path == ("tëst",)

def test_decode_plus(self):
    # type: () -> None
    # Default behaviour: "+" decodes to a space in the query only; in the
    # path it stays a literal plus.  "%2B" always decodes to "+".
    decoded = DecodedURL.from_text("/x+y%2B?a=b+c%2B")
    expected_value = "b c+"
    assert decoded.path == ("x+y+",)
    assert decoded.get("a") == [expected_value]
    assert decoded.query == (("a", expected_value),)

def test_decode_nonplussed(self):
    # type: () -> None
    # With query_plus_is_space=False a "+" in the query stays a literal
    # plus, exactly as it does in the path.
    raw_text = "/x+y%2B?a=b+c%2B"
    decoded = DecodedURL.from_text(raw_text, query_plus_is_space=False)
    expected_value = "b+c+"
    assert decoded.path == ("x+y+",)
    assert decoded.get("a") == [expected_value]
    assert decoded.query == (("a", expected_value),)
12 changes: 11 additions & 1 deletion src/hyperlink/test/test_scheme_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from .. import _url
from .common import HyperlinkTestCase
from .._url import register_scheme, URL
from .._url import register_scheme, URL, DecodedURL


class TestSchemeRegistration(HyperlinkTestCase):
Expand Down Expand Up @@ -70,3 +70,13 @@ def test_register_invalid_port(self):
# type: () -> None
with self.assertRaises(ValueError):
register_scheme("nope", default_port=cast(bool, object()))

def test_register_no_quote_plus_scheme(self):
    # type: () -> None
    # A scheme registered with query_plus_is_space=False keeps "+" literal
    # by default, while an ordinary scheme still decodes it to a space.
    register_scheme("keepplus", query_plus_is_space=False)
    keeps_plus = DecodedURL.from_text("keepplus://example.com/?q=a+b")
    spaces_plus = DecodedURL.from_text("https://example.com/?q=a+b")
    assert keeps_plus.get("q") == ["a+b"]
    assert spaces_plus.get("q") == ["a b"]
2 changes: 2 additions & 0 deletions src/hyperlink/test/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@
"https://example.com/?a=%23", # hash in query param value
"https://example.com/?a=%26", # ampersand in query param value
"https://example.com/?a=%3D", # equals in query param value
"https://example.com/?foo+bar=baz", # plus in query param name
"https://example.com/?foo=bar+baz", # plus in query param value
# double-encoded percent sign in all percent-encodable positions:
"http://(%2525):(%2525)@example.com/(%2525)/?(%2525)=(%2525)#(%2525)",
# colon in first part of schemeless relative url
Expand Down

0 comments on commit 1949b07

Please sign in to comment.