Skip to content

Commit

Permalink
Support absolute-form and authority-form URLs by web server (#6409)
Browse files Browse the repository at this point in the history
Co-authored-by: Sviatoslav Sydorenko <sviat@redhat.com>
  • Loading branch information
asvetlov and webknjaz committed Dec 13, 2021
1 parent 83e9b8a commit 535c58e
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 58 deletions.
1 change: 1 addition & 0 deletions CHANGES/6227.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Started supporting ``authority-form`` and ``absolute-form`` URLs on the server-side.
72 changes: 43 additions & 29 deletions aiohttp/_http_parser.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ cdef class HttpParser:
raw_headers = tuple(self._raw_headers)
headers = CIMultiDictProxy(self._headers)

if upgrade or self._cparser.method == 5: # cparser.CONNECT:
if upgrade or self._cparser.method == cparser.HTTP_CONNECT:
self._upgraded = True

# do not support old websocket spec
Expand Down Expand Up @@ -453,7 +453,7 @@ cdef class HttpParser:

if (
ULLONG_MAX > self._cparser.content_length > 0 or chunked or
self._cparser.method == 5 or # CONNECT: 5
self._cparser.method == cparser.HTTP_CONNECT or
(self._cparser.status_code >= 199 and
self._cparser.content_length == 0 and
self._read_until_eof)
Expand Down Expand Up @@ -586,34 +586,45 @@ cdef class HttpRequestParser(HttpParser):
self._path = self._buf.decode('utf-8', 'surrogateescape')
try:
idx3 = len(self._path)
idx1 = self._path.find("?")
if idx1 == -1:
query = ""
idx2 = self._path.find("#")
if idx2 == -1:
path = self._path
fragment = ""
else:
path = self._path[0: idx2]
fragment = self._path[idx2+1:]
if self._cparser.method == cparser.HTTP_CONNECT:
# authority-form,
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
self._url = URL.build(authority=self._path, encoded=True)
elif idx3 > 1 and self._path[0] == '/':
# origin-form,
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
idx1 = self._path.find("?")
if idx1 == -1:
query = ""
idx2 = self._path.find("#")
if idx2 == -1:
path = self._path
fragment = ""
else:
path = self._path[0: idx2]
fragment = self._path[idx2+1:]

else:
path = self._path[0:idx1]
idx1 += 1
idx2 = self._path.find("#", idx1+1)
if idx2 == -1:
query = self._path[idx1:]
fragment = ""
else:
query = self._path[idx1: idx2]
fragment = self._path[idx2+1:]

self._url = URL.build(
path=path,
query_string=query,
fragment=fragment,
encoded=True,
)
path = self._path[0:idx1]
idx1 += 1
idx2 = self._path.find("#", idx1+1)
if idx2 == -1:
query = self._path[idx1:]
fragment = ""
else:
query = self._path[idx1: idx2]
fragment = self._path[idx2+1:]

self._url = URL.build(
path=path,
query_string=query,
fragment=fragment,
encoded=True,
)
else:
# absolute-form for proxy maybe,
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
self._url = URL(self._path, encoded=True)
finally:
PyByteArray_Resize(self._buf, 0)

Expand Down Expand Up @@ -726,7 +737,10 @@ cdef int cb_on_headers_complete(cparser.llhttp_t* parser) except -1:
pyparser._last_error = exc
return -1
else:
if pyparser._cparser.upgrade or pyparser._cparser.method == 5: # CONNECT
if (
pyparser._cparser.upgrade or
pyparser._cparser.method == cparser.HTTP_CONNECT
):
return 2
else:
return 0
Expand Down
39 changes: 26 additions & 13 deletions aiohttp/http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,9 +532,6 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
"Status line is too long", str(self.max_line_size), str(len(path))
)

path_part, _hash_separator, url_fragment = path.partition("#")
path_part, _question_mark_separator, qs_part = path_part.partition("?")

# method
if not METHRE.match(method):
raise BadStatusLine(method)
Expand All @@ -549,6 +546,31 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
except Exception:
raise BadStatusLine(version)

if method == "CONNECT":
# authority-form,
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
url = URL.build(authority=path, encoded=True)
elif path.startswith("/"):
# origin-form,
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
path_part, _hash_separator, url_fragment = path.partition("#")
path_part, _question_mark_separator, qs_part = path_part.partition("?")

# NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
# NOTE: parser does, otherwise it results into the same
# NOTE: HTTP Request-Line input producing different
# NOTE: `yarl.URL()` objects
url = URL.build(
path=path_part,
query_string=qs_part,
fragment=url_fragment,
encoded=True,
)
else:
# absolute-form for proxy maybe,
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
url = URL(path, encoded=True)

# read headers
(
headers,
Expand All @@ -575,16 +597,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
compression,
upgrade,
chunked,
# NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
# NOTE: parser does, otherwise it results into the same
# NOTE: HTTP Request-Line input producing different
# NOTE: `yarl.URL()` objects
URL.build(
path=path_part,
query_string=qs_part,
fragment=url_fragment,
encoded=True,
),
url,
)


Expand Down
14 changes: 12 additions & 2 deletions aiohttp/web_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,14 +164,24 @@ def __init__(
self._headers = message.headers
self._method = message.method
self._version = message.version
self._rel_url = message.url
self._cache = {} # type: Dict[str, Any]
url = message.url
if url.is_absolute():
# absolute URL is given,
# override auto-calculating url, host, and scheme
# all other properties should be good
self._cache["url"] = url
self._cache["host"] = url.host
self._cache["scheme"] = url.scheme
self._rel_url = url.relative()
else:
self._rel_url = message.url
self._post = (
None
) # type: Optional[MultiDictProxy[Union[str, bytes, FileField]]]
self._read_bytes = None # type: Optional[bytes]

self._state = state
self._cache = {} # type: Dict[str, Any]
self._task = task
self._client_max_size = client_max_size
self._loop = loop
Expand Down
24 changes: 22 additions & 2 deletions tests/test_http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,15 +362,35 @@ def test_compression_unknown(parser) -> None:
assert msg.compression is None


def test_headers_connect(parser) -> None:
def test_url_connect(parser: Any) -> None:
text = b"CONNECT www.google.com HTTP/1.1\r\n" b"content-length: 0\r\n\r\n"
messages, upgrade, tail = parser.feed_data(text)
msg, payload = messages[0]
assert upgrade
assert msg.url == URL.build(authority="www.google.com")


def test_headers_connect(parser: Any) -> None:
text = b"CONNECT www.google.com HTTP/1.1\r\n" b"content-length: 0\r\n\r\n"
messages, upgrade, tail = parser.feed_data(text)
msg, payload = messages[0]
assert upgrade
assert isinstance(payload, streams.StreamReader)


def test_headers_old_websocket_key1(parser) -> None:
def test_url_absolute(parser: Any) -> None:
text = (
b"GET https://www.google.com/path/to.html HTTP/1.1\r\n"
b"content-length: 0\r\n\r\n"
)
messages, upgrade, tail = parser.feed_data(text)
msg, payload = messages[0]
assert not upgrade
assert msg.method == "GET"
assert msg.url == URL("https://www.google.com/path/to.html")


def test_headers_old_websocket_key1(parser: Any) -> None:
text = b"GET /test HTTP/1.1\r\n" b"SEC-WEBSOCKET-KEY1: line\r\n\r\n"

with pytest.raises(http_exceptions.BadHttpMessage):
Expand Down
23 changes: 11 additions & 12 deletions tests/test_proxy_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,29 +341,28 @@ async def test_proxy_http_absolute_path(proxy_test_server, get_request) -> None:
assert len(proxy.requests_list) == 1
assert proxy.request.method == "GET"
assert proxy.request.host == "aiohttp.io"
assert proxy.request.path_qs == "http://aiohttp.io/path?query=yes"
assert proxy.request.path_qs == "/path?query=yes"


async def test_proxy_http_raw_path(proxy_test_server, get_request) -> None:
url = "http://aiohttp.io:2561/space sheep?q=can:fly"
raw_url = "http://aiohttp.io:2561/space%20sheep?q=can:fly"
raw_url = "/space%20sheep?q=can:fly"
proxy = await proxy_test_server()

await get_request(url=url, proxy=proxy.url)

assert proxy.request.host == "aiohttp.io:2561"
assert proxy.request.host == "aiohttp.io"
assert proxy.request.path_qs == raw_url


async def test_proxy_http_idna_support(proxy_test_server, get_request) -> None:
url = "http://éé.com/"
raw_url = "http://xn--9caa.com/"
proxy = await proxy_test_server()

await get_request(url=url, proxy=proxy.url)

assert proxy.request.host == "xn--9caa.com"
assert proxy.request.path_qs == raw_url
assert proxy.request.host == "éé.com"
assert proxy.request.path_qs == "/"


async def test_proxy_http_connection_error(get_request) -> None:
Expand Down Expand Up @@ -759,7 +758,7 @@ async def test_proxy_from_env_http(proxy_test_server, get_request, mocker) -> No
assert len(proxy.requests_list) == 1
assert proxy.request.method == "GET"
assert proxy.request.host == "aiohttp.io"
assert proxy.request.path_qs == "http://aiohttp.io/path"
assert proxy.request.path_qs == "/path"
assert "Proxy-Authorization" not in proxy.request.headers


Expand All @@ -781,7 +780,7 @@ async def test_proxy_from_env_http_with_auth(proxy_test_server, get_request, moc
assert len(proxy.requests_list) == 1
assert proxy.request.method == "GET"
assert proxy.request.host == "aiohttp.io"
assert proxy.request.path_qs == "http://aiohttp.io/path"
assert proxy.request.path_qs == "/path"
assert proxy.request.headers["Proxy-Authorization"] == auth.encode()


Expand All @@ -807,7 +806,7 @@ async def test_proxy_from_env_http_with_auth_from_netrc(
assert len(proxy.requests_list) == 1
assert proxy.request.method == "GET"
assert proxy.request.host == "aiohttp.io"
assert proxy.request.path_qs == "http://aiohttp.io/path"
assert proxy.request.path_qs == "/path"
assert proxy.request.headers["Proxy-Authorization"] == auth.encode()


Expand All @@ -833,7 +832,7 @@ async def test_proxy_from_env_http_without_auth_from_netrc(
assert len(proxy.requests_list) == 1
assert proxy.request.method == "GET"
assert proxy.request.host == "aiohttp.io"
assert proxy.request.path_qs == "http://aiohttp.io/path"
assert proxy.request.path_qs == "/path"
assert "Proxy-Authorization" not in proxy.request.headers


Expand All @@ -857,7 +856,7 @@ async def test_proxy_from_env_http_without_auth_from_wrong_netrc(
assert len(proxy.requests_list) == 1
assert proxy.request.method == "GET"
assert proxy.request.host == "aiohttp.io"
assert proxy.request.path_qs == "http://aiohttp.io/path"
assert proxy.request.path_qs == "/path"
assert "Proxy-Authorization" not in proxy.request.headers


Expand All @@ -873,7 +872,7 @@ async def xtest_proxy_from_env_https(proxy_test_server, get_request, mocker):
assert len(proxy.requests_list) == 2
assert proxy.request.method == "GET"
assert proxy.request.host == "aiohttp.io"
assert proxy.request.path_qs == "https://aiohttp.io/path"
assert proxy.request.path_qs == "/path"
assert "Proxy-Authorization" not in proxy.request.headers


Expand Down
8 changes: 8 additions & 0 deletions tests/test_web_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,14 @@ def test_non_ascii_raw_path() -> None:
assert "/путь" == req.raw_path


def test_absolute_url() -> None:
req = make_mocked_request("GET", "https://example.com/path/to?a=1")
assert req.url == URL("https://example.com/path/to?a=1")
assert req.scheme == "https"
assert req.host == "example.com"
assert req.rel_url == URL.build(path="/path/to", query={"a": "1"})


def test_content_length() -> None:
req = make_mocked_request("Get", "/", CIMultiDict([("CONTENT-LENGTH", "123")]))

Expand Down

0 comments on commit 535c58e

Please sign in to comment.