From 8f74bac801938d917378d38bcc91a75a92189705 Mon Sep 17 00:00:00 2001 From: Junyeong Jeong Date: Mon, 8 Jun 2020 23:36:27 +0900 Subject: [PATCH] Parse the last CRLF of chunked response correctly (#4630) If the last CRLF, or only its LF, is received in a separate TCP segment, HttpPayloadParser wrongly concludes that trailers follow the 0\r\n in the chunked response body. In this case, HttpPayloadParser starts waiting for trailers, but the only remaining data to be received is CRLF. Thus, HttpPayloadParser waits for trailers indefinitely, which causes a TimeoutError in user code. However, if keep-alive is disabled for the connection, the problem does not reproduce because the server shuts down the connection explicitly after sending all data. When the connection is closed, .feed_eof is called, which lets HttpPayloadParser stop waiting. --- CHANGES/4630.bugfix | 1 + CONTRIBUTORS.txt | 1 + aiohttp/http_parser.py | 24 +++++++++++++++++++----- 3 files changed, 21 insertions(+), 5 deletions(-) create mode 100644 CHANGES/4630.bugfix diff --git a/CHANGES/4630.bugfix b/CHANGES/4630.bugfix new file mode 100644 index 00000000000..65d783be049 --- /dev/null +++ b/CHANGES/4630.bugfix @@ -0,0 +1 @@ +Handle the last CRLF correctly even if it is received via separate TCP segment. 
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index b40c107f6cd..f95ef97f8bd 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -153,6 +153,7 @@ Julia Tsemusheva Julien Duponchelle Jungkook Park Junjie Tao +Junyeong Jeong Justas Trimailovas Justin Foo Justin Turner Arthur diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 64e4b30c22e..b76ce094014 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -668,12 +668,26 @@ def feed_data(self, # we should get another \r\n otherwise # trailers needs to be skiped until \r\n\r\n if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS: - if chunk[:2] == SEP: - # end of stream - self.payload.feed_eof() - return True, chunk[2:] + if len(chunk) >= 2: + if chunk[:2] == SEP: + # end of stream + self.payload.feed_eof() + return True, chunk[2:] + else: + self._chunk = ChunkState.PARSE_TRAILERS else: - self._chunk = ChunkState.PARSE_TRAILERS + # Both CR and LF, or only LF may not be received + # yet. It is expected that CRLF or LF will be shown at + # the very first byte next time, otherwise trailers + # should come. + # The last CRLF which marks the end of response + # might not be contained in the same TCP segment which + # delivered the size indicator. + if not chunk or chunk[:1] == SEP[:1]: + self._chunk_tail = chunk + return False, b'' + else: + self._chunk = ChunkState.PARSE_TRAILERS # read and discard trailer up to the CRLF terminator if self._chunk == ChunkState.PARSE_TRAILERS: