From f7bc8d3e219616dbce70720d10896e862e3563f5 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Mon, 27 Nov 2017 14:48:56 +0100 Subject: [PATCH 01/11] Support chunked transfer encoding --- .gitattributes | 2 ++ tests/res/chunked.txt | 25 +++++++++++++++++++++++++ tests/test_serving.py | 42 ++++++++++++++++++++++++++++++++++++++++++ werkzeug/serving.py | 43 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 .gitattributes create mode 100644 tests/res/chunked.txt diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..96d79dad6 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +tests/res/chunked.txt binary + diff --git a/tests/res/chunked.txt b/tests/res/chunked.txt new file mode 100644 index 000000000..44aa71f62 --- /dev/null +++ b/tests/res/chunked.txt @@ -0,0 +1,25 @@ +94 +----------------------------898239224156930639461866 +Content-Disposition: form-data; name="file"; filename="test.txt" +Content-Type: text/plain + + +f +This is a test + +2 + + +65 +----------------------------898239224156930639461866 +Content-Disposition: form-data; name="type" + + +a +text/plain +3a + +----------------------------898239224156930639461866-- + +0 + diff --git a/tests/test_serving.py b/tests/test_serving.py index 9ce1ee6ea..33efbab63 100644 --- a/tests/test_serving.py +++ b/tests/test_serving.py @@ -294,3 +294,45 @@ def app(environ, start_response): serving.run_simple(hostname='localhost', port='5001', application=app, use_reloader=False) assert 'port must be an integer' in str(excinfo.value) + + +def test_chunked_encoding(dev_server): + server = dev_server(r''' + from werkzeug.wrappers import Request + from werkzeug.serving import run_simple + + def app(environ, start_response): + if environ['REQUEST_METHOD'] != 'POST': + start_response('204 OK', []) + return [] + + try: + assert environ['HTTP_TRANSFER_ENCODING'] == 'chunked' + assert environ.get('wsgi.input_terminated', False) + request = Request(environ) + assert request.mimetype == 'multipart/form-data' + assert request.files['file'].read() == b'This is a test\n' + assert request.form['type'] == b'text/plain' + start_response('200 OK', [('Content-Type', 'text/plain')]) + return [b'YES'] + except Exception as e: + start_response('200 OK', [('Content-Type', 'text/plain')]) + return [str(e)] + ''') + + testfile = os.path.join(os.path.dirname(__file__), 'res', 'chunked.txt') + + import httplib + conn = httplib.HTTPConnection('127.0.0.1', server.port) + conn.connect() + conn.putrequest('POST', '/', skip_host=1, skip_accept_encoding=1) + conn.putheader('Accept', 'text/plain') + conn.putheader('Transfer-Encoding', 'chunked') + conn.putheader('Content-Type', 'multipart/form-data; boundary=--------------------------898239224156930639461866') + conn.endheaders(open(testfile, 'rb').read()) + + res = conn.getresponse() + assert res.status == 200 + assert res.read() == b'YES' + + conn.close() diff --git a/werkzeug/serving.py b/werkzeug/serving.py index d7e552aae..0dda0f57d 100644 --- a/werkzeug/serving.py +++ b/werkzeug/serving.py @@ -37,6 +37,7 @@ """ from __future__ import with_statement +import io import os import socket import sys @@ -97,6 +98,42 @@ class ForkingMixIn(object): can_open_by_fd = not WIN and hasattr(socket, 'fromfd') +class DechunkedInput(io.RawIOBase): + def __init__(self, rfile): + self._rfile = rfile + self._done = False + self._len = 0 + + def readable(self): + return True + + def readinto(self, buf): + if self._done: + return 0 + + if self._len == 0: + try: + self._len = int(self._rfile.readline().decode('latin1').strip(), 16) + except ValueError: + raise IOError('Invalid chunk header') + + if self._len < 0: + raise IOError('Negative chunk length not allowed') + elif self._len > 0: + n = min(len(buf), self._len) + buf[:n] = self._rfile.read(n) + self._len -= n + else: + self._done = True + n = 0 + + if self._len == 0: + if self._rfile.readline() not in (b'\n', b'\r\n', b'\r'): + raise IOError('Missing chunk spacing') + + return n + + class WSGIRequestHandler(BaseHTTPRequestHandler, object): """A request handler that implements WSGI dispatching.""" @@ -117,7 +154,7 @@ def shutdown_server(): environ = { 'wsgi.version': (1, 0), 'wsgi.url_scheme': url_scheme, - 'wsgi.input': self.rfile, + 'wsgi.input': self.rfile, # TODO: Wrap this 'wsgi.errors': sys.stderr, 'wsgi.multithread': self.server.multithread, 'wsgi.multiprocess': self.server.multiprocess, @@ -141,6 +178,10 @@ def shutdown_server(): key = 'HTTP_' + key environ[key] = value + if environ.get('HTTP_TRANSFER_ENCODING', '').strip().lower() == 'chunked': + environ['wsgi.input_terminated'] = True + environ['wsgi.input'] = DechunkedInput(environ['wsgi.input']) + if request_url.scheme and request_url.netloc: environ['HTTP_HOST'] = request_url.netloc From beb216846688a3c6a1a85769b53aa60c488eb3eb Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Mon, 27 Nov 2017 20:29:25 +0100 Subject: [PATCH 02/11] Remove comment --- werkzeug/serving.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/werkzeug/serving.py b/werkzeug/serving.py index 0dda0f57d..75453ed6f 100644 --- a/werkzeug/serving.py +++ b/werkzeug/serving.py @@ -154,7 +154,7 @@ def shutdown_server(): environ = { 'wsgi.version': (1, 0), 'wsgi.url_scheme': url_scheme, - 'wsgi.input': self.rfile, # TODO: Wrap this + 'wsgi.input': self.rfile, 'wsgi.errors': sys.stderr, 'wsgi.multithread': self.server.multithread, 'wsgi.multiprocess': self.server.multiprocess, From 1f91721549b9398174487f735ffd6f120bc42798 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Tue, 28 Nov 2017 10:24:57 +0100 Subject: [PATCH 03/11] Import HTTPConnection from correct module --- tests/test_serving.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test_serving.py b/tests/test_serving.py index 33efbab63..90ba9980b 100644 --- a/tests/test_serving.py +++ b/tests/test_serving.py @@ -10,6 +10,7 @@ """ import os import ssl +import sys import subprocess import textwrap @@ -322,8 +323,12 @@ def app(environ, start_response): testfile = os.path.join(os.path.dirname(__file__), 'res', 'chunked.txt') - import httplib - conn = httplib.HTTPConnection('127.0.0.1', server.port) + if sys.version_info[0] == 2: + from httplib import HTTPConnection + else: + from http.client import HTTPConnection + + conn = HTTPConnection('127.0.0.1', server.port) conn.connect() conn.putrequest('POST', '/', skip_host=1, skip_accept_encoding=1) conn.putheader('Accept', 'text/plain') From 18f53e70fe82c5cb89b3868ff8874940cd055b81 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Tue, 28 Nov 2017 10:27:35 +0100 Subject: [PATCH 04/11] Remove unused test code --- tests/test_serving.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/tests/test_serving.py b/tests/test_serving.py index 90ba9980b..a559d85b8 100644 --- a/tests/test_serving.py +++ b/tests/test_serving.py @@ -300,25 +300,15 @@ def app(environ, start_response): def test_chunked_encoding(dev_server): server = dev_server(r''' from werkzeug.wrappers import Request - from werkzeug.serving import run_simple - def app(environ, start_response): - if environ['REQUEST_METHOD'] != 'POST': - start_response('204 OK', []) - return [] - - try: - assert environ['HTTP_TRANSFER_ENCODING'] == 'chunked' - assert environ.get('wsgi.input_terminated', False) - request = Request(environ) - assert request.mimetype == 'multipart/form-data' - assert request.files['file'].read() == b'This is a test\n' - assert request.form['type'] == b'text/plain' - start_response('200 OK', [('Content-Type', 'text/plain')]) - return [b'YES'] - except Exception as e: - start_response('200 OK', [('Content-Type', 'text/plain')]) - return [str(e)] + assert environ['HTTP_TRANSFER_ENCODING'] == 'chunked' + assert environ.get('wsgi.input_terminated', False) + request = Request(environ) + assert request.mimetype == 'multipart/form-data' + assert request.files['file'].read() == b'This is a test\n' + assert request.form['type'] == b'text/plain' + start_response('200 OK', [('Content-Type', 'text/plain')]) + return [b'YES'] ''') testfile = os.path.join(os.path.dirname(__file__), 'res', 'chunked.txt') From fcf6a06e4cf59e0899050f6d567479d90e8f9bf3 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Tue, 28 Nov 2017 12:01:45 +0100 Subject: [PATCH 05/11] Improve DechunkedInput implementation and add comments - Try to read multiple chunks until the buffer is filled - Add comments to make control flow more clear - Add doc comment for DechunkedInput --- werkzeug/serving.py | 63 ++++++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/werkzeug/serving.py b/werkzeug/serving.py index 75453ed6f..f76f4de95 100644 --- a/werkzeug/serving.py +++ b/werkzeug/serving.py @@ -99,6 +99,8 @@ class ForkingMixIn(object): class DechunkedInput(io.RawIOBase): + """An input stream that handles Transfer-Encoding 'chunked'""" + def __init__(self, rfile): self._rfile = rfile self._done = False @@ -107,31 +109,46 @@ def __init__(self, rfile): def readable(self): return True - def readinto(self, buf): - if self._done: - return 0 - - if self._len == 0: - try: - self._len = int(self._rfile.readline().decode('latin1').strip(), 16) - except ValueError: - raise IOError('Invalid chunk header') - - if self._len < 0: + def read_chunk_len(self): + try: + line = self._rfile.readline().decode('latin1') + _len = int(line.strip(), 16) + except ValueError: + raise IOError('Invalid chunk header') + if _len < 0: raise IOError('Negative chunk length not allowed') - elif self._len > 0: - n = min(len(buf), self._len) - buf[:n] = self._rfile.read(n) - self._len -= n - else: - self._done = True - n = 0 + return _len - if self._len == 0: - if self._rfile.readline() not in (b'\n', b'\r\n', b'\r'): - raise IOError('Missing chunk spacing') - - return n + def readinto(self, buf): + read = 0 + while not self._done and read < len(buf): + if self._len == 0: + # This is the first chunk or we fully consumed the previous + # one. Read the next length of the next chunk + self._len = self.read_chunk_len() + + if self._len == 0: + # Found the final chunk of size 0. The stream is now exhausted, + # but there is still a final newline that should be consumed + self._done = True + + if self._len > 0: + # There is data (left) in this chunk, so append it to the + # buffer. If this operation fully consumes the chunk, this will + # reset self._len to 0. + n = min(len(buf), self._len) + buf[read:read + n] = self._rfile.read(n) + self._len -= n + read += n + + if self._len == 0: + # Skip the terminating newline of a chunk that has been fully + # consumed. This also applies to the 0-sized final chunk + terminator = self._rfile.readline() + if terminator not in (b'\n', b'\r\n', b'\r'): + raise IOError('Missing chunk terminating newline') + + return read class WSGIRequestHandler(BaseHTTPRequestHandler, object): From 16923bb7f00a8fb8f1393932b0739e8ff4c69aa0 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Tue, 28 Nov 2017 12:10:03 +0100 Subject: [PATCH 06/11] Ignore Content-Length with chunked transfer encoding Required by RFC 2616, Section 4.4 See: https://www.greenbytes.de/tech/webdav/rfc2616.html#rfc.section.4.4 --- werkzeug/wsgi.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/werkzeug/wsgi.py b/werkzeug/wsgi.py index b7e98e216..64c0e6c7e 100644 --- a/werkzeug/wsgi.py +++ b/werkzeug/wsgi.py @@ -166,12 +166,16 @@ def get_host(environ, trusted_hosts=None): def get_content_length(environ): """Returns the content length from the WSGI environment as - integer. If it's not available ``None`` is returned. + integer. If it's not available or chunked transfer encoding is used, + ``None`` is returned. .. versionadded:: 0.9 :param environ: the WSGI environ to fetch the content length from. """ + if environ.get('HTTP_TRANSFER_ENCODING', '') == 'chunked': + return None + content_length = environ.get('CONTENT_LENGTH') if content_length is not None: try: From 9806c5169bc3bb14ebc8e68c3bb072a6466955dc Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Tue, 28 Nov 2017 15:13:02 +0100 Subject: [PATCH 07/11] Default to streaming into a temporary file if content length is missing --- werkzeug/formparser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/werkzeug/formparser.py b/werkzeug/formparser.py index a0118054b..25e0fea5b 100644 --- a/werkzeug/formparser.py +++ b/werkzeug/formparser.py @@ -38,7 +38,7 @@ def default_stream_factory(total_content_length, filename, content_type, content_length=None): """The stream factory that is used per default.""" - if total_content_length > 1024 * 500: + if total_content_length is None or total_content_length > 1024 * 500: return TemporaryFile('wb+') return BytesIO() From ce9b97812933cf938034d2699d6c81d8012472e3 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Tue, 28 Nov 2017 15:15:53 +0100 Subject: [PATCH 08/11] Fix a test assertion in python3 --- tests/test_serving.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_serving.py b/tests/test_serving.py index a559d85b8..3565fd159 100644 --- a/tests/test_serving.py +++ b/tests/test_serving.py @@ -306,7 +306,7 @@ def app(environ, start_response): request = Request(environ) assert request.mimetype == 'multipart/form-data' assert request.files['file'].read() == b'This is a test\n' - assert request.form['type'] == b'text/plain' + assert request.form['type'] == 'text/plain' start_response('200 OK', [('Content-Type', 'text/plain')]) return [b'YES'] ''') From bb01f571c6e7a33175629f686da0cd3c45d71376 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Tue, 28 Nov 2017 16:58:20 +0100 Subject: [PATCH 09/11] Use spooled temporary files to stream files --- werkzeug/formparser.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/werkzeug/formparser.py b/werkzeug/formparser.py index 25e0fea5b..2af77c3f9 100644 --- a/werkzeug/formparser.py +++ b/werkzeug/formparser.py @@ -12,7 +12,7 @@ import re import codecs from io import BytesIO -from tempfile import TemporaryFile +from tempfile import SpooledTemporaryFile from itertools import chain, repeat, tee from functools import update_wrapper @@ -38,9 +38,7 @@ def default_stream_factory(total_content_length, filename, content_type, content_length=None): """The stream factory that is used per default.""" - if total_content_length is None or total_content_length > 1024 * 500: - return TemporaryFile('wb+') - return BytesIO() + return SpooledTemporaryFile(max_size=1024 * 500, mode='wb+') def parse_form_data(environ, stream_factory=None, charset='utf-8', From bea195e4c52255feac4f382b808d100cf5ed24a7 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Tue, 28 Nov 2017 16:59:49 +0100 Subject: [PATCH 10/11] Add a test with both chunked encoding and content length --- tests/test_serving.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/test_serving.py b/tests/test_serving.py index 3565fd159..8e8b26c8d 100644 --- a/tests/test_serving.py +++ b/tests/test_serving.py @@ -331,3 +331,42 @@ def app(environ, start_response): assert res.read() == b'YES' conn.close() + + +def test_chunked_encoding_with_content_length(dev_server): + server = dev_server(r''' + from werkzeug.wrappers import Request + def app(environ, start_response): + assert environ['HTTP_TRANSFER_ENCODING'] == 'chunked' + assert environ.get('wsgi.input_terminated', False) + request = Request(environ) + assert request.mimetype == 'multipart/form-data' + assert request.files['file'].read() == b'This is a test\n' + assert request.form['type'] == 'text/plain' + start_response('200 OK', [('Content-Type', 'text/plain')]) + return [b'YES'] + ''') + + testfile = os.path.join(os.path.dirname(__file__), 'res', 'chunked.txt') + + if sys.version_info[0] == 2: + from httplib import HTTPConnection + else: + from http.client import HTTPConnection + + conn = HTTPConnection('127.0.0.1', server.port) + conn.connect() + conn.putrequest('POST', '/', skip_host=1, skip_accept_encoding=1) + conn.putheader('Accept', 'text/plain') + conn.putheader('Transfer-Encoding', 'chunked') + # Content-Length is actually invalid, but some libraries might still send it + conn.putheader('Content-Length', '372') + conn.putheader( + 'Content-Type', 'multipart/form-data; boundary=--------------------------898239224156930639461866') + conn.endheaders(open(testfile, 'rb').read()) + + res = conn.getresponse() + assert res.status == 200 + assert res.read() == b'YES' + + conn.close() From 7302f1ced7026f2866330609d4aba85a942c2c44 Mon Sep 17 00:00:00 2001 From: David Lord Date: Tue, 28 Nov 2017 09:07:32 -0800 Subject: [PATCH 11/11] fix 2.6 test, flake8 body in send instead of endheaders --- tests/test_serving.py | 21 ++++++++++++++++----- werkzeug/formparser.py | 1 - 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/tests/test_serving.py b/tests/test_serving.py index 8e8b26c8d..fb46a0beb 100644 --- a/tests/test_serving.py +++ b/tests/test_serving.py @@ -323,8 +323,14 @@ def app(environ, start_response): conn.putrequest('POST', '/', skip_host=1, skip_accept_encoding=1) conn.putheader('Accept', 'text/plain') conn.putheader('Transfer-Encoding', 'chunked') - conn.putheader('Content-Type', 'multipart/form-data; boundary=--------------------------898239224156930639461866') - conn.endheaders(open(testfile, 'rb').read()) + conn.putheader( + 'Content-Type', + 'multipart/form-data; boundary=' + '--------------------------898239224156930639461866') + conn.endheaders() + + with open(testfile, 'rb') as f: + conn.send(f.read()) res = conn.getresponse() assert res.status == 200 @@ -359,11 +365,16 @@ def app(environ, start_response): conn.putrequest('POST', '/', skip_host=1, skip_accept_encoding=1) conn.putheader('Accept', 'text/plain') conn.putheader('Transfer-Encoding', 'chunked') - # Content-Length is actually invalid, but some libraries might still send it + # Content-Length is invalid for chunked, but some libraries might send it conn.putheader('Content-Length', '372') conn.putheader( - 'Content-Type', 'multipart/form-data; boundary=--------------------------898239224156930639461866') - conn.endheaders(open(testfile, 'rb').read()) + 'Content-Type', + 'multipart/form-data; boundary=' + '--------------------------898239224156930639461866') + conn.endheaders() + + with open(testfile, 'rb') as f: + conn.send(f.read()) res = conn.getresponse() assert res.status == 200 diff --git a/werkzeug/formparser.py b/werkzeug/formparser.py index 2af77c3f9..42d796911 100644 --- a/werkzeug/formparser.py +++ b/werkzeug/formparser.py @@ -11,7 +11,6 @@ """ import re import codecs -from io import BytesIO from tempfile import SpooledTemporaryFile from itertools import chain, repeat, tee from functools import update_wrapper