diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index 11ee4d15c..dded948c9 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -21,8 +21,7 @@ MAX_HEADERS = 32768 DEFAULT_MAX_HEADERFIELD_SIZE = 8190 -HEADER_RE = re.compile(r"[^!#$%&'*+\-.\^_`|~0-9a-zA-Z]") -METH_RE = re.compile(r"[A-Z0-9$-_.]{3,20}") +TOKEN_RE = re.compile(r"[!#$%&'*+\-.\^_`|~0-9a-zA-Z]+") VERSION_RE = re.compile(r"HTTP/(\d+)\.(\d+)") @@ -63,8 +62,8 @@ def parse_headers(self, data): cfg = self.cfg headers = [] - # Split lines on \r\n keeping the \r\n on each line - lines = [bytes_to_str(line) + "\r\n" for line in data.split(b"\r\n")] + # Split lines on \r\n + lines = [bytes_to_str(line) for line in data.split(b"\r\n")] # handle scheme headers scheme_header = False @@ -80,30 +79,27 @@ def parse_headers(self, data): if len(headers) >= self.limit_request_fields: raise LimitRequestHeaders("limit request headers fields") - # Parse initial header name : value pair. + # Parse initial header name: value pair. curr = lines.pop(0) - header_length = len(curr) + header_length = len(curr) + len("\r\n") if curr.find(":") <= 0: - raise InvalidHeader(curr.strip()) + raise InvalidHeader(curr) name, value = curr.split(":", 1) if self.cfg.strip_header_spaces: name = name.rstrip(" \t").upper() else: name = name.upper() - if HEADER_RE.search(name): + if not TOKEN_RE.fullmatch(name): raise InvalidHeaderName(name) - name, value = name.strip(), [value.lstrip()] + value = [value.lstrip(" \t")] # Consume value continuation lines while lines and lines[0].startswith((" ", "\t")): curr = lines.pop(0) - header_length += len(curr) - if header_length > self.limit_request_field_size > 0: - raise LimitRequestHeaders("limit request headers " - "fields size") - value.append(curr) - value = ''.join(value).rstrip() + header_length += len(curr) + len("\r\n") + value.append(curr.strip("\t ")) + value = " ".join(value) if header_length > self.limit_request_field_size > 0: raise LimitRequestHeaders("limit request headers fields size") @@ -156,7 +152,7 @@ def set_body_reader(self): def should_close(self): for (h, v) in self.headers: if h == "CONNECTION": - v = v.lower().strip() + v = v.lower().strip(" \t") if v == "close": return True elif v == "keep-alive": @@ -283,7 +279,7 @@ def proxy_protocol_access_check(self): raise ForbiddenProxyRequest(self.peer_addr[0]) def parse_proxy_protocol(self, line): - bits = line.split() + bits = line.split(" ") if len(bits) != 6: raise InvalidProxyLine(line) @@ -328,12 +324,12 @@ def parse_proxy_protocol(self, line): } def parse_request_line(self, line_bytes): - bits = [bytes_to_str(bit) for bit in line_bytes.split(None, 2)] + bits = [bytes_to_str(bit) for bit in line_bytes.split(b" ", 2)] if len(bits) != 3: raise InvalidRequestLine(bytes_to_str(line_bytes)) # Method - if not METH_RE.match(bits[0]): + if not TOKEN_RE.fullmatch(bits[0]): raise InvalidRequestMethod(bits[0]) self.method = bits[0].upper() @@ -349,7 +345,7 @@ def parse_request_line(self, line_bytes): self.fragment = parts.fragment or "" # Version - match = VERSION_RE.match(bits[2]) + match = VERSION_RE.fullmatch(bits[2]) if match is None: raise InvalidHTTPVersion(bits[2]) self.version = (int(match.group(1)), int(match.group(2))) diff --git a/gunicorn/http/wsgi.py b/gunicorn/http/wsgi.py index 10c5a3dd5..7a996de70 100644 --- a/gunicorn/http/wsgi.py +++ b/gunicorn/http/wsgi.py @@ -9,7 +9,7 @@ import re import sys -from gunicorn.http.message import HEADER_RE +from gunicorn.http.message import TOKEN_RE from gunicorn.http.errors import InvalidHeader, InvalidHeaderName from gunicorn import SERVER_SOFTWARE, SERVER from gunicorn import util @@ -18,7 +18,7 @@ # with sending files in blocks over 2GB. BLKSIZE = 0x3FFFFFFF -HEADER_VALUE_RE = re.compile(r'[^ \t\x21-\x7e\x80-\xff]') +HEADER_VALUE_RE = re.compile(r'[ \t\x21-\x7e\x80-\xff]*') log = logging.getLogger(__name__) @@ -249,31 +249,31 @@ def process_headers(self, headers): if not isinstance(name, str): raise TypeError('%r is not a string' % name) - if HEADER_RE.search(name): + if not TOKEN_RE.fullmatch(name): raise InvalidHeaderName('%r' % name) if not isinstance(value, str): raise TypeError('%r is not a string' % value) - if HEADER_VALUE_RE.search(value): + if not HEADER_VALUE_RE.fullmatch(value): raise InvalidHeader('%r' % value) - value = value.strip() - lname = name.lower().strip() + value = value.strip(" \t") + lname = name.lower() if lname == "content-length": self.response_length = int(value) elif util.is_hoppish(name): if lname == "connection": # handle websocket - if value.lower().strip() == "upgrade": + if value.lower() == "upgrade": self.upgrade = True elif lname == "upgrade": - if value.lower().strip() == "websocket": - self.headers.append((name.strip(), value)) + if value.lower() == "websocket": + self.headers.append((name, value)) # ignore hopbyhop headers continue - self.headers.append((name.strip(), value)) + self.headers.append((name, value)) def is_chunked(self): # Only use chunked responses when the client is diff --git a/tests/requests/invalid/003.http b/tests/requests/invalid/003.http index cd1ab7fcb..5a9eaafcd 100644 --- a/tests/requests/invalid/003.http +++ b/tests/requests/invalid/003.http @@ -1,2 +1,2 @@ --blargh /foo HTTP/1.1\r\n -\r\n \ No newline at end of file +GET\n/\nHTTP/1.1\r\n +\r\n diff --git a/tests/requests/invalid/003.py b/tests/requests/invalid/003.py index 86a0774e5..5a4ca8961 100644 --- a/tests/requests/invalid/003.py +++ b/tests/requests/invalid/003.py @@ -1,2 +1,2 @@ -from gunicorn.http.errors import InvalidRequestMethod -request = InvalidRequestMethod \ No newline at end of file +from gunicorn.http.errors import InvalidRequestLine +request = InvalidRequestLine diff --git a/tests/requests/valid/016.py b/tests/requests/valid/016.py index 139b27009..4e5144f8c 100644 --- a/tests/requests/valid/016.py +++ b/tests/requests/valid/016.py @@ -1,35 +1,35 @@ -certificate = """-----BEGIN CERTIFICATE-----\r\n - MIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n - ETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n - AkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n - dWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n - SzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n - BAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n - BQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n - W51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n - gW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n - 0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n - u2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n - wgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n - 1UdEwEB/wQCMAAwEQYJYIZIAYb4QgHTTPAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n - BglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n - VR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n - loCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n - aWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n - 9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n - IjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n - BgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n - cHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4Qg\r\n - EDBDAWLmh0dHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC\r\n - 5jcmwwPwYDVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n - Y3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n - XCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n - UO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n - hTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n - wTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n - Yhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n - RA==\r\n - -----END CERTIFICATE-----""".replace("\n\n", "\n") +certificate = """-----BEGIN CERTIFICATE----- + MIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx + ETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT + AkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu + dWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV + SzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV + BAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB + BQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF + W51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR + gW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL + 0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP + u2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR + wgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG + 1UdEwEB/wQCMAAwEQYJYIZIAYb4QgHTTPAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs + BglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD + VR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj + loCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj + aWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG + 9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE + IjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO + BgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1 + cHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4Qg + EDBDAWLmh0dHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC + 5jcmwwPwYDVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv + Y3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3 + XCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8 + UO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk + hTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK + wTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu + Yhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3 + RA== + -----END CERTIFICATE-----""".replace("\n", "") request = { "method": "GET", diff --git a/tests/requests/valid/031.http b/tests/requests/valid/031.http new file mode 100644 index 000000000..cd1ab7fcb --- /dev/null +++ b/tests/requests/valid/031.http @@ -0,0 +1,2 @@ +-blargh /foo HTTP/1.1\r\n +\r\n \ No newline at end of file diff --git a/tests/requests/valid/031.py b/tests/requests/valid/031.py new file mode 100644 index 000000000..9691a002b --- /dev/null +++ b/tests/requests/valid/031.py @@ -0,0 +1,7 @@ +request = { + "method": "-BLARGH", + "uri": uri("/foo"), + "version": (1, 1), + "headers": [], + "body": b"" +}