Skip to content

Commit

Permalink
- Unify HEADER_RE and METH_RE
Browse files Browse the repository at this point in the history
- Replace CRLF with SP during obs-fold processing (See RFC 9112 Section 5.2, last paragraph)
- Stop stripping header names.
- Remove HTAB in OWS in header values that use obs-fold (See RFC 9112 Section 5.2, last paragraph)
- Use fullmatch instead of search, which has problems with empty strings. (See GHSA-68xg-gqqm-vgj8)
- Split proxy protocol line on space only. (See proxy protocol Section 2.1, bullet 3)
- Use fullmatch for method and version (Thank you to Paul Dorn for noticing this.)
- Replace calls to str.strip() with str.strip(' \t')
- Split request line on SP only.

Co-authored-by: Paul Dorn <dorn@posteo.de>
  • Loading branch information
kenballus and Paul Dorn committed Dec 6, 2023
1 parent 976e4ae commit b9721a2
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 66 deletions.
36 changes: 16 additions & 20 deletions gunicorn/http/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@
MAX_HEADERS = 32768
DEFAULT_MAX_HEADERFIELD_SIZE = 8190

HEADER_RE = re.compile(r"[^!#$%&'*+\-.\^_`|~0-9a-zA-Z]")
METH_RE = re.compile(r"[A-Z0-9$-_.]{3,20}")
TOKEN_RE = re.compile(r"[!#$%&'*+\-.\^_`|~0-9a-zA-Z]+")
VERSION_RE = re.compile(r"HTTP/(\d+)\.(\d+)")


Expand Down Expand Up @@ -63,8 +62,8 @@ def parse_headers(self, data):
cfg = self.cfg
headers = []

# Split lines on \r\n keeping the \r\n on each line
lines = [bytes_to_str(line) + "\r\n" for line in data.split(b"\r\n")]
# Split lines on \r\n
lines = [bytes_to_str(line) for line in data.split(b"\r\n")]

# handle scheme headers
scheme_header = False
Expand All @@ -80,30 +79,27 @@ def parse_headers(self, data):
if len(headers) >= self.limit_request_fields:
raise LimitRequestHeaders("limit request headers fields")

# Parse initial header name : value pair.
# Parse initial header name: value pair.
curr = lines.pop(0)
header_length = len(curr)
header_length = len(curr) + len("\r\n")
if curr.find(":") <= 0:
raise InvalidHeader(curr.strip())
raise InvalidHeader(curr)
name, value = curr.split(":", 1)
if self.cfg.strip_header_spaces:
name = name.rstrip(" \t").upper()
else:
name = name.upper()
if HEADER_RE.search(name):
if not TOKEN_RE.fullmatch(name):
raise InvalidHeaderName(name)

name, value = name.strip(), [value.lstrip()]
value = [value.lstrip(" \t")]

# Consume value continuation lines
while lines and lines[0].startswith((" ", "\t")):
curr = lines.pop(0)
header_length += len(curr)
if header_length > self.limit_request_field_size > 0:
raise LimitRequestHeaders("limit request headers "
"fields size")
value.append(curr)
value = ''.join(value).rstrip()
header_length += len(curr) + len("\r\n")
value.append(curr.strip("\t "))
value = " ".join(value)

if header_length > self.limit_request_field_size > 0:
raise LimitRequestHeaders("limit request headers fields size")
Expand Down Expand Up @@ -156,7 +152,7 @@ def set_body_reader(self):
def should_close(self):
for (h, v) in self.headers:
if h == "CONNECTION":
v = v.lower().strip()
v = v.lower().strip(" \t")
if v == "close":
return True
elif v == "keep-alive":
Expand Down Expand Up @@ -283,7 +279,7 @@ def proxy_protocol_access_check(self):
raise ForbiddenProxyRequest(self.peer_addr[0])

def parse_proxy_protocol(self, line):
bits = line.split()
bits = line.split(" ")

if len(bits) != 6:
raise InvalidProxyLine(line)
Expand Down Expand Up @@ -328,12 +324,12 @@ def parse_proxy_protocol(self, line):
}

def parse_request_line(self, line_bytes):
bits = [bytes_to_str(bit) for bit in line_bytes.split(None, 2)]
bits = [bytes_to_str(bit) for bit in line_bytes.split(b" ", 2)]
if len(bits) != 3:
raise InvalidRequestLine(bytes_to_str(line_bytes))

# Method
if not METH_RE.match(bits[0]):
if not TOKEN_RE.fullmatch(bits[0]):
raise InvalidRequestMethod(bits[0])
self.method = bits[0].upper()

Expand All @@ -349,7 +345,7 @@ def parse_request_line(self, line_bytes):
self.fragment = parts.fragment or ""

# Version
match = VERSION_RE.match(bits[2])
match = VERSION_RE.fullmatch(bits[2])
if match is None:
raise InvalidHTTPVersion(bits[2])
self.version = (int(match.group(1)), int(match.group(2)))
Expand Down
20 changes: 10 additions & 10 deletions gunicorn/http/wsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import re
import sys

from gunicorn.http.message import HEADER_RE
from gunicorn.http.message import TOKEN_RE
from gunicorn.http.errors import InvalidHeader, InvalidHeaderName
from gunicorn import SERVER_SOFTWARE, SERVER
from gunicorn import util
Expand All @@ -18,7 +18,7 @@
# with sending files in blocks over 2GB.
BLKSIZE = 0x3FFFFFFF

HEADER_VALUE_RE = re.compile(r'[^ \t\x21-\x7e\x80-\xff]')
HEADER_VALUE_RE = re.compile(r'[ \t\x21-\x7e\x80-\xff]*')

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -249,31 +249,31 @@ def process_headers(self, headers):
if not isinstance(name, str):
raise TypeError('%r is not a string' % name)

if HEADER_RE.search(name):
if not TOKEN_RE.fullmatch(name):
raise InvalidHeaderName('%r' % name)

if not isinstance(value, str):
raise TypeError('%r is not a string' % value)

if HEADER_VALUE_RE.search(value):
if not HEADER_VALUE_RE.fullmatch(value):
raise InvalidHeader('%r' % value)

value = value.strip()
lname = name.lower().strip()
value = value.strip(" \t")
lname = name.lower()
if lname == "content-length":
self.response_length = int(value)
elif util.is_hoppish(name):
if lname == "connection":
# handle websocket
if value.lower().strip() == "upgrade":
if value.lower() == "upgrade":
self.upgrade = True
elif lname == "upgrade":
if value.lower().strip() == "websocket":
self.headers.append((name.strip(), value))
if value.lower() == "websocket":
self.headers.append((name, value))

# ignore hopbyhop headers
continue
self.headers.append((name.strip(), value))
self.headers.append((name, value))

def is_chunked(self):
# Only use chunked responses when the client is
Expand Down
4 changes: 2 additions & 2 deletions tests/requests/invalid/003.http
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
-blargh /foo HTTP/1.1\r\n
\r\n
GET\n/\nHTTP/1.1\r\n
\r\n
4 changes: 2 additions & 2 deletions tests/requests/invalid/003.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from gunicorn.http.errors import InvalidRequestMethod
request = InvalidRequestMethod
from gunicorn.http.errors import InvalidRequestLine
request = InvalidRequestLine
64 changes: 32 additions & 32 deletions tests/requests/valid/016.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,35 @@
certificate = """-----BEGIN CERTIFICATE-----\r\n
MIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n
ETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n
AkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n
dWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n
SzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n
BAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n
BQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n
W51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n
gW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n
0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n
u2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n
wgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n
1UdEwEB/wQCMAAwEQYJYIZIAYb4QgHTTPAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n
BglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n
VR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n
loCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n
aWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n
9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n
IjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n
BgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n
cHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4Qg\r\n
EDBDAWLmh0dHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC\r\n
5jcmwwPwYDVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n
Y3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n
XCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n
UO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n
hTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n
wTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n
Yhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n
RA==\r\n
-----END CERTIFICATE-----""".replace("\n\n", "\n")
certificate = """-----BEGIN CERTIFICATE-----
MIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx
ETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT
AkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu
dWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV
SzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV
BAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB
BQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF
W51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR
gW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL
0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP
u2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR
wgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG
1UdEwEB/wQCMAAwEQYJYIZIAYb4QgHTTPAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs
BglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD
VR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj
loCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj
aWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG
9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE
IjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO
BgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1
cHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4Qg
EDBDAWLmh0dHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC
5jcmwwPwYDVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv
Y3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3
XCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8
UO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk
hTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK
wTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu
Yhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3
RA==
-----END CERTIFICATE-----""".replace("\n", "")

request = {
"method": "GET",
Expand Down
2 changes: 2 additions & 0 deletions tests/requests/valid/031.http
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-blargh /foo HTTP/1.1\r\n
\r\n
7 changes: 7 additions & 0 deletions tests/requests/valid/031.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
request = {
"method": "-BLARGH",
"uri": uri("/foo"),
"version": (1, 1),
"headers": [],
"body": b""
}

0 comments on commit b9721a2

Please sign in to comment.