Skip to content

Commit

Permalink
Fix Content-Length and Transfer-Encoding problem
Browse files Browse the repository at this point in the history
  • Loading branch information
chenBright committed Jan 22, 2024
1 parent 09141d4 commit ada3f1e
Show file tree
Hide file tree
Showing 4 changed files with 290 additions and 24 deletions.
76 changes: 59 additions & 17 deletions src/brpc/details/http_message.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@

namespace brpc {

DEFINE_bool(allow_chunked_length, false,
"Allow both Transfer-Encoding and Content-Length headers are present.");
DEFINE_bool(http_verbose, false,
"[DEBUG] Print EVERY http request/response");
DEFINE_int32(http_verbose_max_body_length, 512,
Expand Down Expand Up @@ -172,6 +174,28 @@ int HttpMessage::on_headers_complete(http_parser *parser) {
}
}

// https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3
// If a message is received with both a Transfer-Encoding and a
// Content-Length header field, the Transfer-Encoding overrides the
// Content-Length. Such a message might indicate an attempt to
// perform request smuggling (Section 9.5) or response splitting
// (Section 9.4) and ought to be handled as an error. A sender MUST
// remove the received Content-Length field prior to forwarding such
// a message.

// Reject message if both Transfer-Encoding and Content-Length headers are
// present or if allowed by http1 codec settings and 'Transfer-Encoding'
// is chunked - remove Content-Length and serve request.
if (parser->uses_transfer_encoding && parser->flags & F_CONTENTLENGTH) {
if (parser->flags & F_CHUNKED && FLAGS_allow_chunked_length) {
http_message->header().RemoveHeader("Content-Length");
} else {
LOG(ERROR) << "HTTP/1.1 protocol error: both Content-Length "
<< "and Transfer-Encoding are set.";
return -1;
}
}

// If server receives a response to a HEAD request, returns 1 and then
// the parser will interpret that as saying that this message has no body.
if (parser->type == HTTP_RESPONSE &&
Expand Down Expand Up @@ -401,6 +425,7 @@ HttpMessage::HttpMessage(bool read_body_progressively,
, _cur_value(NULL)
, _vbodylen(0) {
http_parser_init(&_parser, HTTP_BOTH);
_parser.allow_chunked_length = 1;
_parser.data = this;
}

Expand Down Expand Up @@ -548,7 +573,13 @@ void MakeRawHttpRequest(butil::IOBuf* request,
<< h->minor_version() << BRPC_CRLF;
// Never use "Content-Length" set by user.
h->RemoveHeader("Content-Length");
if (h->method() != HTTP_METHOD_GET) {
const std::string* transfer_encoding = h->GetHeader("Transfer-Encoding");
if (h->method() == HTTP_METHOD_GET) {
h->RemoveHeader("Transfer-Encoding");
} else if (!transfer_encoding) {
// https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.2
// A sender MUST NOT send a Content-Length header field in any message
// that contains a Transfer-Encoding header field.
os << "Content-Length: " << (content ? content->length() : 0)
<< BRPC_CRLF;
}
Expand Down Expand Up @@ -638,25 +669,36 @@ void MakeRawHttpResponse(butil::IOBuf* response,
// A server MUST NOT send a Content-Length header field in any response
// with a status code of 1xx (Informational) or 204 (No Content).
h->RemoveHeader("Content-Length");
} else if (content) {
const std::string* content_length = h->GetHeader("Content-Length");
if (is_head_req) {
// Prioritize "Content-Length" set by user.
// If "Content-Length" is not set, set it to the length of content.
if (!content_length) {
os << "Content-Length: " << content->length() << BRPC_CRLF;
}
} else {
if (content_length) {
h->RemoveHeader("Content-Length");
} else {
const std::string* transfer_encoding = h->GetHeader("Transfer-Encoding");
// https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.2
// A sender MUST NOT send a Content-Length header field in any message
// that contains a Transfer-Encoding header field.
if (transfer_encoding) {
h->RemoveHeader("Content-Length");
}
if (content) {
const std::string* content_length = h->GetHeader("Content-Length");
if (is_head_req) {
if (!content_length && !transfer_encoding) {
// Prioritize "Content-Length" set by user.
// If "Content-Length" is not set, set it to the length of content.
os << "Content-Length: " << content->length() << BRPC_CRLF;
}
} else {
if (!transfer_encoding) {
if (content_length) {
h->RemoveHeader("Content-Length");
}
// Never use "Content-Length" set by user.
// Always set Content-Length since lighttpd requires the header to be
// set to 0 for empty content.
os << "Content-Length: " << content->length() << BRPC_CRLF;
}
}
// Never use "Content-Length" set by user.
// Always set Content-Length since lighttpd requires the header to be
// set to 0 for empty content.
os << "Content-Length: " << content->length() << BRPC_CRLF;
}
}
if (!h->content_type().empty()) {
if (!is_invalid_content && !h->content_type().empty()) {
os << "Content-Type: " << h->content_type()
<< BRPC_CRLF;
}
Expand Down
111 changes: 109 additions & 2 deletions src/brpc/details/http_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,10 @@ enum header_states
, h_transfer_encoding
, h_upgrade

, h_matching_transfer_encoding_token_start
, h_matching_transfer_encoding_chunked
, h_matching_transfer_encoding_token

, h_matching_connection_keep_alive
, h_matching_connection_close

Expand Down Expand Up @@ -499,6 +502,12 @@ bool is_url_char(char c) { return IS_URL_CHAR(c); }

#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)

/**
* Verify that a char is a valid visible (printable) US-ASCII
* character or %x80-FF
**/
#define IS_HEADER_CHAR(ch) \
(ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127))

#if BRPC_HTTP_PARSER_STRICT
# define STRICT_CHECK(cond) \
Expand Down Expand Up @@ -691,6 +700,8 @@ size_t http_parser_execute (http_parser *parser,
const char *url_mark = 0;
const char *body_mark = 0;
const char *status_mark = 0;
const unsigned int lenient = parser->lenient_http_headers;
const unsigned int allow_chunked_length = parser->allow_chunked_length;

/* We're in an error state. Don't bother doing anything. */
if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
Expand Down Expand Up @@ -782,6 +793,7 @@ size_t http_parser_execute (http_parser *parser,
if (ch == CR || ch == LF)
break;
parser->flags = 0;
parser->uses_transfer_encoding = 0;
parser->content_length = ULLONG_MAX;

if (ch == 'H') {
Expand Down Expand Up @@ -817,6 +829,7 @@ size_t http_parser_execute (http_parser *parser,
case s_start_res:
{
parser->flags = 0;
parser->uses_transfer_encoding = 0;
parser->content_length = ULLONG_MAX;

switch (ch) {
Expand Down Expand Up @@ -1015,6 +1028,7 @@ size_t http_parser_execute (http_parser *parser,
if (ch == CR || ch == LF)
break;
parser->flags = 0;
parser->uses_transfer_encoding = 0;
parser->content_length = ULLONG_MAX;

if (!IS_ALPHA(ch)) {
Expand Down Expand Up @@ -1470,6 +1484,7 @@ size_t http_parser_execute (http_parser *parser,
parser->header_state = h_general;
} else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
parser->header_state = h_transfer_encoding;
parser->uses_transfer_encoding = 1;
}
break;

Expand Down Expand Up @@ -1544,16 +1559,26 @@ size_t http_parser_execute (http_parser *parser,
if ('c' == c) {
parser->header_state = h_matching_transfer_encoding_chunked;
} else {
parser->header_state = h_general;
parser->header_state = h_matching_transfer_encoding_token;
}
break;

/* Multi-value `Transfer-Encoding` header */
case h_matching_transfer_encoding_token_start:
break;

case h_content_length:
if (!IS_NUM(ch)) {
SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
goto error;
}

if (parser->flags & F_CONTENTLENGTH) {
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
goto error;
}

parser->flags |= F_CONTENTLENGTH;
parser->content_length = ch - '0';
break;

Expand Down Expand Up @@ -1591,6 +1616,11 @@ size_t http_parser_execute (http_parser *parser,
goto reexecute_byte;
}

if (!lenient && !IS_HEADER_CHAR(ch)) {
SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
goto error;
}

c = LOWER(ch);

switch (parser->header_state) {
Expand Down Expand Up @@ -1627,17 +1657,47 @@ size_t http_parser_execute (http_parser *parser,
break;
}

/* Transfer-Encoding: chunked */
case h_matching_transfer_encoding_token_start:
/* looking for 'Transfer-Encoding: chunked' */
if ('c' == c) {
parser->header_state = h_matching_transfer_encoding_chunked;
} else if (TOKEN(c)) {
/* NOTE(gejun): Not use strict mode for these macros since the additional
* characeters seem to be OK.
*/

/* TODO(indutny): similar code below does this, but why?
* At the very least it seems to be inconsistent given that
* h_matching_transfer_encoding_token does not check for
* `STRICT_TOKEN`
*/
parser->header_state = h_matching_transfer_encoding_token;
} else if (c == ' ' || c == '\t') {
/* Skip lws */
} else {
parser->header_state = h_general;
}
break;

/* Transfer-Encoding: chunked */
case h_matching_transfer_encoding_chunked:
parser->index++;
if (parser->index > sizeof(CHUNKED)-1
|| c != CHUNKED[parser->index]) {
parser->header_state = h_general;
parser->header_state = h_matching_transfer_encoding_token;
} else if (parser->index == sizeof(CHUNKED)-2) {
parser->header_state = h_transfer_encoding_chunked;
}
break;

case h_matching_transfer_encoding_token:
if (ch == ',') {
parser->header_state = h_matching_transfer_encoding_token_start;
parser->index = 0;
}
break;

/* looking for 'Connection: keep-alive' */
case h_matching_connection_keep_alive:
parser->index++;
Expand All @@ -1660,6 +1720,9 @@ size_t http_parser_execute (http_parser *parser,
break;

case h_transfer_encoding_chunked:
if (ch != ' ') parser->header_state = h_matching_transfer_encoding_token;
break;

case h_connection_keep_alive:
case h_connection_close:
if (ch != ' ') parser->header_state = h_general;
Expand Down Expand Up @@ -1739,6 +1802,24 @@ size_t http_parser_execute (http_parser *parser,
break;
}

/* Cannot use transfer-encoding and a content-length header together
per the HTTP specification. (RFC 7230 Section 3.3.3) */
if ((parser->uses_transfer_encoding == 1) &&
(parser->flags & F_CONTENTLENGTH)) {
/* Allow it for lenient parsing as long as `Transfer-Encoding` is
* not `chunked` or allow_length_with_encoding is set
*/
if (parser->flags & F_CHUNKED) {
if (!allow_chunked_length) {
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
goto error;
}
} else if (!lenient) {
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
goto error;
}
}

parser->state = s_headers_done;

/* Here we call the headers_complete callback. This is somewhat
Expand Down Expand Up @@ -1791,6 +1872,26 @@ size_t http_parser_execute (http_parser *parser,
} else if (parser->flags & F_CHUNKED) {
/* chunked encoding - ignore Content-Length header */
parser->state = s_chunk_size_start;
} else if (parser->uses_transfer_encoding == 1) {
if (parser->type == HTTP_REQUEST && !lenient) {
/* RFC 7230 3.3.3 */
/* If a Transfer-Encoding header field
* is present in a request and the chunked transfer coding is not
* the final encoding, the message body length cannot be determined
* reliably; the server MUST respond with the 400 (Bad Request)
* status code and then close the connection.
*/
SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING);
return (p - data); /* Error */
} else {
/* RFC 7230 3.3.3 */
/* If a Transfer-Encoding header field is present in a response and
* the chunked transfer coding is not the final encoding, the
* message body length is determined by reading the connection until
* it is closed by the server.
*/
parser->state = s_body_identity_eof;
}
} else {
if (parser->content_length == 0) {
/* Content-Length header given but zero: Content-Length: 0\r\n */
Expand Down Expand Up @@ -2037,6 +2138,12 @@ http_message_needs_eof (const http_parser *parser)
return 0;
}

/* RFC 7230 3.3.3, see `s_headers_almost_done` */
if ((parser->uses_transfer_encoding == 1) &&
(parser->flags & F_CHUNKED) == 0) {
return 1;
}

if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
return 0;
}
Expand Down
20 changes: 15 additions & 5 deletions src/brpc/details/http_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ enum http_parser_flags
, F_TRAILING = 1 << 3
, F_UPGRADE = 1 << 4
, F_SKIPBODY = 1 << 5
, F_CONTENTLENGTH = 1 << 7
};


Expand Down Expand Up @@ -178,13 +179,17 @@ enum http_parser_flags
XX(INVALID_HEADER_TOKEN, "invalid character in header") \
XX(INVALID_CONTENT_LENGTH, \
"invalid character in content-length header") \
XX(UNEXPECTED_CONTENT_LENGTH, \
"unexpected content-length header") \
XX(INVALID_CHUNK_SIZE, \
"invalid character in chunk size header") \
XX(INVALID_CONSTANT, "invalid constant string") \
XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\
XX(STRICT, "strict mode assertion failed") \
XX(PAUSED, "parser is paused") \
XX(UNKNOWN, "an unknown error occurred")
XX(UNKNOWN, "an unknown error occurred") \
XX(INVALID_TRANSFER_ENCODING, \
"request has invalid transfer-encoding")


/* Define HPE_* values for each errno value above */
Expand All @@ -202,10 +207,15 @@ enum http_errno {
struct http_parser {
/** PRIVATE **/
unsigned int type : 2; /* enum http_parser_type */
unsigned int flags : 6; /* F_* values from 'flags' enum; semi-public */
unsigned int state : 8; /* enum state from http_parser.c */
unsigned int header_state : 8; /* enum header_state from http_parser.c */
unsigned int index : 8; /* index into current matcher */
unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */
unsigned int state : 7; /* enum state from http_parser.c */
unsigned int header_state : 7; /* enum header_state from http_parser.c */
unsigned int index : 5; /* index into current matcher */
unsigned int uses_transfer_encoding : 1; /* Transfer-Encoding header is present */
unsigned int allow_chunked_length : 1; /* Allow headers with both
* `Content-Length` and
* `Transfer-Encoding: chunked` set */
unsigned int lenient_http_headers : 1;

uint32_t nread; /* # bytes read in various scenarios */
uint64_t content_length; /* # bytes in body. `(uint64_t) -1` (all bits one)
Expand Down
Loading

0 comments on commit ada3f1e

Please sign in to comment.