Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scheme parsing fix (https) and extending (ws, wss) #1570

Merged
merged 6 commits into from
Feb 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
315 changes: 182 additions & 133 deletions fw/http_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -380,31 +380,31 @@ do { \
__FSM_I_MATCH_MOVE_fixup_finish(alphabet, to, flag, {})

/* Conditional transition from state @st to @st_next. */
#define __FSM_TX_COND(st, condition, st_next, field) \
__FSM_STATE(st) { \
#define __FSM_TX_COND(st, condition, st_next, field, ...) \
__FSM_STATE(st, __VA_ARGS__) { \
if (likely(condition)) \
__FSM_MOVE_f(st_next, field); \
TFW_PARSER_BLOCK(st); \
}

#define __FSM_TX_COND_nofixup(st, condition, st_next) \
__FSM_STATE(st) { \
#define __FSM_TX_COND_nofixup(st, condition, st_next, ...) \
__FSM_STATE(st, __VA_ARGS__) { \
if (likely(condition)) \
__FSM_MOVE_nofixup(st_next); \
TFW_PARSER_BLOCK(st); \
}

/* Automaton transition from state @st to @st_next on character @ch. */
#define __FSM_TX(st, ch, st_next) \
__FSM_TX_COND(st, c == (ch), st_next, &parser->hdr)
#define __FSM_TX_nofixup(st, ch, st_next) \
__FSM_TX_COND_nofixup(st, c == (ch), st_next)
#define __FSM_TX(st, ch, st_next, ...) \
__FSM_TX_COND(st, c == (ch), st_next, &parser->hdr, __VA_ARGS__)
#define __FSM_TX_nofixup(st, ch, st_next, ...) \
__FSM_TX_COND_nofixup(st, c == (ch), st_next, __VA_ARGS__)

/* Case-insensitive version of __FSM_TX(). */
#define __FSM_TX_LC(st, ch, st_next, field) \
__FSM_TX_COND(st, TFW_LC(c) == (ch), st_next, field)
#define __FSM_TX_LC_nofixup(st, ch, st_next) \
__FSM_TX_COND_nofixup(st, TFW_LC(c) == (ch), st_next)
#define __FSM_TX_LC(st, ch, st_next, field, ...) \
__FSM_TX_COND(st, TFW_LC(c) == (ch), st_next, field, __VA_ARGS__)
#define __FSM_TX_LC_nofixup(st, ch, st_next, ...) \
__FSM_TX_COND_nofixup(st, TFW_LC(c) == (ch), st_next, __VA_ARGS__)

/*
* Automaton transition with alphabet checking and fallback state.
Expand Down Expand Up @@ -3837,119 +3837,7 @@ tfw_http_parse_req(void *req_data, unsigned char *data, size_t len,
__FSM_STATE(Req_Uri, hot) {
if (likely(c == '/'))
__FSM_JMP(Req_UriMark);

if (likely(__data_available(p, 7)
&& C4_INT_LCM(p, 'h', 't', 't', 'p')
&& *(p + 4) == ':' && *(p + 5) == '/'
&& *(p + 6) == '/'))
__FSM_MOVE_nofixup_n(Req_UriAuthorityStart, 7);

/* "http://" slow path - step char-by-char. */
if (likely(TFW_LC(c) == 'h'))
__FSM_MOVE_nofixup(Req_UriSchH);

TFW_PARSER_BLOCK(Req_Uri);
}

/*
* URI host part.
* RFC 3986 chapter 3.2: authority = [userinfo@]host[:port]
*
* Authority parsing: it can be "host" or "userinfo@host" (port is
* parsed later). At the beginning we don't know, which of variants we
* have. So we fill req->host, and if we get '@', we copy host to
* req->userinfo, reset req->host and fill it.
*/
__FSM_STATE(Req_UriAuthorityStart) {
if (likely(isalnum(c) || c == '.' || c == '-')) {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthority, &req->host);
} else if (likely(c == '/')) {
/*
* The case where "Host:" header value is empty.
* A special TfwStr{} string is created that has
* a valid pointer and the length of zero.
*/
T_DBG3("Handling http:///path\n");
tfw_http_msg_set_str_data(msg, &req->host, p);
req->host.flags |= TFW_STR_COMPLETE;
__FSM_JMP(Req_UriMark);
} else if (c == '[') {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
}
TFW_PARSER_BLOCK(Req_UriAuthorityStart);
}

__FSM_STATE(Req_UriAuthority) {
if (likely(isalnum(c) || c == '.' || c == '-' || c == '@')) {
if (unlikely(c == '@')) {
if (!TFW_STR_EMPTY(&req->userinfo)) {
T_DBG("Second '@' in authority\n");
TFW_PARSER_BLOCK(Req_UriAuthority);
}
T_DBG3("Authority contains userinfo\n");
/* copy current host to userinfo */
req->userinfo = req->host;
__msg_field_finish(&req->userinfo, p);
TFW_STR_INIT(&req->host);

__FSM_MOVE_nofixup(Req_UriAuthorityResetHost);
}

__FSM_MOVE_f(Req_UriAuthority, &req->host);
}
__FSM_JMP(Req_UriAuthorityEnd);
}

__FSM_STATE(Req_UriAuthorityIPv6) {
if (likely(isxdigit(c) || c == ':')) {
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
} else if(c == ']') {
__FSM_MOVE_f(Req_UriAuthorityEnd, &req->host);
}
TFW_PARSER_BLOCK(Req_UriAuthorityIPv6);
}

__FSM_STATE(Req_UriAuthorityResetHost) {
if (likely(isalnum(c) || c == '.' || c == '-')) {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthority, &req->host);
} else if (c == '[') {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
}
__FSM_JMP(Req_UriAuthorityEnd);
}

__FSM_STATE(Req_UriAuthorityEnd) {
if (c == ':')
__FSM_MOVE_f(Req_UriPort, &req->host);
/* Authority End */
__msg_field_finish(&req->host, p);
T_DBG3("Userinfo len = %i, host len = %i\n",
(int)req->userinfo.len, (int)req->host.len);
if (likely(c == '/')) {
__FSM_JMP(Req_UriMark);
}
else if (c == ' ') {
__FSM_MOVE_nofixup(Req_HttpVer);
}
TFW_PARSER_BLOCK(Req_UriAuthorityEnd);
}

/* Host port in URI */
__FSM_STATE(Req_UriPort) {
if (likely(isdigit(c)))
__FSM_MOVE_f(Req_UriPort, &req->host);
__msg_field_finish(&req->host, p);
if (likely(c == '/')) {
__FSM_JMP(Req_UriMark);
}
else if (c == ' ') {
__FSM_MOVE_nofixup(Req_HttpVer);
}
TFW_PARSER_BLOCK(Req_UriPort);
__FSM_JMP(Req_UriRareForms);
}

__FSM_STATE(Req_UriMark, hot) {
Expand Down Expand Up @@ -4735,13 +4623,174 @@ Req_Method_1CharStep: __attribute__((cold))
__FSM_MOVE_nofixup_n(Req_MUSpace, 0);
}

/* process URI scheme: "http://" */
__FSM_TX_LC_nofixup(Req_UriSchH, 't', Req_UriSchHt);
__FSM_TX_LC_nofixup(Req_UriSchHt, 't', Req_UriSchHtt);
__FSM_TX_LC_nofixup(Req_UriSchHtt, 'p', Req_UriSchHttp);
__FSM_TX_nofixup(Req_UriSchHttp, ':', Req_UriSchHttpColon);
__FSM_TX_nofixup(Req_UriSchHttpColon, '/', Req_UriSchHttpColonSlash);
__FSM_TX_nofixup(Req_UriSchHttpColonSlash, '/', Req_UriAuthorityStart);
/*
 * Request targets that do not start with '/': the asterisk form and the
 * absolute form.
 */
__FSM_STATE(Req_UriRareForms, cold) {
/*
 * The authority form (RFC 7230, section 5.3.3) is not handled here:
 * it is only used with CONNECT, which is not supported.
 */
/* Asterisk form (RFC 7230, section 5.3.4): taken only for OPTIONS. */
if (req->method == TFW_HTTP_METH_OPTIONS && c == '*')
__FSM_MOVE_nofixup(Req_UriMarkEnd);
/* Everything else is tried as the absolute form (RFC 7230, section 5.3.2). */
__FSM_JMP(Req_UriAbsoluteForm);
}

/*
 * First character of the scheme of an absolute-form URI: 'h' enters the
 * "http"/"https" states and 'w' enters the "ws"/"wss" states (compared
 * through TFW_LC()). Any other character reaches TFW_PARSER_BLOCK().
 */
__FSM_STATE(Req_UriAbsoluteForm, cold) {
/*
 * This form is rare, so the scheme is matched one character per state;
 * there is no need to speed matching up with a multi-byte fast-path
 * prefix comparison.
 */
if (likely(TFW_LC(c) == 'h'))
__FSM_MOVE_nofixup(Req_UriSchH);
else if (TFW_LC(c) == 'w')
__FSM_MOVE_nofixup(Req_UriSchW);

TFW_PARSER_BLOCK(Req_UriAbsoluteForm);
}

/*
 * Process the URI scheme, one character per state. Each transition below
 * either moves to the next state on the expected character or ends in
 * TFW_PARSER_BLOCK() for the current state.
 */
/* Path for "http://" and "https://"; the letters are compared via TFW_LC(). */
__FSM_TX_LC_nofixup(Req_UriSchH, 't', Req_UriSchHt, cold);
__FSM_TX_LC_nofixup(Req_UriSchHt, 't', Req_UriSchHtt, cold);
__FSM_TX_LC_nofixup(Req_UriSchHtt, 'p', Req_UriSchHttp, cold);
/* After "http": ':' continues the "http" path, 's' switches to "https". */
__FSM_STATE(Req_UriSchHttp, cold) {
switch (TFW_LC(c)) {
case ':':
__FSM_MOVE_nofixup(Req_UriSchHttpColon);
/*
 * NOTE(review): there is no break here; this presumably relies on
 * __FSM_MOVE_nofixup() never falling through - confirm against the
 * macro definition.
 */
case 's':
__FSM_MOVE_nofixup(Req_UriSchHttps);
}
TFW_PARSER_BLOCK(Req_UriSchHttp);
}
/* "http": expect "//" after the colon, then parse the authority. */
__FSM_TX_nofixup(Req_UriSchHttpColon, '/', Req_UriSchHttpColonSlash,
cold);
__FSM_TX_nofixup(Req_UriSchHttpColonSlash, '/', Req_UriAuthorityStart,
cold);
/* "https": expect "://", then parse the authority. */
__FSM_TX_nofixup(Req_UriSchHttps, ':', Req_UriSchHttpsColon, cold);
__FSM_TX_nofixup(Req_UriSchHttpsColon, '/', Req_UriSchHttpsColonSlash,
cold);
__FSM_TX_nofixup(Req_UriSchHttpsColonSlash, '/', Req_UriAuthorityStart,
cold);
/*
 * Path for "ws://" and "wss://", one character per state; the letters are
 * compared via TFW_LC().
 */
__FSM_TX_LC_nofixup(Req_UriSchW, 's', Req_UriSchWs, cold);
/* After "ws": ':' continues the "ws" path, 's' switches to "wss". */
__FSM_STATE(Req_UriSchWs, cold) {
switch (TFW_LC(c)) {
case ':':
__FSM_MOVE_nofixup(Req_UriSchWsColon);
/*
 * NOTE(review): there is no break here; this presumably relies on
 * __FSM_MOVE_nofixup() never falling through - confirm against the
 * macro definition.
 */
case 's':
__FSM_MOVE_nofixup(Req_UriSchWss);
}
TFW_PARSER_BLOCK(Req_UriSchWs);
}
/* "ws": expect "//" after the colon, then parse the authority. */
__FSM_TX_nofixup(Req_UriSchWsColon, '/', Req_UriSchWsColonSlash, cold);
__FSM_TX_nofixup(Req_UriSchWsColonSlash, '/', Req_UriAuthorityStart,
cold);
/* "wss": expect "://", then parse the authority. */
__FSM_TX_nofixup(Req_UriSchWss, ':', Req_UriSchWssColon, cold);
__FSM_TX_nofixup(Req_UriSchWssColon, '/', Req_UriSchWssColonSlash,
cold);
__FSM_TX_nofixup(Req_UriSchWssColonSlash, '/', Req_UriAuthorityStart,
cold);

/*
 * URI host part.
 * RFC 3986, section 3.2: authority = [userinfo@]host[:port]
 *
 * Authority parsing: the authority can be either "host" or "userinfo@host"
 * (the port is parsed later). At the beginning we don't know which of the
 * two variants we have, so we fill req->host first; if we then meet '@',
 * we copy the host to req->userinfo, reset req->host and fill it again.
 */
__FSM_STATE(Req_UriAuthorityStart, cold) {
/* First character of a host (or of userinfo): open req->host here. */
if (likely(isalnum(c) || c == '.' || c == '-')) {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthority, &req->host);
} else if (likely(c == '/')) {
/*
 * The host is empty, as in "http:///path" (compare the case
 * where the "Host:" header value is empty).
 * A special TfwStr{} string is created that has
 * a valid pointer and the length of zero.
 */
T_DBG3("Handling http:///path\n");
tfw_http_msg_set_str_data(msg, &req->host, p);
req->host.flags |= TFW_STR_COMPLETE;
__FSM_JMP(Req_UriMark);
} else if (c == '[') {
/* '[' opens a bracketed literal parsed by Req_UriAuthorityIPv6. */
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
}
/* Any other first character is not accepted. */
TFW_PARSER_BLOCK(Req_UriAuthorityStart);
}

/*
 * Collects authority characters (alphanumerics, '.', '-') into req->host.
 * On '@' the data collected so far turns out to be userinfo: it is handed
 * over to req->userinfo and req->host is started again. Any other
 * character is passed on to Req_UriAuthorityEnd.
 *
 * NOTE(review): ':' is not accepted here, so a "user:password@host"
 * authority is routed to Req_UriAuthorityEnd/Req_UriPort instead of being
 * treated as userinfo - confirm that this is intended.
 */
__FSM_STATE(Req_UriAuthority, cold) {
if (likely(isalnum(c) || c == '.' || c == '-' || c == '@')) {
if (unlikely(c == '@')) {
/* Userinfo has already been filled: this is a second '@'. */
if (!TFW_STR_EMPTY(&req->userinfo)) {
T_DBG("Second '@' in authority\n");
TFW_PARSER_BLOCK(Req_UriAuthority);
}
T_DBG3("Authority contains userinfo\n");
/* Copy the current host to userinfo and finish it at '@'. */
req->userinfo = req->host;
__msg_field_finish(&req->userinfo, p);
/* Start over with an empty host. */
TFW_STR_INIT(&req->host);

__FSM_MOVE_nofixup(Req_UriAuthorityResetHost);
}

__FSM_MOVE_f(Req_UriAuthority, &req->host);
}
__FSM_JMP(Req_UriAuthorityEnd);
}

/*
 * Body of a bracketed host literal (entered after '['): hexadecimal digits
 * and ':' are consumed into req->host, ']' ends the literal and moves on to
 * Req_UriAuthorityEnd. Any other character reaches TFW_PARSER_BLOCK().
 */
__FSM_STATE(Req_UriAuthorityIPv6, cold) {
if (likely(isxdigit(c) || c == ':')) {
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
} else if(c == ']') {
__FSM_MOVE_f(Req_UriAuthorityEnd, &req->host);
}
TFW_PARSER_BLOCK(Req_UriAuthorityIPv6);
}

/*
 * First character after "userinfo@": req->host was reset by
 * Req_UriAuthority and is opened again here, either as a regular host or
 * as a bracketed literal. If the character starts neither, control goes to
 * Req_UriAuthorityEnd without req->host having been opened.
 */
__FSM_STATE(Req_UriAuthorityResetHost, cold) {
if (likely(isalnum(c) || c == '.' || c == '-')) {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthority, &req->host);
} else if (c == '[') {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
}
__FSM_JMP(Req_UriAuthorityEnd);
}

/*
 * End of the host: ':' introduces a port; otherwise req->host is finished
 * and only '/' (start of the path) or ' ' (end of the request target) may
 * follow.
 */
__FSM_STATE(Req_UriAuthorityEnd, cold) {
if (c == ':')
__FSM_MOVE_f(Req_UriPort, &req->host);
/* End of the authority: close req->host at the current position. */
__msg_field_finish(&req->host, p);
T_DBG3("Userinfo len = %i, host len = %i\n",
(int)req->userinfo.len, (int)req->host.len);
if (likely(c == '/')) {
__FSM_JMP(Req_UriMark);
}
else if (c == ' ') {
/* No path at all: go straight to the HTTP version. */
__FSM_MOVE_nofixup(Req_HttpVer);
}
TFW_PARSER_BLOCK(Req_UriAuthorityEnd);
}

/*
 * Host port in the URI: decimal digits are consumed with &req->host as the
 * field (presumably the port therefore ends up in req->host - TODO confirm
 * against __FSM_MOVE_f()). After the digits req->host is finished and only
 * '/' or ' ' may follow.
 */
__FSM_STATE(Req_UriPort, cold) {
if (likely(isdigit(c)))
__FSM_MOVE_f(Req_UriPort, &req->host);
__msg_field_finish(&req->host, p);
if (likely(c == '/')) {
__FSM_JMP(Req_UriMark);
}
else if (c == ' ') {
/* No path at all: go straight to the HTTP version. */
__FSM_MOVE_nofixup(Req_HttpVer);
}
TFW_PARSER_BLOCK(Req_UriPort);
}

/* Parse HTTP version (1.1 and 1.0 are supported). */
__FSM_TX_nofixup(Req_HttpVerT1, 'T', Req_HttpVerT2);
Expand Down Expand Up @@ -5731,7 +5780,7 @@ do { \
__FSM_I_field_chunk_flags(fld, TFW_STR_HDR_VALUE); \
__FSM_EXIT(CSTR_POSTPONE); \
}

/*
 * Single-lambda shorthand for H2_TRY_STR_2LAMBDA_fixup(): forwards all
 * arguments unchanged and passes an empty block ({}) in the third position,
 * ahead of @lambda.
 */
#define H2_TRY_STR_LAMBDA_fixup(str, fld, lambda, curr_st, next_st) \
H2_TRY_STR_2LAMBDA_fixup(str, fld, {}, lambda, curr_st, next_st)

Expand Down
19 changes: 19 additions & 0 deletions fw/t/unit/test_http_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,25 @@ TEST(http_parser, parses_req_uri)
EXPECT_TFWSTR_EQ(&req->host, "natsys-lab.com");
}

/*
 * Absolute-form request targets with the "https", "ws" and "wss" schemes:
 * the userinfo part must not end up in req->host.
 */
FOR_REQ("GET https://userame@natsys-lab.com HTTP/1.1\r\n\r\n")
{
EXPECT_TFWSTR_EQ(&req->host, "natsys-lab.com");
}

FOR_REQ("GET ws://userame@natsys-lab.com HTTP/1.1\r\n\r\n")
{
EXPECT_TFWSTR_EQ(&req->host, "natsys-lab.com");
}

FOR_REQ("GET wss://userame@natsys-lab.com HTTP/1.1\r\n\r\n")
{
EXPECT_TFWSTR_EQ(&req->host, "natsys-lab.com");
}

/* Asterisk-form request target with the OPTIONS method. */
FOR_REQ("OPTIONS * HTTP/1.1\r\n\r\n");

/* A scheme other than http, https, ws or wss is expected to be blocked. */
EXPECT_BLOCK_REQ("GET sch://userame@natsys-lab.com HTTP/1.1\r\n\r\n");

EXPECT_BLOCK_REQ("GET \x7f HTTP/1.1\r\n"
"Host: test\r\n"
"\r\n");
Expand Down