From ef76b1e05a5c72b2fee35ae8b2c430b43e31f228 Mon Sep 17 00:00:00 2001 From: Aapo Talvensaari Date: Thu, 2 Dec 2021 12:57:13 +0200 Subject: [PATCH 01/11] fix(tools.uri) normalization function excessive percent decoding This is alternative to PR #8139 where we actually fix the normalization function to not do excessive percent-decoding on normalization. --- kong/runloop/handler.lua | 5 ++-- kong/tools/uri.lua | 31 ++++++++++++++---------- spec/01-unit/08-router_spec.lua | 38 ++++++++++++++++++++++++++++++ spec/01-unit/18-tools_uri_spec.lua | 14 +++++++++-- 4 files changed, 72 insertions(+), 16 deletions(-) diff --git a/kong/runloop/handler.lua b/kong/runloop/handler.lua index d52cb523bb8..e33ebebedad 100644 --- a/kong/runloop/handler.lua +++ b/kong/runloop/handler.lua @@ -39,7 +39,8 @@ local subsystem = ngx.config.subsystem local clear_header = ngx.req.clear_header local http_version = ngx.req.http_version local unpack = unpack -local escape = require("kong.tools.uri").escape + + local is_http_module = subsystem == "http" @@ -1379,7 +1380,7 @@ return { -- router, which might have truncated it (`strip_uri`). -- `host` is the original header to be preserved if set. var.upstream_scheme = match_t.upstream_scheme -- COMPAT: pdk - var.upstream_uri = escape(match_t.upstream_uri) + var.upstream_uri = match_t.upstream_uri if match_t.upstream_host then var.upstream_host = match_t.upstream_host end diff --git a/kong/tools/uri.lua b/kong/tools/uri.lua index 2acdc0a351c..4024787b556 100644 --- a/kong/tools/uri.lua +++ b/kong/tools/uri.lua @@ -1,6 +1,3 @@ -local _M = {} - - local string_char = string.char local string_upper = string.upper local string_find = string.find @@ -35,6 +32,7 @@ local RESERVED_CHARACTERS = { [0x5D] = true, -- ] } + local ESCAPE_PATTERN = "[^!#$&'()*+,/:;=?@[\\]A-Z\\d-_.~%]" local TMP_OUTPUT = require("table.new")(16, 0) @@ -52,12 +50,21 @@ local function percent_decode(m) end -local function escape(m) +local function percent_escape(m) return string_format("%%%02X", string_byte(m[0])) end -function _M.normalize(uri, merge_slashes) +local function escape(uri) + if ngx_re_find(uri, ESCAPE_PATTERN, "joi") then + return ngx_re_gsub(uri, ESCAPE_PATTERN, percent_escape, "joi") + end + + return uri +end + + +local function unescape(uri, merge_slashes) -- check for simple cases and early exit if uri == "" or uri == "/" then return uri @@ -148,13 +155,13 @@ function _M.normalize(uri, merge_slashes) end -function _M.escape(uri) - if ngx_re_find(uri, ESCAPE_PATTERN, "joi") then - return ngx_re_gsub(uri, ESCAPE_PATTERN, escape, "joi") - end - - return uri +local function normalize(uri, merge_slashes) + return escape(unescape(uri, merge_slashes)) end -return _M +return { + escape = escape, + unescape = unescape, + normalize = normalize, +} diff --git a/spec/01-unit/08-router_spec.lua b/spec/01-unit/08-router_spec.lua index 56adb4cf100..26b788f1ef6 100644 --- a/spec/01-unit/08-router_spec.lua +++ b/spec/01-unit/08-router_spec.lua @@ -2789,6 +2789,44 @@ for _, flavor in ipairs({ "traditional", "traditional_compatible" }) do assert.equal(2, #match_t.matches.uri_captures) end) + it("returns uri_captures normalized, fix #7913", function() + local use_case = { + { + service = service, + route = { + paths = { [[/users/(?P[a-zA-Z\s\d%]+)/profile/?(?P[a-z]*)]] }, + }, + }, + } + + local router = assert(Router.new(use_case)) + local _ngx = mock_ngx("GET", "/users/%6aohn%20doe/profile", { host = "domain.org" }) + + router._set_ngx(_ngx) + local match_t = router.exec() + assert.equal("john%20doe", match_t.matches.uri_captures[1]) + assert.equal("john%20doe", match_t.matches.uri_captures.fullname) + assert.equal("", match_t.matches.uri_captures[2]) + assert.equal("", match_t.matches.uri_captures.scope) + -- returns the full match as well + assert.equal("/users/john%20doe/profile", match_t.matches.uri_captures[0]) + -- no stripped_uri capture + assert.is_nil(match_t.matches.uri_captures.stripped_uri) + assert.equal(2, #match_t.matches.uri_captures) + + -- again, this time from the LRU cache + local match_t = router.exec() + assert.equal("john%20doe", match_t.matches.uri_captures[1]) + assert.equal("john%20doe", match_t.matches.uri_captures.fullname) + assert.equal("", match_t.matches.uri_captures[2]) + assert.equal("", match_t.matches.uri_captures.scope) + -- returns the full match as well + assert.equal("/users/john%20doe/profile", match_t.matches.uri_captures[0]) + -- no stripped_uri capture + assert.is_nil(match_t.matches.uri_captures.stripped_uri) + assert.equal(2, #match_t.matches.uri_captures) + end) + it("returns no uri_captures from a [uri prefix] match", function() local use_case = { { diff --git a/spec/01-unit/18-tools_uri_spec.lua b/spec/01-unit/18-tools_uri_spec.lua index b9d9dc910aa..31fa4359963 100644 --- a/spec/01-unit/18-tools_uri_spec.lua +++ b/spec/01-unit/18-tools_uri_spec.lua @@ -47,8 +47,12 @@ describe("kong.tools.uri", function() assert.equal("/a/b/", uri.normalize("/a//b//", true)) end) - it("decodes non-ASCII characters that are unreserved, issue #2366", function() - assert.equal("/endeløst", uri.normalize("/endel%C3%B8st")) + it("does not decode non-ASCII characters that are unreserved, issue #2366", function() + assert.equal("/endel%C3%B8st", uri.normalize("/endel%C3%B8st")) + end) + + it("does normalize complex uri that has characters outside of normal uri charset", function() + assert.equal("/%C3%A4/a./a/_%99%AF%2F%2F" , uri.normalize("/ä/a/%2e./a%2E//a/%2e/./a/../a/%2e%2E/%5f%99%af%2f%2F", true)) end) end) @@ -66,4 +70,10 @@ describe("kong.tools.uri", function() assert.equal("/endel%C3%B8st", uri.escape("/endeløst")) end) end) + + describe("unescape()", function() + it("decodes non-ASCII characters that are unreserved, issue #2366", function() + assert.equal("/endeløst", uri.unescape("/endel%C3%B8st")) + end) + end) end) From 3744780ed819098cc3d5ecf658f21f3d2a4c513d Mon Sep 17 00:00:00 2001 From: Datong Sun Date: Sun, 23 Jan 2022 23:39:21 -0800 Subject: [PATCH 02/11] do not decode non-unreserved characters --- kong/tools/uri.lua | 54 +++++++++++++----------------- spec/01-unit/18-tools_uri_spec.lua | 8 +---- 2 files changed, 24 insertions(+), 38 deletions(-) diff --git a/kong/tools/uri.lua b/kong/tools/uri.lua index 4024787b556..63eea8a311a 100644 --- a/kong/tools/uri.lua +++ b/kong/tools/uri.lua @@ -10,27 +10,16 @@ local ngx_re_find = ngx.re.find local ngx_re_gsub = ngx.re.gsub -local RESERVED_CHARACTERS = { - [0x21] = true, -- ! - [0x23] = true, -- # - [0x24] = true, -- $ - [0x25] = true, -- % - [0x26] = true, -- & - [0x27] = true, -- ' - [0x28] = true, -- ( - [0x29] = true, -- ) - [0x2A] = true, -- * - [0x2B] = true, -- + - [0x2C] = true, -- , - [0x2F] = true, -- / - [0x3A] = true, -- : - [0x3B] = true, -- ; - [0x3D] = true, -- = - [0x3F] = true, -- ? - [0x40] = true, -- @ - [0x5B] = true, -- [ - [0x5D] = true, -- ] -} +local HYPHEN_BYTE = string_byte('-') +local DOT_BYTE = string_byte('.') +local UNDERSCORE_BYTE = string_byte('_') +local TILDE_BYTE = string_byte('~') +local CAP_A_BYTE = string_byte('A') +local CAP_Z_BYTE = string_byte('Z') +local A_BYTE = string_byte('a') +local Z_BYTE = string_byte('z') +local ZERO_BYTE = string_byte('0') +local NINE_BYTE = string_byte('9') local ESCAPE_PATTERN = "[^!#$&'()*+,/:;=?@[\\]A-Z\\d-_.~%]" @@ -42,11 +31,20 @@ local SLASH = string_byte("/") local function percent_decode(m) local hex = m[1] local num = tonumber(hex, 16) - if RESERVED_CHARACTERS[num] then - return string_upper(m[0]) + -- from rfc3986: ...should be normalized by decoding any + -- percent-encoded octet that corresponds to an unreserved character + -- + -- unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + if (num >= A_BYTE and num <= Z_BYTE) -- a..z + or (num >= CAP_A_BYTE and num <= CAP_Z_BYTE) -- A..Z + or (num >= ZERO_BYTE and num <= NINE_BYTE) -- 0..9 + or num == HYPHEN_BYTE or num == DOT_BYTE + or num == UNDERSCORE_BYTE or num == TILDE_BYTE + then + return string_char(num) end - return string_char(num) + return string_upper(m[0]) end @@ -64,7 +62,7 @@ local function escape(uri) end -local function unescape(uri, merge_slashes) +local function normalize(uri, merge_slashes) -- check for simple cases and early exit if uri == "" or uri == "/" then return uri @@ -155,13 +153,7 @@ local function unescape(uri, merge_slashes) end -local function normalize(uri, merge_slashes) - return escape(unescape(uri, merge_slashes)) -end - - return { escape = escape, - unescape = unescape, normalize = normalize, } diff --git a/spec/01-unit/18-tools_uri_spec.lua b/spec/01-unit/18-tools_uri_spec.lua index 31fa4359963..17757ded749 100644 --- a/spec/01-unit/18-tools_uri_spec.lua +++ b/spec/01-unit/18-tools_uri_spec.lua @@ -52,7 +52,7 @@ describe("kong.tools.uri", function() end) it("does normalize complex uri that has characters outside of normal uri charset", function() - assert.equal("/%C3%A4/a./a/_%99%AF%2F%2F" , uri.normalize("/ä/a/%2e./a%2E//a/%2e/./a/../a/%2e%2E/%5f%99%af%2f%2F", true)) + assert.equal("/%C3%A4/a./a/_%99%AF%2F%2F" , uri.normalize("/%C3%A4/a/%2e./a%2E//a/%2e/./a/../a/%2e%2E/%5f%99%af%2f%2F", true)) end) end) @@ -70,10 +70,4 @@ describe("kong.tools.uri", function() assert.equal("/endel%C3%B8st", uri.escape("/endeløst")) end) end) - - describe("unescape()", function() - it("decodes non-ASCII characters that are unreserved, issue #2366", function() - assert.equal("/endeløst", uri.unescape("/endel%C3%B8st")) - end) - end) end) From 547b3f342dccef4c9add49f140529d85774bddcb Mon Sep 17 00:00:00 2001 From: Datong Sun Date: Mon, 24 Jan 2022 00:03:15 -0800 Subject: [PATCH 03/11] add tests for %20 --- spec/01-unit/18-tools_uri_spec.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/01-unit/18-tools_uri_spec.lua b/spec/01-unit/18-tools_uri_spec.lua index 17757ded749..2b241e7a765 100644 --- a/spec/01-unit/18-tools_uri_spec.lua +++ b/spec/01-unit/18-tools_uri_spec.lua @@ -12,9 +12,9 @@ describe("kong.tools.uri", function() assert.equal("/a/b/c/", uri.normalize("/a/b/c/")) end) - it("no normalization necessary (reserved characters)", function() + it("no normalization necessary (not unreserved characters)", function() assert.equal("/a%2Fb%2Fc/", uri.normalize("/a%2Fb%2Fc/")) - assert.equal("/%21%23%24%25%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D", uri.normalize("/%21%23%24%25%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D")) + assert.equal("/%20%21%23%24%25%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D", uri.normalize("/%20%21%23%24%25%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D")) end) it("converting percent-encoded triplets to uppercase", function() From 358fd27b47637fea95e5109a0e3a6569d88d7ece Mon Sep 17 00:00:00 2001 From: suika Date: Thu, 23 Jun 2022 14:37:26 +0800 Subject: [PATCH 04/11] fix(utils) escape should return only --- kong/tools/uri.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kong/tools/uri.lua b/kong/tools/uri.lua index 63eea8a311a..5d947c72d90 100644 --- a/kong/tools/uri.lua +++ b/kong/tools/uri.lua @@ -55,7 +55,7 @@ end local function escape(uri) if ngx_re_find(uri, ESCAPE_PATTERN, "joi") then - return ngx_re_gsub(uri, ESCAPE_PATTERN, percent_escape, "joi") + return (ngx_re_gsub(uri, ESCAPE_PATTERN, percent_escape, "joi")) end return uri From f88a7d218adc782e99c8c904b41f13d1e8cc70b6 Mon Sep 17 00:00:00 2001 From: suika Date: Mon, 27 Jun 2022 15:59:38 +0800 Subject: [PATCH 05/11] fix(core) handling of others for normalization We decided to decode "others" like the unpreserved ones. Therefore we have better interface for regex. --- kong/runloop/handler.lua | 3 +- kong/tools/uri.lua | 85 +++++++++++++++++++++++------- spec/01-unit/08-router_spec.lua | 19 +++---- spec/01-unit/18-tools_uri_spec.lua | 6 +-- 4 files changed, 82 insertions(+), 31 deletions(-) diff --git a/kong/runloop/handler.lua b/kong/runloop/handler.lua index e33ebebedad..e2aba723ef5 100644 --- a/kong/runloop/handler.lua +++ b/kong/runloop/handler.lua @@ -41,6 +41,7 @@ local http_version = ngx.req.http_version local unpack = unpack +local escape = require("kong.tools.uri").escape local is_http_module = subsystem == "http" @@ -1380,7 +1381,7 @@ return { -- router, which might have truncated it (`strip_uri`). -- `host` is the original header to be preserved if set. var.upstream_scheme = match_t.upstream_scheme -- COMPAT: pdk - var.upstream_uri = match_t.upstream_uri + var.upstream_uri = escape(match_t.upstream_uri) if match_t.upstream_host then var.upstream_host = match_t.upstream_host end diff --git a/kong/tools/uri.lua b/kong/tools/uri.lua index 5d947c72d90..e9148d4fbc8 100644 --- a/kong/tools/uri.lua +++ b/kong/tools/uri.lua @@ -9,6 +9,23 @@ local table_concat = table.concat local ngx_re_find = ngx.re.find local ngx_re_gsub = ngx.re.gsub +local table_new = table.new + + +-- Charset: +-- reserved = "!" / "*" / "'" / "(" / ")" / ";" / ":" / "@" / "&" / "=" / "+" / "$" / "," / "/" / "?" / "%" / "#" / "[" / "]" +-- unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +-- other: * (meaning any char that is not mentioned above) + +-- Reserved characters have special meaning in URI. Encoding/decoding it affects the semantics of the URI; +-- Unreserved characters are safe to use as part of HTTP message without encoding; +-- Other characters has not special meaning but may be not safe to use as part of HTTP message without encoding; + +-- We should not unescape or escape reserved characters; +-- We should unescape but not escape unreserved characters; +-- We choose to unescape when processing and escape when forwarding for other characters + +local RESERVED_CHARS = "!*'();:@&=+$,/?%#[]" local HYPHEN_BYTE = string_byte('-') local DOT_BYTE = string_byte('.') @@ -21,6 +38,38 @@ local Z_BYTE = string_byte('z') local ZERO_BYTE = string_byte('0') local NINE_BYTE = string_byte('9') +local CHAR_RESERVED = true +local CHAR_UNRESERVED = false +local CHAR_OTHERS = nil -- luacheck: ignore + +local char_urlencode_type = table_new(256, 0) do + -- reserved + for i = 1, #RESERVED_CHARS do + char_urlencode_type[string_byte(RESERVED_CHARS, i)] = CHAR_RESERVED + end + + -- unreserved + for num = A_BYTE, Z_BYTE do + char_urlencode_type[num] = CHAR_UNRESERVED + end + + for num = CAP_A_BYTE, CAP_Z_BYTE do + char_urlencode_type[num] = CHAR_UNRESERVED + end + + for num = ZERO_BYTE, NINE_BYTE do + char_urlencode_type[num] = CHAR_UNRESERVED + end + + for _, num in ipairs{ + HYPHEN_BYTE, DOT_BYTE, UNDERSCORE_BYTE, TILDE_BYTE, + } do + char_urlencode_type[num] = CHAR_UNRESERVED + end + + -- others, default to CHAR_OTHERS +end + local ESCAPE_PATTERN = "[^!#$&'()*+,/:;=?@[\\]A-Z\\d-_.~%]" @@ -28,23 +77,20 @@ local TMP_OUTPUT = require("table.new")(16, 0) local DOT = string_byte(".") local SLASH = string_byte("/") -local function percent_decode(m) - local hex = m[1] - local num = tonumber(hex, 16) - -- from rfc3986: ...should be normalized by decoding any - -- percent-encoded octet that corresponds to an unreserved character - -- - -- unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - if (num >= A_BYTE and num <= Z_BYTE) -- a..z - or (num >= CAP_A_BYTE and num <= CAP_Z_BYTE) -- A..Z - or (num >= ZERO_BYTE and num <= NINE_BYTE) -- 0..9 - or num == HYPHEN_BYTE or num == DOT_BYTE - or num == UNDERSCORE_BYTE or num == TILDE_BYTE - then - return string_char(num) - end +-- local function is_unreserved(num) return char_urlencode_type[num] == CHAR_UNRESERVED end +-- local function is_not_reserved(num) return not char_urlencode_type[num] end + +local function normalize_decode(m) + local hex = m[1] + local num = tonumber(hex, 16) - return string_upper(m[0]) + -- from rfc3986 we should decode unreserved character + -- and we choose to decode "others" + if not char_urlencode_type[num] then -- is not reserved(false or nil) + return string_char(num) + end + + return string_upper(m[0]) end @@ -52,7 +98,9 @@ local function percent_escape(m) return string_format("%%%02X", string_byte(m[0])) end - +-- This function does slightly different things from its name. +-- It ensures the output to be safe to a part of HTTP message (headers or path) +-- and preserve origin semantics local function escape(uri) if ngx_re_find(uri, ESCAPE_PATTERN, "joi") then return (ngx_re_gsub(uri, ESCAPE_PATTERN, percent_escape, "joi")) @@ -72,11 +120,12 @@ local function normalize(uri, merge_slashes) -- (this can in some cases lead to unnecessary percent-decoding) if string_find(uri, "%", 1, true) then -- decoding percent-encoded triplets of unreserved characters - uri = ngx_re_gsub(uri, "%([\\dA-F]{2})", percent_decode, "joi") + uri = ngx_re_gsub(uri, "%([\\dA-F]{2})", normalize_decode, "joi") end -- check if the uri contains a dot -- (this can in some cases lead to unnecessary dot removal processing) + -- notice: it's expected that /%2e./ is considered the same of /../ if string_find(uri, ".", 1, true) == nil then if not merge_slashes then return uri diff --git a/spec/01-unit/08-router_spec.lua b/spec/01-unit/08-router_spec.lua index 26b788f1ef6..80796371e76 100644 --- a/spec/01-unit/08-router_spec.lua +++ b/spec/01-unit/08-router_spec.lua @@ -2798,30 +2798,31 @@ for _, flavor in ipairs({ "traditional", "traditional_compatible" }) do }, }, } - + + local router = assert(Router.new(use_case)) local _ngx = mock_ngx("GET", "/users/%6aohn%20doe/profile", { host = "domain.org" }) - + router._set_ngx(_ngx) local match_t = router.exec() - assert.equal("john%20doe", match_t.matches.uri_captures[1]) - assert.equal("john%20doe", match_t.matches.uri_captures.fullname) + assert.equal("john doe", match_t.matches.uri_captures[1]) + assert.equal("john doe", match_t.matches.uri_captures.fullname) assert.equal("", match_t.matches.uri_captures[2]) assert.equal("", match_t.matches.uri_captures.scope) -- returns the full match as well - assert.equal("/users/john%20doe/profile", match_t.matches.uri_captures[0]) + assert.equal("/users/john doe/profile", match_t.matches.uri_captures[0]) -- no stripped_uri capture assert.is_nil(match_t.matches.uri_captures.stripped_uri) assert.equal(2, #match_t.matches.uri_captures) - + -- again, this time from the LRU cache local match_t = router.exec() - assert.equal("john%20doe", match_t.matches.uri_captures[1]) - assert.equal("john%20doe", match_t.matches.uri_captures.fullname) + assert.equal("john doe", match_t.matches.uri_captures[1]) + assert.equal("john doe", match_t.matches.uri_captures.fullname) assert.equal("", match_t.matches.uri_captures[2]) assert.equal("", match_t.matches.uri_captures.scope) -- returns the full match as well - assert.equal("/users/john%20doe/profile", match_t.matches.uri_captures[0]) + assert.equal("/users/john doe/profile", match_t.matches.uri_captures[0]) -- no stripped_uri capture assert.is_nil(match_t.matches.uri_captures.stripped_uri) assert.equal(2, #match_t.matches.uri_captures) diff --git a/spec/01-unit/18-tools_uri_spec.lua b/spec/01-unit/18-tools_uri_spec.lua index 2b241e7a765..f7c56569aec 100644 --- a/spec/01-unit/18-tools_uri_spec.lua +++ b/spec/01-unit/18-tools_uri_spec.lua @@ -14,7 +14,7 @@ describe("kong.tools.uri", function() it("no normalization necessary (not unreserved characters)", function() assert.equal("/a%2Fb%2Fc/", uri.normalize("/a%2Fb%2Fc/")) - assert.equal("/%20%21%23%24%25%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D", uri.normalize("/%20%21%23%24%25%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D")) + assert.equal("/ %21%23%24%25%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D", uri.normalize("/%20%21%23%24%25%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D")) end) it("converting percent-encoded triplets to uppercase", function() @@ -48,11 +48,11 @@ describe("kong.tools.uri", function() end) it("does not decode non-ASCII characters that are unreserved, issue #2366", function() - assert.equal("/endel%C3%B8st", uri.normalize("/endel%C3%B8st")) + assert.equal("/endeløst", uri.normalize("/endel%C3%B8st")) end) it("does normalize complex uri that has characters outside of normal uri charset", function() - assert.equal("/%C3%A4/a./a/_%99%AF%2F%2F" , uri.normalize("/%C3%A4/a/%2e./a%2E//a/%2e/./a/../a/%2e%2E/%5f%99%af%2f%2F", true)) + assert.equal("/ä/a./a/_\x99\xaf%2F%2F" , uri.normalize("/%C3%A4/a/%2e./a%2E//a/%2e/./a/../a/%2e%2E/%5f%99%af%2f%2F", true)) end) end) From 3bb6fcf28c1a164503e852038a5e1fbfd35bc6b7 Mon Sep 17 00:00:00 2001 From: suika Date: Mon, 27 Jun 2022 16:28:54 +0800 Subject: [PATCH 06/11] restore the name of the tests --- spec/01-unit/18-tools_uri_spec.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/01-unit/18-tools_uri_spec.lua b/spec/01-unit/18-tools_uri_spec.lua index f7c56569aec..cb7cede521d 100644 --- a/spec/01-unit/18-tools_uri_spec.lua +++ b/spec/01-unit/18-tools_uri_spec.lua @@ -12,7 +12,7 @@ describe("kong.tools.uri", function() assert.equal("/a/b/c/", uri.normalize("/a/b/c/")) end) - it("no normalization necessary (not unreserved characters)", function() + it("no normalization necessary (reserved characters)", function() assert.equal("/a%2Fb%2Fc/", uri.normalize("/a%2Fb%2Fc/")) assert.equal("/ %21%23%24%25%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D", uri.normalize("/%20%21%23%24%25%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D")) end) @@ -47,7 +47,7 @@ describe("kong.tools.uri", function() assert.equal("/a/b/", uri.normalize("/a//b//", true)) end) - it("does not decode non-ASCII characters that are unreserved, issue #2366", function() + it("decodes non-ASCII characters that are unreserved, issue #2366", function() assert.equal("/endeløst", uri.normalize("/endel%C3%B8st")) end) From 4d1be6d9f7346b3cbcdd52da5c20e25b3070068b Mon Sep 17 00:00:00 2001 From: suika Date: Tue, 28 Jun 2022 15:36:49 +0800 Subject: [PATCH 07/11] style --- kong/runloop/handler.lua | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kong/runloop/handler.lua b/kong/runloop/handler.lua index e2aba723ef5..d52cb523bb8 100644 --- a/kong/runloop/handler.lua +++ b/kong/runloop/handler.lua @@ -39,9 +39,7 @@ local subsystem = ngx.config.subsystem local clear_header = ngx.req.clear_header local http_version = ngx.req.http_version local unpack = unpack - - -local escape = require("kong.tools.uri").escape +local escape = require("kong.tools.uri").escape local is_http_module = subsystem == "http" From decee969db290453204d5a854ebad82d5597d6ec Mon Sep 17 00:00:00 2001 From: suika Date: Thu, 7 Jul 2022 10:49:32 +0800 Subject: [PATCH 08/11] apply suggestions --- kong/tools/uri.lua | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/kong/tools/uri.lua b/kong/tools/uri.lua index e9148d4fbc8..7b723642a53 100644 --- a/kong/tools/uri.lua +++ b/kong/tools/uri.lua @@ -13,7 +13,8 @@ local table_new = table.new -- Charset: --- reserved = "!" / "*" / "'" / "(" / ")" / ";" / ":" / "@" / "&" / "=" / "+" / "$" / "," / "/" / "?" / "%" / "#" / "[" / "]" +-- reserved = "!" / "*" / "'" / "(" / ")" / ";" / ":" / +-- "@" / "&" / "=" / "+" / "$" / "," / "/" / "?" / "%" / "#" / "[" / "]" -- unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" -- other: * (meaning any char that is not mentioned above) @@ -40,32 +41,32 @@ local NINE_BYTE = string_byte('9') local CHAR_RESERVED = true local CHAR_UNRESERVED = false -local CHAR_OTHERS = nil -- luacheck: ignore +-- nil for CHAR_OTHERS -local char_urlencode_type = table_new(256, 0) do +local chars_to_decode = table_new(256, 0) +do -- reserved for i = 1, #RESERVED_CHARS do - char_urlencode_type[string_byte(RESERVED_CHARS, i)] = CHAR_RESERVED + chars_to_decode[string_byte(RESERVED_CHARS, i)] = CHAR_RESERVED end -- unreserved for num = A_BYTE, Z_BYTE do - char_urlencode_type[num] = CHAR_UNRESERVED + chars_to_decode[num] = CHAR_UNRESERVED end for num = CAP_A_BYTE, CAP_Z_BYTE do - char_urlencode_type[num] = CHAR_UNRESERVED + chars_to_decode[num] = CHAR_UNRESERVED end for num = ZERO_BYTE, NINE_BYTE do - char_urlencode_type[num] = CHAR_UNRESERVED + chars_to_decode[num] = CHAR_UNRESERVED end - for _, num in ipairs{ - HYPHEN_BYTE, DOT_BYTE, UNDERSCORE_BYTE, TILDE_BYTE, - } do - char_urlencode_type[num] = CHAR_UNRESERVED - end + chars_to_decode[HYPHEN_BYTE] = CHAR_UNRESERVED + chars_to_decode[DOT_BYTE] = CHAR_UNRESERVED + chars_to_decode[UNDERSCORE_BYTE] = CHAR_UNRESERVED + chars_to_decode[TILDE_BYTE] = CHAR_UNRESERVED -- others, default to CHAR_OTHERS end @@ -77,8 +78,6 @@ local TMP_OUTPUT = require("table.new")(16, 0) local DOT = string_byte(".") local SLASH = string_byte("/") --- local function is_unreserved(num) return char_urlencode_type[num] == CHAR_UNRESERVED end --- local function is_not_reserved(num) return not char_urlencode_type[num] end local function normalize_decode(m) local hex = m[1] @@ -86,7 +85,7 @@ local function normalize_decode(m) -- from rfc3986 we should decode unreserved character -- and we choose to decode "others" - if not char_urlencode_type[num] then -- is not reserved(false or nil) + if not chars_to_decode[num] then -- is not reserved(false or nil) return string_char(num) end From ce57c70c67e25d0e9477c20e660ac36d4cf51aa1 Mon Sep 17 00:00:00 2001 From: suika Date: Thu, 14 Jul 2022 12:26:49 +0800 Subject: [PATCH 09/11] adapt to new api --- spec/01-unit/08-router_spec.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/01-unit/08-router_spec.lua b/spec/01-unit/08-router_spec.lua index 80796371e76..40161c5ba2d 100644 --- a/spec/01-unit/08-router_spec.lua +++ b/spec/01-unit/08-router_spec.lua @@ -2804,7 +2804,7 @@ for _, flavor in ipairs({ "traditional", "traditional_compatible" }) do local _ngx = mock_ngx("GET", "/users/%6aohn%20doe/profile", { host = "domain.org" }) router._set_ngx(_ngx) - local match_t = router.exec() + local match_t = router:exec() assert.equal("john doe", match_t.matches.uri_captures[1]) assert.equal("john doe", match_t.matches.uri_captures.fullname) assert.equal("", match_t.matches.uri_captures[2]) @@ -2816,7 +2816,7 @@ for _, flavor in ipairs({ "traditional", "traditional_compatible" }) do assert.equal(2, #match_t.matches.uri_captures) -- again, this time from the LRU cache - local match_t = router.exec() + local match_t = router:exec() assert.equal("john doe", match_t.matches.uri_captures[1]) assert.equal("john doe", match_t.matches.uri_captures.fullname) assert.equal("", match_t.matches.uri_captures[2]) From d1e58a76267e5bf6f10016966f2ae8a1a81b6be1 Mon Sep 17 00:00:00 2001 From: suika Date: Thu, 14 Jul 2022 14:26:33 +0800 Subject: [PATCH 10/11] fix test --- spec/01-unit/08-router_spec.lua | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spec/01-unit/08-router_spec.lua b/spec/01-unit/08-router_spec.lua index 40161c5ba2d..b27481fc983 100644 --- a/spec/01-unit/08-router_spec.lua +++ b/spec/01-unit/08-router_spec.lua @@ -2794,7 +2794,8 @@ for _, flavor in ipairs({ "traditional", "traditional_compatible" }) do { service = service, route = { - paths = { [[/users/(?P[a-zA-Z\s\d%]+)/profile/?(?P[a-z]*)]] }, + id = "e8fb37f1-102d-461e-9c51-6608a6bb8101", + paths = { [[~/users/(?P[a-zA-Z\s\d%]+)/profile/?(?P[a-z]*)]] }, }, }, } From 0134e722937b4fe2126644329c59e3ffd8771eb3 Mon Sep 17 00:00:00 2001 From: suika Date: Fri, 22 Jul 2022 10:53:43 +0800 Subject: [PATCH 11/11] apply suggestions --- kong/tools/uri.lua | 42 ++++-------------------------------------- 1 file changed, 4 insertions(+), 38 deletions(-) diff --git a/kong/tools/uri.lua b/kong/tools/uri.lua index 7b723642a53..ecc599199ea 100644 --- a/kong/tools/uri.lua +++ b/kong/tools/uri.lua @@ -1,3 +1,5 @@ +local table_new = require "table.new" + local string_char = string.char local string_upper = string.upper local string_find = string.find @@ -9,9 +11,6 @@ local table_concat = table.concat local ngx_re_find = ngx.re.find local ngx_re_gsub = ngx.re.gsub -local table_new = table.new - - -- Charset: -- reserved = "!" / "*" / "'" / "(" / ")" / ";" / ":" / -- "@" / "&" / "=" / "+" / "$" / "," / "/" / "?" / "%" / "#" / "[" / "]" @@ -28,47 +27,14 @@ local table_new = table.new local RESERVED_CHARS = "!*'();:@&=+$,/?%#[]" -local HYPHEN_BYTE = string_byte('-') -local DOT_BYTE = string_byte('.') -local UNDERSCORE_BYTE = string_byte('_') -local TILDE_BYTE = string_byte('~') -local CAP_A_BYTE = string_byte('A') -local CAP_Z_BYTE = string_byte('Z') -local A_BYTE = string_byte('a') -local Z_BYTE = string_byte('z') -local ZERO_BYTE = string_byte('0') -local NINE_BYTE = string_byte('9') - -local CHAR_RESERVED = true -local CHAR_UNRESERVED = false --- nil for CHAR_OTHERS - local chars_to_decode = table_new(256, 0) do -- reserved for i = 1, #RESERVED_CHARS do - chars_to_decode[string_byte(RESERVED_CHARS, i)] = CHAR_RESERVED - end - - -- unreserved - for num = A_BYTE, Z_BYTE do - chars_to_decode[num] = CHAR_UNRESERVED + chars_to_decode[string_byte(RESERVED_CHARS, i)] = true end - for num = CAP_A_BYTE, CAP_Z_BYTE do - chars_to_decode[num] = CHAR_UNRESERVED - end - - for num = ZERO_BYTE, NINE_BYTE do - chars_to_decode[num] = CHAR_UNRESERVED - end - - chars_to_decode[HYPHEN_BYTE] = CHAR_UNRESERVED - chars_to_decode[DOT_BYTE] = CHAR_UNRESERVED - chars_to_decode[UNDERSCORE_BYTE] = CHAR_UNRESERVED - chars_to_decode[TILDE_BYTE] = CHAR_UNRESERVED - - -- others, default to CHAR_OTHERS + -- unreserved and others default to nil end