Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[backport -> release/3.8.x] fix(vault): let shdict secret vault cache presist enough time during resurrect_ttl #13561

Merged
merged 1 commit into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
message: Fixed an issue where the Vault secret cache got refreshed during `resurrect_ttl` time and could not be fetched by other workers.
type: bugfix
scope: Core
24 changes: 21 additions & 3 deletions kong/pdk/vault.lua
Original file line number Diff line number Diff line change
Expand Up @@ -822,9 +822,20 @@ local function new(self)
-- @tparam table parsed_reference the parsed reference
-- @treturn string|nil the retrieved value from the vault, of `nil`
-- @treturn string|nil a string describing an error if there was one
-- @treturn boolean|nil whether to resurrect value in case vault errors or doesn't return value
-- @usage local value, err = get_from_vault(reference, strategy, config, cache_key, parsed_reference)
local function get_from_vault(reference, strategy, config, cache_key, parsed_reference)
local function get_from_vault(reference, strategy, config, cache_key, parsed_reference, resurrect)
local value, err, ttl = invoke_strategy(strategy, config, parsed_reference)
if resurrect and value == nil then
local resurrected_value = SECRETS_CACHE:get(cache_key)
if resurrected_value then
return resurrected_value

else
return nil, fmt("could not get value from external vault (%s)", err)
end
end

local cache_value, shdict_ttl, lru_ttl = get_cache_value_and_ttl(value, config, ttl)
local ok, cache_err = SECRETS_CACHE:safe_set(cache_key, cache_value, shdict_ttl)
if not ok then
Expand Down Expand Up @@ -1267,18 +1278,25 @@ local function new(self)
-- If the TTL is still greater than the resurrect time
-- we don't have to rotate the secret, except it if it
-- negatively cached.
local resurrect
local ttl = SECRETS_CACHE:ttl(new_cache_key)
if ttl and SECRETS_CACHE:get(new_cache_key) ~= NEGATIVELY_CACHED_VALUE then
local resurrect_ttl = max(config.resurrect_ttl or DAO_MAX_TTL, SECRETS_CACHE_MIN_TTL)
-- the secret is still within ttl, no need to refresh
if ttl > resurrect_ttl then
return true
end

-- the secret is still within resurrect ttl time, so when we try to refresh the secret
-- we do not forciblly override it with a negative value, so that the cached value
-- can be resurrected
resurrect = ttl > SECRETS_CACHE_MIN_TTL
end

strategy = caching_strategy(strategy, config_hash)

-- we should refresh the secret at this point
local ok, err = get_from_vault(reference, strategy, config, new_cache_key, parsed_reference)
-- try to refresh the secret, according to the remaining time the cached value may or may not be refreshed.
local ok, err = get_from_vault(reference, strategy, config, new_cache_key, parsed_reference, resurrect)
if not ok then
return nil, fmt("could not retrieve value for reference %s (%s)", reference, err)
end
Expand Down
139 changes: 139 additions & 0 deletions spec/02-integration/13-vaults/07-resurrect_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ local LUA_PATH = CUSTOM_VAULTS .. "/?.lua;" ..
local DUMMY_HEADER = "Dummy-Plugin"
local fmt = string.format

local json = require "cjson"



--- A vault test harness is a driver for vault backends, which implements
Expand Down Expand Up @@ -48,6 +50,9 @@ local fmt = string.format
--- fixtures() output is passed directly to `helpers.start_kong()`
---@field fixtures fun(self: vault_test_harness):table|nil
---
--- pause() is exactly what you'd expect
---@field pause fun(self: vault_test_harness)
---
---
---@field prefix string # generated by the test suite
---@field host string # generated by the test suite
Expand Down Expand Up @@ -88,6 +93,10 @@ local VAULTS = {
}
}
end,

pause = function(_)
return test_vault.client.pause()
end,
},
}

Expand All @@ -103,6 +112,7 @@ for _, vault in ipairs(VAULTS) do
vault.setup = vault.setup or noop
vault.teardown = vault.teardown or noop
vault.fixtures = vault.fixtures or noop
vault.pause = vault.pause or noop
end


Expand Down Expand Up @@ -232,5 +242,134 @@ describe("vault resurrect_ttl and rotation (#" .. strategy .. ") #" .. vault.nam
end)


describe("#multiworker vault resurrect_ttl and rotation (#" .. strategy .. ") #" .. vault.name, function()
local client, admin_client
local secret = "my-secret"

lazy_setup(function()
helpers.setenv("KONG_LUA_PATH_OVERRIDE", LUA_PATH)
helpers.setenv("KONG_VAULT_ROTATION_INTERVAL", "1")

vault:setup()
vault:create_secret(secret, "init")

local bp = helpers.get_db_utils(strategy,
{ "vaults", "routes", "services", "plugins" },
{ "dummy" },
{ vault.name })


assert(bp.vaults:insert({
name = vault.name,
prefix = vault.prefix,
config = vault.config,
}))

local route = assert(bp.routes:insert({
name = vault.host,
hosts = { vault.host },
paths = { "/" },
service = assert(bp.services:insert()),
}))


assert(bp.plugins:insert({
name = "post-function",
config = {
access = {fmt([[
local value, err = kong.vault.get("{vault://%s/%s?ttl=%d&resurrect_ttl=%d}")
if value then
kong.response.exit(200, {["value"]=value, ["pid"]=ngx.worker.pid()}, {["Content-Type"]="application/json"})
end
]], vault.prefix, secret, 2, 5),}
},
route = { id = route.id },
}))

assert(helpers.start_kong({
database = strategy,
nginx_conf = "spec/fixtures/custom_nginx.template",
vaults = vault.name,
plugins = "post-function",
log_level = "debug",
dedicated_config_processing = false,
-- nginx_worker_processes = 2,
nginx_main_worker_processes = 2,
}, nil, nil, vault:fixtures() ))

client = helpers.proxy_client()
admin_client = helpers.admin_client()
end)


lazy_teardown(function()
if client then
client:close()
end

if admin_client then
admin_client:close()
end

helpers.stop_kong()
vault:teardown()

helpers.unsetenv("KONG_LUA_PATH_OVERRIDE")
end)


it("resurrects secret value from shared dict when secret is deleted (backend: #" .. vault.name .. ")", function()
-- fetch all worker pids
local status_ret = admin_client:get("/")
local body = assert.res_status(200, status_ret)
local json_body = json.decode(body)
assert.truthy(json_body)
local worker_pids = json_body.pids.workers
assert.truthy(#worker_pids == 2)

local worker_secret_hits = {}
for _, worker_pid in ipairs(worker_pids) do
worker_secret_hits[tostring(worker_pid)] = false
end

vault:update_secret(secret, "old", { ttl = 2, resurrect_ttl = 5 })

-- trigger post-function in one of the workers
local res = client:get("/", {headers = {host = assert(vault.host)}})
local body = assert.res_status(200, res)
local json_body = json.decode(body)
assert.same("old", json_body.value)
worker_secret_hits[tostring(json_body.pid)] = true

vault:pause()

-- let ttl pass and try to trigger post-function in all workers
-- check all of them can resurrect the secret from shared dict
ngx.sleep(3)

assert.with_timeout(5).with_step(0.1).eventually(
function()
-- avoid connection reuse so that we can hit all workers
local new_client = helpers.proxy_client()
local res = new_client:get("/", {headers = {host = assert(vault.host)}})
local body = assert.res_status(200, res)
local json_body = json.decode(body)
new_client:close()
assert.same("old", json_body.value)
worker_secret_hits[tostring(json_body.pid)] = true

for k, v in pairs(worker_secret_hits) do
if not v then
return false, "worker pid " .. k .. " did not hit the secret"
end
end

return true
end
).is_truthy("expected all workers to resurrect the secret from shared dict")
end)
end)


end -- each vault backend
end -- each strategy
39 changes: 39 additions & 0 deletions spec/fixtures/custom_vaults/kong/vaults/test/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,24 @@ function test.init()
end


function test.pause()
local shm = ngx.shared[test.SHM_NAME]
shm:set("paused", true)
return kong.response.exit(200, { message = "succeed" })
end


function test.is_running()
local shm = ngx.shared[test.SHM_NAME]
return shm:get("paused") ~= true
end


function test.get(conf, resource, version)
if not test.is_running() then
return nil, "Vault server paused"
end

local secret = get_from_shm(resource, version)

kong.log.inspect({
Expand Down Expand Up @@ -92,6 +109,10 @@ end


function test.api()
if not test.is_running() then
return kong.response.exit(503, { message = "Vault server paused" })
end

local shm = assert(ngx.shared[test.SHM_NAME])
local secret = assert(ngx.var.secret)
local args = assert(kong.request.get_query())
Expand Down Expand Up @@ -200,6 +221,18 @@ function test.client.get(secret, version)
end


function test.client.pause()
local client = assert(http.new())

local uri = fmt("http://127.0.0.1:%d/pause", test.PORT)

local res, err = client:request_uri(uri, { method = "GET" })
assert(err == nil, "failed GET " .. uri .. ": " .. tostring(err))

return cjson.decode(res.body)
end


test.http_mock = [[
lua_shared_dict ]] .. test.SHM_NAME .. [[ 5m;

Expand All @@ -212,6 +245,12 @@ test.http_mock = [[
require("kong.vaults.test").api()
}
}

location ~^/pause {
content_by_lua_block {
require("kong.vaults.test").pause()
}
}
}
]]

Expand Down
Loading