Skip to content

Commit

Permalink
fix(healthchecker) port 2.x lock fixes to 1.5.x (#113)
Browse files Browse the repository at this point in the history
* fix(healthchecker) port 2.x lock fixes to 1.5.x

* chore(healthcheck) remove unused vars

* chore(healthcheck) fix indent level

* fix(healthcheck) correct duplicate handling in add_target

* fix(healthchecker) handle fetch_target_list failure in checker callback

* chore(healthcheck) apply suggestions from #112

Co-authored-by: Vinicius Mignot <vinicius.mignot@gmail.com>
  • Loading branch information
flrgh and locao committed Jul 18, 2022
1 parent 99a13ab commit 277f8ab
Showing 1 changed file with 150 additions and 97 deletions.
247 changes: 150 additions & 97 deletions lib/resty/healthcheck.lua
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,19 @@ local ipairs = ipairs
local table_insert = table.insert
local table_remove = table.remove
local string_format = string.format
local resty_lock = require ("resty.lock")
local re_find = ngx.re.find
-- local resty_lock = require("resty.lock") -- required later in the file"
local ssl = require("ngx.ssl")
local resty_timer = require "resty.timer"
local bit = require("bit")
local re_find = ngx.re.find
local ngx_now = ngx.now
local ngx_worker_id = ngx.worker.id
local ngx_worker_pid = ngx.worker.pid
local ssl = require("ngx.ssl")
local resty_timer = require "resty.timer"
local pcall = pcall
local get_phase = ngx.get_phase
local type = type
local assert = assert


local RESTY_EVENTS_VER = [[^0\.1\.\d+$]]
Expand All @@ -53,7 +58,6 @@ local is_array
local codec

do
local pcall = pcall
local ok

ok, new_tab = pcall(require, "table.new")
Expand Down Expand Up @@ -243,7 +247,126 @@ local function key_for(key_prefix, ip, port, hostname)
return string_format("%s:%s:%s%s", key_prefix, ip, port, hostname and ":" .. hostname or "")
end

local run_locked
do
-- resty_lock is restricted to this scope in order to keep sensitive
-- lock-handling code separate separate from all other business logic
--
-- If you need to use resty_lock in a way that is not covered by the
-- `run_locked` helper function defined below, it's strongly-advised to
-- define it fully within this scope unless you have a very good reason
--
-- (see https://github.com/Kong/lua-resty-healthcheck/pull/112)
local resty_lock = require "resty.lock"

local yieldable = {
rewrite = true,
access = true,
content = true,
timer = true,
}

local function run_in_timer(premature, fn, ...)
if premature then
return
end

return fn(...)
end

local function schedule(fn, ...)
return ngx.timer.at(0, run_in_timer, fn, ...)
end

-- timeout when yieldable
local timeout = 5

-- resty.lock consumes these options immediately, so this table can be reused
local opts = {
exptime = 10, -- timeout after which lock is released anyway
timeout = timeout, -- max wait time to acquire lock
}

---
-- Acquire a lock and run a function
--
-- The function call itself is wrapped with `pcall` to protect against
-- exception.
--
-- This function exhibits some special behavior when called during a
-- non-yieldable phase such as `init_worker` or `log`:
--
-- 1. The lock timeout is set to 0 to ensure that `resty.lock` does not
-- attempt to sleep/yield
-- 2. If acquiring the lock fails due to a timeout, `run_locked`
-- (this function) is re-scheduled to run in a timer. In this case,
-- the function returns `"scheduled"`
--
-- @param self The checker object
-- @param key the key/identifier to acquire a lock for
-- @param fn The function to execute
-- @param ... arguments that will be passed to fn
-- @return The results of the function; or nil and an error message
-- in case it fails locking.
function run_locked(self, key, fn, ...)
-- we're extra extra extra defensive in this code path
local typ = type(key)
-- XXX is a number key ever expected?
assert(typ == "string" or typ == "number",
"unexpected lock key type: " .. typ)
key = tostring(key)

-- first aqcuire a lock or conditionally re-schedule ourselves in a timer
local lock
do
local yield = yieldable[get_phase()]

if yield then
opts.timeout = timeout
else
-- if yielding is not possible in the current phase, use a zero timeout
-- so that resty.lock will return `nil, "timeout"` immediately instead of
-- calling ngx.sleep()
opts.timeout = 0
end

local err
lock, err = resty_lock:new(self.shm_name, opts)
if not lock then
return nil, "failed creating lock for '" .. key .. "', " .. err
end

local elapsed
elapsed, err = lock:lock(key)

if not elapsed and err == "timeout" and not yield then
-- yielding is not possible in the current phase, so retry in a timer
local ok, terr = schedule(run_locked, self, key, fn, ...)
if not ok then
return nil, terr
end

return "scheduled"

elseif not elapsed then
return nil, "failed acquiring lock for '" .. key .. "', " .. err
end
end

local pok, perr, res = pcall(fn, ...)

local ok, err = lock:unlock()
if not ok then
self:log(ERR, "failed unlocking '", key, "', ", err)
end

if not pok then
return nil, perr
end

return perr, res
end
end
local checker = {}


Expand All @@ -265,48 +388,15 @@ local function fetch_target_list(self)
end


--- Helper function to run the function holding a lock on the target list.
-- @see locking_target_list
local function run_fn_locked_target_list(premature, self, fn)

if premature then
return
end

local lock, lock_err = resty_lock:new(self.shm_name, {
exptime = 10, -- timeout after which lock is released anyway
timeout = 5, -- max wait time to acquire lock
})

if not lock then
return nil, "failed to create lock:" .. lock_err
end

local pok, perr = pcall(resty_lock.lock, lock, self.TARGET_LIST_LOCK)
if not pok then
self:log(DEBUG, "failed to acquire lock: ", perr)
return nil, "failed to acquire lock"
end

local target_list, err = fetch_target_list(self)

local final_ok, final_err

if target_list then
final_ok, final_err = pcall(fn, target_list)
else
final_ok, final_err = nil, err
end

local ok
ok, err = lock:unlock()
if not ok then
-- recoverable: not returning this error, only logging it
self:log(ERR, "failed to release lock '", self.TARGET_LIST_LOCK,
"': ", err)
local function with_target_list(self, fn)
local targets, err = fetch_target_list(self)
if not targets then
return nil, err
end

return final_ok, final_err
-- this is only ever called in the context of `run_locked`,
-- so no pcall needed
return fn(targets)
end


Expand All @@ -316,15 +406,10 @@ end
-- @return The results of the function; or nil and an error message
-- in case it fails locking.
local function locking_target_list(self, fn)
local ok, err = run_locked(self, self.TARGET_LIST_LOCK, with_target_list, self, fn)

local ok, err = run_fn_locked_target_list(false, self, fn)
if err == "failed to acquire lock" then
local _, terr = ngx.timer.at(0, run_fn_locked_target_list, self, fn)
if terr ~= nil then
return nil, terr
end

return true
if ok == "scheduled" then
self:log(DEBUG, "target_list function re-scheduled in timer")
end

return ok, err
Expand Down Expand Up @@ -360,15 +445,15 @@ function checker:add_target(ip, port, hostname, is_healthy, hostheader)
local internal_health = is_healthy and "healthy" or "unhealthy"

local ok, err = locking_target_list(self, function(target_list)
local found = false
local found = false

-- check whether we already have this target
for _, target in ipairs(target_list) do
if target.ip == ip and target.port == port and target.hostname == (hostname) then
if target.purge_time == nil then
self:log(DEBUG, "adding an existing target: ", hostname or "", " ", ip,
":", port, " (ignoring)")
return
return false
end
target.purge_time = nil
found = true
Expand Down Expand Up @@ -565,41 +650,6 @@ end
------------------------------------------------------------------------------


--- Helper function to actually run the function holding a lock on the target.
-- @see locking_target
local function run_mutexed_fn(premature, self, ip, port, hostname, fn)
if premature then
return
end

local lock, lock_err = resty_lock:new(self.shm_name, {
exptime = 10, -- timeout after which lock is released anyway
timeout = 5, -- max wait time to acquire lock
})
if not lock then
return nil, "failed to create lock:" .. lock_err
end
local lock_key = key_for(self.TARGET_LOCK, ip, port, hostname)

local pok, perr = pcall(resty_lock.lock, lock, lock_key)
if not pok then
self:log(DEBUG, "failed to acquire lock: ", perr)
return nil, "failed to acquire lock"
end

local final_ok, final_err = pcall(fn)

local ok, err = lock:unlock()
if not ok then
-- recoverable: not returning this error, only logging it
self:log(ERR, "failed to release lock '", lock_key, "': ", err)
end

return final_ok, final_err

end


-- Run the given function holding a lock on the target.
-- @param self The checker object
-- @param ip Target IP
Expand All @@ -609,14 +659,12 @@ end
-- @return The results of the function; or true in case it fails locking and
-- will retry asynchronously; or nil+err in case it fails to retry.
local function locking_target(self, ip, port, hostname, fn)
local ok, err = run_mutexed_fn(false, self, ip, port, hostname, fn)
if err == "failed to acquire lock" then
local _, terr = ngx.timer.at(0, run_mutexed_fn, self, ip, port, hostname, fn)
if terr ~= nil then
return nil, terr
end
local key = key_for(self.TARGET_LOCK, ip, port, hostname)

return true
local ok, err = run_locked(self, key, fn)

if ok == "scheduled" then
self:log(DEBUG, "target function for ", key, " was re-scheduled")
end

return ok, err
Expand Down Expand Up @@ -1149,7 +1197,12 @@ local function checker_callback(self, health_mode)
end

local list_to_check = {}
local targets = fetch_target_list(self)
local targets, err = fetch_target_list(self)
if not targets then
self:log(ERR, "checker_callback: ", err)
return
end

for _, target in ipairs(targets) do
local tgt = get_target(self, target.ip, target.port, target.hostname)
local internal_health = tgt and tgt.internal_health or nil
Expand Down

0 comments on commit 277f8ab

Please sign in to comment.