From ea0dd26b17f5cbad279b7a78fb0ef4faf61cd58e Mon Sep 17 00:00:00 2001 From: Michael Martin Date: Wed, 9 Aug 2023 16:19:20 -0700 Subject: [PATCH 1/6] refactor(cmd): rename kong.cmd.utils.kill => kong.cmd.utils.process There's a large refactor of this module in the pipeline, and renaming from 'kill' to 'process' makes the API more sensible. --- bin/kong-health | 6 +++--- kong-3.5.0-0.rockspec | 2 +- kong/cmd/health.lua | 4 ++-- kong/cmd/quit.lua | 6 +++--- kong/cmd/restart.lua | 4 ++-- kong/cmd/start.lua | 4 ++-- kong/cmd/utils/nginx_signals.lua | 10 +++++----- kong/cmd/utils/{kill.lua => process.lua} | 0 spec/02-integration/02-cmd/02-start_stop_spec.lua | 4 ++-- spec/helpers.lua | 8 ++++---- 10 files changed, 24 insertions(+), 24 deletions(-) rename kong/cmd/utils/{kill.lua => process.lua} (100%) diff --git a/bin/kong-health b/bin/kong-health index 9b39555f28f1..cc9ab0e3378c 100755 --- a/bin/kong-health +++ b/bin/kong-health @@ -3,7 +3,7 @@ setmetatable(_G, nil) package.path = (os.getenv("KONG_LUA_PATH_OVERRIDE") or "") .. "./?.lua;./?/init.lua;" .. package.path -local kill = require "kong.cmd.utils.kill" +local process = require "kong.cmd.utils.process" local kong_default_conf = require "kong.templates.kong_defaults" local pl_app = require "pl.lapp" local pl_config = require "pl.config" @@ -42,8 +42,8 @@ local function execute(args) print("") local pid_file = pl_path.join(prefix, "pids", "nginx.pid") - kill.is_running(pid_file) - assert(kill.is_running(pid_file), "Kong is not running at " .. prefix) + process.is_running(pid_file) + assert(process.is_running(pid_file), "Kong is not running at " .. prefix) print("Kong is healthy at ", prefix) end diff --git a/kong-3.5.0-0.rockspec b/kong-3.5.0-0.rockspec index 8c59cf2906b6..525a319e1386 100644 --- a/kong-3.5.0-0.rockspec +++ b/kong-3.5.0-0.rockspec @@ -116,7 +116,7 @@ build = { ["kong.cmd.version"] = "kong/cmd/version.lua", ["kong.cmd.hybrid"] = "kong/cmd/hybrid.lua", ["kong.cmd.utils.log"] = "kong/cmd/utils/log.lua", - ["kong.cmd.utils.kill"] = "kong/cmd/utils/kill.lua", + ["kong.cmd.utils.process"] = "kong/cmd/utils/process.lua", ["kong.cmd.utils.env"] = "kong/cmd/utils/env.lua", ["kong.cmd.utils.migrations"] = "kong/cmd/utils/migrations.lua", ["kong.cmd.utils.tty"] = "kong/cmd/utils/tty.lua", diff --git a/kong/cmd/health.lua b/kong/cmd/health.lua index ba8d37c758f2..60b7035f9efb 100644 --- a/kong/cmd/health.lua +++ b/kong/cmd/health.lua @@ -1,5 +1,5 @@ local log = require "kong.cmd.utils.log" -local kill = require "kong.cmd.utils.kill" +local process = require "kong.cmd.utils.process" local pl_path = require "pl.path" local pl_tablex = require "pl.tablex" local pl_stringx = require "pl.stringx" @@ -26,7 +26,7 @@ local function execute(args) local count = 0 for k, v in pairs(pids) do - local running = kill.is_running(v) + local running = process.is_running(v) local msg = pl_stringx.ljust(k, 12, ".") .. (running and "running" or "not running") if running then count = count + 1 diff --git a/kong/cmd/quit.lua b/kong/cmd/quit.lua index 3b7c95d37d37..5de47a4bd9ab 100644 --- a/kong/cmd/quit.lua +++ b/kong/cmd/quit.lua @@ -1,7 +1,7 @@ local nginx_signals = require "kong.cmd.utils.nginx_signals" local conf_loader = require "kong.conf_loader" local pl_path = require "pl.path" -local kill = require "kong.cmd.utils.kill" +local process = require "kong.cmd.utils.process" local log = require "kong.cmd.utils.log" local function execute(args) @@ -24,7 +24,7 @@ local function execute(args) log.verbose("waiting %s seconds before quitting", args.wait) while twait > ngx.now() do ngx.sleep(0.2) - if not kill.is_running(conf.nginx_pid) then + if not process.is_running(conf.nginx_pid) then log.error("Kong stopped while waiting (unexpected)") return end @@ -41,7 +41,7 @@ local function execute(args) local texp, running = tstart + math.max(args.timeout, 1) -- min 1s timeout repeat ngx.sleep(0.2) - running = kill.is_running(conf.nginx_pid) + running = process.is_running(conf.nginx_pid) ngx.update_time() until not running or ngx.now() >= texp diff --git a/kong/cmd/restart.lua b/kong/cmd/restart.lua index 0bc4777c47e9..fab85cc98e1d 100644 --- a/kong/cmd/restart.lua +++ b/kong/cmd/restart.lua @@ -1,6 +1,6 @@ local log = require "kong.cmd.utils.log" local stop = require "kong.cmd.stop" -local kill = require "kong.cmd.utils.kill" +local process = require "kong.cmd.utils.process" local start = require "kong.cmd.start" local pl_path = require "pl.path" local conf_loader = require "kong.conf_loader" @@ -27,7 +27,7 @@ local function execute(args) local running repeat ngx.sleep(0.1) - running = kill.is_running(conf.nginx_pid) + running = process.is_running(conf.nginx_pid) until not running or ngx.time() >= texp start.execute(args) diff --git a/kong/cmd/start.lua b/kong/cmd/start.lua index 63404cbaa98f..62ab610d7587 100644 --- a/kong/cmd/start.lua +++ b/kong/cmd/start.lua @@ -3,7 +3,7 @@ local prefix_handler = require "kong.cmd.utils.prefix_handler" local nginx_signals = require "kong.cmd.utils.nginx_signals" local conf_loader = require "kong.conf_loader" local kong_global = require "kong.global" -local kill = require "kong.cmd.utils.kill" +local process = require "kong.cmd.utils.process" local log = require "kong.cmd.utils.log" local DB = require "kong.db" local lfs = require "lfs" @@ -53,7 +53,7 @@ local function execute(args) conf.pg_timeout = args.db_timeout -- connect + send + read - assert(not kill.is_running(conf.nginx_pid), + assert(not process.is_running(conf.nginx_pid), "Kong is already running in " .. conf.prefix) assert(prefix_handler.prepare_prefix(conf, args.nginx_conf, nil, nil, diff --git a/kong/cmd/utils/nginx_signals.lua b/kong/cmd/utils/nginx_signals.lua index fb9065466eb9..fcec576142c6 100644 --- a/kong/cmd/utils/nginx_signals.lua +++ b/kong/cmd/utils/nginx_signals.lua @@ -1,6 +1,6 @@ local ffi = require "ffi" local log = require "kong.cmd.utils.log" -local kill = require "kong.cmd.utils.kill" +local process = require "kong.cmd.utils.process" local meta = require "kong.meta" local pl_path = require "pl.path" local version = require "version" @@ -55,13 +55,13 @@ end local function send_signal(kong_conf, signal) - if not kill.is_running(kong_conf.nginx_pid) then + if not process.is_running(kong_conf.nginx_pid) then return nil, fmt("nginx not running in prefix: %s", kong_conf.prefix) end log.verbose("sending %s signal to nginx running at %s", signal, kong_conf.nginx_pid) - local code = kill.kill(kong_conf.nginx_pid, "-s " .. signal) + local code = process.kill(kong_conf.nginx_pid, "-s " .. signal) if code ~= 0 then return nil, "could not send signal" end @@ -143,7 +143,7 @@ function _M.start(kong_conf) return nil, err end - if kill.is_running(kong_conf.nginx_pid) then + if process.is_running(kong_conf.nginx_pid) then return nil, "nginx is already running in " .. kong_conf.prefix end @@ -215,7 +215,7 @@ end function _M.reload(kong_conf) - if not kill.is_running(kong_conf.nginx_pid) then + if not process.is_running(kong_conf.nginx_pid) then return nil, fmt("nginx not running in prefix: %s", kong_conf.prefix) end diff --git a/kong/cmd/utils/kill.lua b/kong/cmd/utils/process.lua similarity index 100% rename from kong/cmd/utils/kill.lua rename to kong/cmd/utils/process.lua diff --git a/spec/02-integration/02-cmd/02-start_stop_spec.lua b/spec/02-integration/02-cmd/02-start_stop_spec.lua index 1bc151b0bb38..0d76203435e9 100644 --- a/spec/02-integration/02-cmd/02-start_stop_spec.lua +++ b/spec/02-integration/02-cmd/02-start_stop_spec.lua @@ -545,7 +545,7 @@ describe("kong start/stop #" .. strategy, function() end) it("should not start Kong if already running in prefix", function() - local kill = require "kong.cmd.utils.kill" + local process = require "kong.cmd.utils.process" assert(helpers.kong_exec("start --prefix " .. PREFIX, { pg_database = TEST_CONF.pg_database, @@ -557,7 +557,7 @@ describe("kong start/stop #" .. strategy, function() assert.False(ok) assert.matches("Kong is already running in " .. PREFIX, stderr, nil, true) - assert(kill.is_running(TEST_CONF.nginx_pid)) + assert(process.is_running(TEST_CONF.nginx_pid)) end) it("does not prepare the prefix directory if Kong is already running", function() diff --git a/spec/helpers.lua b/spec/helpers.lua index 6ba834fbf471..82f61cada58e 100644 --- a/spec/helpers.lua +++ b/spec/helpers.lua @@ -4045,7 +4045,7 @@ end -- Only use in CLI tests from spec/02-integration/01-cmd kill_all = function(prefix, timeout) - local kill = require "kong.cmd.utils.kill" + local process = require "kong.cmd.utils.process" local running_conf = get_running_conf(prefix) if not running_conf then return end @@ -4053,7 +4053,7 @@ end -- kill kong_tests.conf service local pid_path = running_conf.nginx_pid if pl_path.exists(pid_path) then - kill.kill(pid_path, "-TERM") + process.kill(pid_path, "-TERM") wait_pid(pid_path, timeout) end end, @@ -4069,7 +4069,7 @@ end end, signal = function(prefix, signal, pid_path) - local kill = require "kong.cmd.utils.kill" + local process = require "kong.cmd.utils.process" if not pid_path then local running_conf = get_running_conf(prefix) @@ -4080,7 +4080,7 @@ end pid_path = running_conf.nginx_pid end - return kill.kill(pid_path, signal) + return process.kill(pid_path, signal) end, -- send signal to all Nginx workers, not including the master signal_workers = function(prefix, signal, pid_path) From f50d9a4860358de3c611f94a9582625e63d38c05 Mon Sep 17 00:00:00 2001 From: Michael Martin Date: Wed, 9 Aug 2023 16:26:00 -0700 Subject: [PATCH 2/6] refactor(cmd): rename process.is_running => process.exists This is another name-only change ahead of some larger refactoring of the kong.cmd.utils.process module. --- bin/kong-health | 4 ++-- kong/cmd/health.lua | 2 +- kong/cmd/quit.lua | 4 ++-- kong/cmd/restart.lua | 2 +- kong/cmd/start.lua | 2 +- kong/cmd/utils/nginx_signals.lua | 6 +++--- kong/cmd/utils/process.lua | 4 ++-- spec/02-integration/02-cmd/02-start_stop_spec.lua | 2 +- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/bin/kong-health b/bin/kong-health index cc9ab0e3378c..d7176014e44f 100755 --- a/bin/kong-health +++ b/bin/kong-health @@ -42,8 +42,8 @@ local function execute(args) print("") local pid_file = pl_path.join(prefix, "pids", "nginx.pid") - process.is_running(pid_file) - assert(process.is_running(pid_file), "Kong is not running at " .. prefix) + process.exists(pid_file) + assert(process.exists(pid_file), "Kong is not running at " .. prefix) print("Kong is healthy at ", prefix) end diff --git a/kong/cmd/health.lua b/kong/cmd/health.lua index 60b7035f9efb..23384dcac8b0 100644 --- a/kong/cmd/health.lua +++ b/kong/cmd/health.lua @@ -26,7 +26,7 @@ local function execute(args) local count = 0 for k, v in pairs(pids) do - local running = process.is_running(v) + local running = process.exists(v) local msg = pl_stringx.ljust(k, 12, ".") .. (running and "running" or "not running") if running then count = count + 1 diff --git a/kong/cmd/quit.lua b/kong/cmd/quit.lua index 5de47a4bd9ab..cc1d4b4c3b17 100644 --- a/kong/cmd/quit.lua +++ b/kong/cmd/quit.lua @@ -24,7 +24,7 @@ local function execute(args) log.verbose("waiting %s seconds before quitting", args.wait) while twait > ngx.now() do ngx.sleep(0.2) - if not process.is_running(conf.nginx_pid) then + if not process.exists(conf.nginx_pid) then log.error("Kong stopped while waiting (unexpected)") return end @@ -41,7 +41,7 @@ local function execute(args) local texp, running = tstart + math.max(args.timeout, 1) -- min 1s timeout repeat ngx.sleep(0.2) - running = process.is_running(conf.nginx_pid) + running = process.exists(conf.nginx_pid) ngx.update_time() until not running or ngx.now() >= texp diff --git a/kong/cmd/restart.lua b/kong/cmd/restart.lua index fab85cc98e1d..fc9bac16f5b0 100644 --- a/kong/cmd/restart.lua +++ b/kong/cmd/restart.lua @@ -27,7 +27,7 @@ local function execute(args) local running repeat ngx.sleep(0.1) - running = process.is_running(conf.nginx_pid) + running = process.exists(conf.nginx_pid) until not running or ngx.time() >= texp start.execute(args) diff --git a/kong/cmd/start.lua b/kong/cmd/start.lua index 62ab610d7587..a43210b60795 100644 --- a/kong/cmd/start.lua +++ b/kong/cmd/start.lua @@ -53,7 +53,7 @@ local function execute(args) conf.pg_timeout = args.db_timeout -- connect + send + read - assert(not process.is_running(conf.nginx_pid), + assert(not process.exists(conf.nginx_pid), "Kong is already running in " .. conf.prefix) assert(prefix_handler.prepare_prefix(conf, args.nginx_conf, nil, nil, diff --git a/kong/cmd/utils/nginx_signals.lua b/kong/cmd/utils/nginx_signals.lua index fcec576142c6..40f11b92a48f 100644 --- a/kong/cmd/utils/nginx_signals.lua +++ b/kong/cmd/utils/nginx_signals.lua @@ -55,7 +55,7 @@ end local function send_signal(kong_conf, signal) - if not process.is_running(kong_conf.nginx_pid) then + if not process.exists(kong_conf.nginx_pid) then return nil, fmt("nginx not running in prefix: %s", kong_conf.prefix) end @@ -143,7 +143,7 @@ function _M.start(kong_conf) return nil, err end - if process.is_running(kong_conf.nginx_pid) then + if process.exists(kong_conf.nginx_pid) then return nil, "nginx is already running in " .. kong_conf.prefix end @@ -215,7 +215,7 @@ end function _M.reload(kong_conf) - if not process.is_running(kong_conf.nginx_pid) then + if not process.exists(kong_conf.nginx_pid) then return nil, fmt("nginx not running in prefix: %s", kong_conf.prefix) end diff --git a/kong/cmd/utils/process.lua b/kong/cmd/utils/process.lua index 94820dfc1314..1297f6dee166 100644 --- a/kong/cmd/utils/process.lua +++ b/kong/cmd/utils/process.lua @@ -17,7 +17,7 @@ local function kill(pid_file, args) end end -local function is_running(pid_file) +local function exists(pid_file) -- we do our own pid_file exists check here because -- we want to return `nil` in case of NOT running, -- and not `0` like `kill` would return. @@ -28,5 +28,5 @@ end return { kill = kill, - is_running = is_running + exists = exists, } diff --git a/spec/02-integration/02-cmd/02-start_stop_spec.lua b/spec/02-integration/02-cmd/02-start_stop_spec.lua index 0d76203435e9..725d3f1cba1e 100644 --- a/spec/02-integration/02-cmd/02-start_stop_spec.lua +++ b/spec/02-integration/02-cmd/02-start_stop_spec.lua @@ -557,7 +557,7 @@ describe("kong start/stop #" .. strategy, function() assert.False(ok) assert.matches("Kong is already running in " .. PREFIX, stderr, nil, true) - assert(process.is_running(TEST_CONF.nginx_pid)) + assert(process.exists(TEST_CONF.nginx_pid)) end) it("does not prepare the prefix directory if Kong is already running", function() From 972030c74aebb5494837a23279b8b577694398bd Mon Sep 17 00:00:00 2001 From: Michael Martin Date: Wed, 9 Aug 2023 18:05:09 -0700 Subject: [PATCH 3/6] refactor(cmd): use resty.signal for process signaling This refactors kong.cmd.utils.process to use the resty.signal library for sending signals instead of dropping to a shell. This comes with improvements (albeit minor) to performance and security. --- kong/cmd/utils/nginx_signals.lua | 11 +- kong/cmd/utils/process.lua | 324 +++++++++++++- .../02-integration/02-cmd/13-signals_spec.lua | 33 +- spec/02-integration/02-cmd/15-utils_spec.lua | 409 ++++++++++++++++-- spec/helpers.lua | 12 +- 5 files changed, 687 insertions(+), 102 deletions(-) diff --git a/kong/cmd/utils/nginx_signals.lua b/kong/cmd/utils/nginx_signals.lua index 40f11b92a48f..f3adaeeb759b 100644 --- a/kong/cmd/utils/nginx_signals.lua +++ b/kong/cmd/utils/nginx_signals.lua @@ -55,14 +55,15 @@ end local function send_signal(kong_conf, signal) - if not process.exists(kong_conf.nginx_pid) then + local pid = process.pid(kong_conf.nginx_pid) + + if not pid or not process.exists(pid) then return nil, fmt("nginx not running in prefix: %s", kong_conf.prefix) end log.verbose("sending %s signal to nginx running at %s", signal, kong_conf.nginx_pid) - local code = process.kill(kong_conf.nginx_pid, "-s " .. signal) - if code ~= 0 then + if not process.signal(pid, signal) then return nil, "could not send signal" end @@ -205,12 +206,12 @@ end function _M.stop(kong_conf) - return send_signal(kong_conf, "TERM") + return send_signal(kong_conf, process.SIG_TERM) end function _M.quit(kong_conf) - return send_signal(kong_conf, "QUIT") + return send_signal(kong_conf, process.SIG_QUIT) end diff --git a/kong/cmd/utils/process.lua b/kong/cmd/utils/process.lua index 1297f6dee166..7e5e43833b09 100644 --- a/kong/cmd/utils/process.lua +++ b/kong/cmd/utils/process.lua @@ -1,32 +1,312 @@ -local pl_path = require "pl.path" -local pl_utils = require "pl.utils" -local log = require "kong.cmd.utils.log" - -local cmd_tmpl = [[kill %s `cat %s 2>&1` >/dev/null 2>&1]] - -local function kill(pid_file, args) - log.debug("sending signal to pid at: %s", pid_file) - local cmd = string.format(cmd_tmpl, args or "-0", pid_file) - if pl_path.exists(pid_file) then - log.debug(cmd) - local _, code = pl_utils.execute(cmd) - return code +local read_file = require("pl.file").read +local resty_kill = require("resty.signal").kill + +local tonumber = tonumber +local type = type +local floor = math.floor + + +-- not supporting other usage of kill(2) for the moment +local MIN_PID = 1 + +-- source: proc(5) man page +local MAX_PID = 2 ^ 22 + + +local SIG_NONE = 0 +local SIG_HUP = 1 +local SIG_INT = 2 +local SIG_QUIT = 3 +local SIG_ILL = 4 +local SIG_TRAP = 5 +local SIG_ABRT = 6 +local SIG_BUS = 7 +local SIG_FPE = 8 +local SIG_KILL = 9 +local SIG_USR1 = 10 +local SIG_SEGV = 11 +local SIG_USR2 = 12 +local SIG_PIPE = 13 +local SIG_ALRM = 14 +local SIG_TERM = 15 +local SIG_CHLD = 17 +local SIG_CONT = 18 +local SIG_STOP = 19 +local SIG_TSTP = 20 +local SIG_TTIN = 21 +local SIG_TTOU = 22 +local SIG_URG = 23 +local SIG_XCPU = 24 +local SIG_XFSZ = 25 +local SIG_VTALRM = 26 +local SIG_PROF = 27 +local SIG_WINCH = 28 +local SIG_IO = 29 +local SIG_PWR = 30 +local SIG_EMT = 31 +local SIG_SYS = 32 +local SIG_INFO = 33 + + +--- +-- Checks if a value is a valid PID and returns it. +-- +---```lua +--- validate_pid(123) --> 123 +--- +--- -- value can be a numeric string +--- validate_pid("123") --> 123 +--- validate_pid("foo") --> nil +--- +--- -- value must be an integer +--- validate_pid(1.23) --> nil +--- +--- -- value must be in the valid range for PIDs +--- validate_pid(0) --> nil +--- validate_pid(2^32) --> nil +---``` +--- +---@param value any +---@return integer? pid +local function validate_pid(value) + local pid = tonumber(value) + return pid + -- good enough integer check for our use case + and floor(pid) == pid + and pid >= MIN_PID and pid <= MAX_PID + and pid +end + + +--- +-- Read and return the process ID from a pid file. +-- +---@param fname string +---@return integer|nil pid +---@return nil|string error +local function pid_from_file(fname) + local data, err = read_file(fname) + if not data then + return nil, "failed reading PID file: " .. tostring(err) + end + + -- strip whitespace + data = data:gsub("^%s*(.-)%s*$", "%1") + + if #data == 0 then + return nil, "PID file is empty" + end + + local pid = validate_pid(data) + + if not pid then + return nil, "file does not contain a valid PID" + end + + return pid +end + + +--- +-- Target processes may be referenced by their integer id (PID) +-- or by a pid filename. +-- +---@alias kong.cmd.utils.process.target +---| integer # pid +---| string # pid file + + +--- +-- Detects a PID from input and returns it as a number. +-- +-- The target process may be supplied as a PID (number) or path to a +-- pidfile (string). +-- +-- On success, returns the PID as a number. +-- +-- On any failure related to reading/parsing the PID from a file, returns +-- `nil` and an error string. +-- +-- Raises an error for invalid input (wrong Lua type, target is not a valid PID +-- number, etc). +-- +---@param target kong.cmd.utils.process.target +---@return integer|nil pid +---@return nil|string error +local function get_pid(target) + local typ = type(target) + + if typ == "number" then + if not validate_pid(target) then + error("target PID must be an integer from " + .. MIN_PID .. " to " .. MAX_PID + .. ", got: " .. tostring(target), 2) + end + + return target + + elseif typ == "string" then + return pid_from_file(target) + else - log.debug("no pid file at: %s", pid_file) - return 0 + error("invalid PID type: '" .. typ .. "'", 2) + end +end + + +--- +-- Send a signal to a process. +-- +-- The signal may be specified as a name ("TERM") or integer (15). +-- +---@param target kong.cmd.utils.process.target +---@param sig resty.signal.name|integer +---@return boolean|nil ok +---@return nil|string error +local function signal(target, sig) + local pid, err = get_pid(target) + + if not pid then + return nil, err end + + return resty_kill(pid, sig) +end + + +--- +-- Send the TERM signal to a process. +-- +---@param target kong.cmd.utils.process.target +---@return boolean|nil ok +---@return nil|string error +local function term(target) + return signal(target, SIG_TERM) end -local function exists(pid_file) - -- we do our own pid_file exists check here because - -- we want to return `nil` in case of NOT running, - -- and not `0` like `kill` would return. - if pl_path.exists(pid_file) then - return kill(pid_file) == 0 + +--- +-- Send the KILL signal to a process. +-- +---@param target kong.cmd.utils.process.target +---@return boolean|nil ok +---@return nil|string error +local function kill(target) + return signal(target, SIG_KILL) +end + + +--- +-- Send the QUIT signal to a process. +-- +---@param target kong.cmd.utils.process.target +---@return boolean|nil ok +---@return nil|string error +local function quit(target) + return signal(target, SIG_QUIT) +end + + +--- +-- Send the HUP signal to a process. +-- +---@param target kong.cmd.utils.process.target +---@return boolean|nil ok +---@return nil|string error +local function hup(target) + return signal(target, SIG_HUP) +end + + +--- +-- Check for the existence of a process. +-- +-- Under the hood this sends the special `0` signal to check the process state. +-- +-- Returns a boolean if the process unequivocally exists/does not exist. +-- +-- Returns `nil` and an error string if an error is encountered while attemping +-- to read a pidfile. +-- +-- Raises an error for invalid input or upon any unexpected result returned by +-- resty.signal. +-- +-- +-- Callers should decide for themselves how strict they must be when handling +-- errors. For instance, when NGINX is starting up there is a period where the +-- pidfile may be empty or non-existent, which will result in this function +-- returning nil+error. For some callers this might be expected and acceptible, +-- but for others it may not. +-- +---@param target kong.cmd.utils.process.target +---@return boolean|nil exists +---@return nil|string error +local function exists(target) + local pid, err = get_pid(target) + if not pid then + return nil, err + end + + local ok + ok, err = resty_kill(pid, 0) + + if ok then + return true + + elseif err == "No such process" then + return false + + elseif err == "Operation not permitted" then + -- the process *does* exist but is not manageable by us + return true end + + error(err or "unexpected error from resty.signal.kill()") end + return { - kill = kill, exists = exists, + pid_from_file = pid_from_file, + signal = signal, + pid = get_pid, + + term = term, + kill = kill, + quit = quit, + hup = hup, + + SIG_NONE = SIG_NONE, + SIG_HUP = SIG_HUP, + SIG_INT = SIG_INT, + SIG_QUIT = SIG_QUIT, + SIG_ILL = SIG_ILL, + SIG_TRAP = SIG_TRAP, + SIG_ABRT = SIG_ABRT, + SIG_BUS = SIG_BUS, + SIG_FPE = SIG_FPE, + SIG_KILL = SIG_KILL, + SIG_USR1 = SIG_USR1, + SIG_SEGV = SIG_SEGV, + SIG_USR2 = SIG_USR2, + SIG_PIPE = SIG_PIPE, + SIG_ALRM = SIG_ALRM, + SIG_TERM = SIG_TERM, + SIG_CHLD = SIG_CHLD, + SIG_CONT = SIG_CONT, + SIG_STOP = SIG_STOP, + SIG_TSTP = SIG_TSTP, + SIG_TTIN = SIG_TTIN, + SIG_TTOU = SIG_TTOU, + SIG_URG = SIG_URG, + SIG_XCPU = SIG_XCPU, + SIG_XFSZ = SIG_XFSZ, + SIG_VTALRM = SIG_VTALRM, + SIG_PROF = SIG_PROF, + SIG_WINCH = SIG_WINCH, + SIG_IO = SIG_IO, + SIG_PWR = SIG_PWR, + SIG_EMT = SIG_EMT, + SIG_SYS = SIG_SYS, + SIG_INFO = SIG_INFO, } diff --git a/spec/02-integration/02-cmd/13-signals_spec.lua b/spec/02-integration/02-cmd/13-signals_spec.lua index 1d709c00bfa1..7d28a1698ad4 100644 --- a/spec/02-integration/02-cmd/13-signals_spec.lua +++ b/spec/02-integration/02-cmd/13-signals_spec.lua @@ -1,4 +1,5 @@ local helpers = require "spec.helpers" +local process = require "kong.cmd.utils.process" describe("signals", function() @@ -13,57 +14,49 @@ describe("signals", function() it("can receive USR1", function() assert(helpers.start_kong()) - helpers.signal(nil, "-USR1") + helpers.signal(nil, process.SIG_USR1) - assert - .eventually(function () - local conf = helpers.get_running_conf() - local _, code = helpers.execute("grep -F '(SIGUSR1) received from' " .. - conf.nginx_err_logs, true) - assert.equal(0, code) - end) - .with_timeout(15) - .has_no_error() + assert.logfile().has.line('(SIGUSR1) received from', true) end) it("can receive USR2", function() assert(helpers.start_kong()) local conf = helpers.get_running_conf() + local pid_f = conf.nginx_pid local oldpid_f = conf.nginx_pid .. ".oldbin" finally(function() - ngx.sleep(0.5) - helpers.signal(nil, "-TERM") - helpers.signal(nil, "-TERM", oldpid_f) + process.term(pid_f) + process.term(oldpid_f) end) - helpers.signal(nil, "-USR2") + process.signal(pid_f, process.SIG_USR2) helpers.pwait_until(function() -- USR2 received assert.logfile().has.line('(SIGUSR2) received from', true) -- USR2 succeeded - assert.logfile().has.no.line('execve() failed', true) + assert.logfile().has.no.line('execve() failed', true, 0) assert.logfile().has.line('start new binary process', true) -- new master started successfully - assert.logfile().has.no.line('exited with code 1', true) + assert.logfile().has.no.line('exited with code 1', true, 0) -- 2 master processes - assert.is_true(helpers.path.isfile(oldpid_f)) + assert.truthy(process.pid_from_file(oldpid_f)) end) -- quit old master - helpers.signal(nil, "-QUIT", oldpid_f) + process.quit(oldpid_f) helpers.wait_pid(oldpid_f) assert.is_false(helpers.path.isfile(oldpid_f)) helpers.pwait_until(function () - assert.is_true(helpers.path.isfile(conf.nginx_pid)) + assert.truthy(process.pid_from_file(pid_f)) -- new master running - assert.equal(0, helpers.signal(nil, "-0")) + assert.is_true(process.exists(pid_f)) end) end) end) diff --git a/spec/02-integration/02-cmd/15-utils_spec.lua b/spec/02-integration/02-cmd/15-utils_spec.lua index 81a7b5489de1..fe0063221d06 100644 --- a/spec/02-integration/02-cmd/15-utils_spec.lua +++ b/spec/02-integration/02-cmd/15-utils_spec.lua @@ -1,75 +1,388 @@ local signals = require "kong.cmd.utils.nginx_signals" +local process = require "kong.cmd.utils.process" local pl_path = require "pl.path" local pl_file = require "pl.file" local pl_dir = require "pl.dir" +local pipe = require "ngx.pipe" -describe("kong cli utils", function() +math.randomseed(ngx.worker.pid() + ngx.now()) - describe("nginx_signals", function() - - describe("find_nginx_bin()", function() - local tmpdir - before_each(function() - tmpdir = pl_path.tmpname() - assert(os.remove(tmpdir)) - end) +describe("kong.cmd.utils.nginx_signals", function() + describe("find_nginx_bin()", function() + local tmpdir + before_each(function() + tmpdir = pl_path.tmpname() + assert(os.remove(tmpdir)) + end) - after_each(function() - pcall(pl_dir.rmtree, tmpdir) - end) + after_each(function() + pcall(pl_dir.rmtree, tmpdir) + end) - local function fake_nginx_binary(version) - local bin_dir = pl_path.join(tmpdir, "nginx/sbin") - pl_dir.makepath(bin_dir) + local function fake_nginx_binary(version) + local bin_dir = pl_path.join(tmpdir, "nginx/sbin") + pl_dir.makepath(bin_dir) - local nginx = pl_path.join(bin_dir, "nginx") - pl_file.write(nginx, string.format( - [[#!/bin/sh + local nginx = pl_path.join(bin_dir, "nginx") + pl_file.write(nginx, string.format( + [[#!/bin/sh echo 'nginx version: openresty/%s' >&2]], version - )) + )) + + assert(os.execute("chmod +x " .. nginx)) + + return nginx + end + + + it("works with empty/unset input", function() + local bin, err = signals.find_nginx_bin() + assert.is_nil(err) + assert.matches("sbin/nginx", bin) + assert.truthy(pl_path.exists(bin)) + end) + + it("works when openresty_path is unset", function() + local bin, err = signals.find_nginx_bin({}) + assert.is_nil(err) + assert.matches("sbin/nginx", bin) + assert.truthy(pl_path.exists(bin)) + end) + + it("prefers `openresty_path` when supplied", function() + local meta = require "kong.meta" + local version = meta._DEPENDENCIES.nginx[1] + + local nginx = fake_nginx_binary(version) + + local bin, err = signals.find_nginx_bin({ openresty_path = tmpdir }) + + assert.is_nil(err) + assert.equals(nginx, bin) + end) + + it("returns nil+error if a compatible nginx bin is not found in `openresty_path`", function() + fake_nginx_binary("1.0.1") + local bin, err = signals.find_nginx_bin({ openresty_path = tmpdir }) + assert.is_nil(bin) + assert.not_nil(err) + assert.matches("could not find OpenResty", err) + end) + end) +end) + +describe("kong.cmd.utils.process", function() + local pid_file + + before_each(function() + pid_file = assert.truthy(pl_path.tmpname()) + end) + + after_each(function() + if pid_file and pl_path.exists(pid_file) then + assert(os.remove(pid_file)) + end + end) + + describe("pid()", function() + it("accepts a number", function() + local pid, err = process.pid(123) + assert.is_nil(err) + assert.equals(123, pid) + end) + + it("accepts a pid filename", function() + assert.truthy(pl_file.write(pid_file, "1234")) + local pid, err = process.pid(pid_file) + assert.is_nil(err) + assert.equals(1234, pid) + end) + + it("throws an error() for invalid input types", function() + local exp = "invalid PID type: " + + assert.error_matches(function() process.pid(nil) end, exp) + + assert.error_matches(function() process.pid({}) end, exp) + + assert.error_matches(function() process.pid(ngx.null) end, exp) + + assert.error_matches(function() process.pid(true) end, exp) + end) - assert(os.execute("chmod +x " .. nginx)) + it("throws an error() for invalid PID numbers", function() + local exp = "target PID must be an integer from " - return nginx + assert.error_matches(function() process.pid(-1) end, exp) + + assert.error_matches(function() process.pid(0) end, exp) + + assert.error_matches(function() process.pid(1.000000001) end, exp) + + assert.error_matches(function() process.pid(math.pi) end, exp) + end) + end) + + describe("pid_from_file()", function() + it("reads pid from a file", function() + assert.truthy(pl_file.write(pid_file, "1234")) + local pid, err = process.pid_from_file(pid_file) + assert.is_nil(err) + assert.equals(1234, pid) + end) + + it("trims whitespace from the file contents", function() + assert.truthy(pl_file.write(pid_file, "1234\n")) + local pid, err = process.pid_from_file(pid_file) + assert.is_nil(err) + assert.equals(1234, pid) + end) + + it("returns nil+error on filesystem errors", function() + if pl_path.exists(pid_file) then + assert.truthy(os.remove(pid_file)) end + local pid, err = process.pid_from_file(pid_file) + assert.is_nil(pid) + assert.matches("failed reading PID file: ", err) + end) + + it("returns nil+error for non-file input", function() + local pid, err = process.pid_from_file("/") + assert.is_nil(pid) + assert.is_string(err) + end) + + it("returns nil+error if the pid file is empty", function() + local exp = "PID file is empty" + + local pid, err = process.pid_from_file(pid_file) + assert.is_nil(pid) + assert.same(exp, err) + + -- whitespace trimming applies before empty check + assert.truthy(pl_file.write(pid_file, " \n")) + pid, err = process.pid_from_file(pid_file) + assert.is_nil(pid) + assert.same(exp, err) + end) + + it("returns nil+error if the pid file contents are invalid", function() + local exp = "file does not contain a valid PID" + + assert.truthy(pl_file.write(pid_file, "not a pid\n")) + local pid, err = process.pid_from_file(pid_file) + assert.is_nil(pid) + assert.same(exp, err) + + assert.truthy(pl_file.write(pid_file, "-1.23")) + pid, err = process.pid_from_file(pid_file) + assert.is_nil(pid) + assert.same(exp, err) + end) + end) - it("works with empty/unset input", function() - local bin, err = signals.find_nginx_bin() - assert.is_nil(err) - assert.matches("sbin/nginx", bin) - assert.truthy(pl_path.exists(bin)) - end) + describe("exists()", function() + it("returns true for a pid of a running process", function() + local exists, err = process.exists(ngx.worker.pid()) + assert.is_nil(err) + assert.is_true(exists) + end) - it("works when openresty_path is unset", function() - local bin, err = signals.find_nginx_bin({}) - assert.is_nil(err) - assert.matches("sbin/nginx", bin) - assert.truthy(pl_path.exists(bin)) - end) + it("returns true for a pid file of a running process", function() + assert.truthy(pl_file.write(pid_file, tostring(ngx.worker.pid()))) + local exists, err = process.exists(pid_file) + assert.is_nil(err) + assert.is_true(exists) + end) + + it("returns false for the pid of a non-existent process", function() + local exists, err + + for _ = 1, 1000 do + local pid = math.random(1000, 2^16) + exists, err = process.exists(pid) + if exists == false then + break + end + end - it("prefers `openresty_path` when supplied", function() - local meta = require "kong.meta" - local version = meta._DEPENDENCIES.nginx[1] + assert.is_nil(err) + assert.is_false(exists) + end) - local nginx = fake_nginx_binary(version) + it("returns false for the pid file of a non-existent process", function() + local exists, err - local bin, err = signals.find_nginx_bin({ openresty_path = tmpdir }) + for _ = 1, 1000 do + local pid = math.random(1000, 2^16) + assert.truthy(pl_file.write(pid_file, tostring(pid))) + exists, err = process.exists(pid_file) + if exists == false then + break + end + end - assert.is_nil(err) - assert.equals(nginx, bin) - end) + assert.is_nil(err) + assert.is_false(exists) + end) - it("returns nil+error if a compatible nginx bin is not found in `openresty_path`", function() - fake_nginx_binary("1.0.1") - local bin, err = signals.find_nginx_bin({ openresty_path = tmpdir }) - assert.is_nil(bin) - assert.not_nil(err) - assert.matches("could not find OpenResty", err) - end) + it("returns nil+error when a pid file does not exist", function() + if pl_path.exists(pid_file) then + assert.truthy(os.remove(pid_file)) + end + local pid, err = process.exists(pid_file) + assert.is_nil(pid) + assert.matches("failed reading PID file", err) end) + it("returns nil+error when a pid file does not contain a valid pid", function() + assert.truthy(pl_file.write(pid_file, "nope\n")) + local pid, err = process.exists(pid_file) + assert.is_nil(pid) + assert.same("file does not contain a valid PID", err) + end) end) + describe("signal()", function() + local proc + + local function spawn() + local err + proc, err = pipe.spawn({ "sleep", "60" }, { + write_timeout = 100, + stdout_read_timeout = 100, + stderr_read_timeout = 100, + wait_timeout = 1000, + }) + assert.is_nil(err) + assert.not_nil(proc) + + assert.truthy(proc:shutdown("stdin")) + + local pid = proc:pid() + + -- There is a non-zero amount of time involved in starting up our + -- child process (before the sleep executable is invoked and nanosleep() + -- is called). + -- + -- During this time, signals may be ignored, so we must delay until we + -- are certain the process is in the interruptible sleep state (S). + + if pl_path.isdir("/proc/self") then + local stat_file + + local function is_sleeping() + stat_file = stat_file or io.open("/proc/" .. pid .. "/stat") + if not stat_file then return false end + + stat_file:seek("set", 0) + + local stat = stat_file:read("*a") + if not stat then return false end + + -- see the proc(5) man page for the details on this format + -- (section: /proc/pid/stat) + local state = stat:match("^%d+ [^%s]+ (%w) .*") + return state and state == "S" + end + + local deadline = ngx.now() + 2 + + while ngx.now() < deadline and not is_sleeping() do + ngx.sleep(0.05) + end + + if stat_file then stat_file:close() end + + else + -- the proc filesystem is not available, so all we can really do is + -- delay for some amount of time and hope it's enough + ngx.sleep(0.5) + end + + assert.is_true(process.exists(pid), "failed to spawn process") + + return proc + end + + after_each(function() + if proc then + pcall(proc.kill, proc, process.SIG_KILL) + pcall(proc.wait, proc) + end + end) + + it("sends a signal to a running process, using a pid", function() + spawn() + local pid = proc:pid() + + local ok, err = process.signal(pid, process.SIG_TERM) + assert.is_nil(err) + assert.truthy(ok) + + local reason, status + ok, reason, status = proc:wait() + assert.falsy(ok) + assert.equals("signal", reason) + assert.equals(15, status) + + assert.is_false(process.exists(pid)) + end) + + it("sends a signal to a running process, using a pid file", function() + spawn() + local pid = proc:pid() + + assert.truthy(pl_file.write(pid_file, tostring(pid))) + + local ok, err = process.signal(pid_file, process.SIG_TERM) + assert.is_nil(err) + assert.truthy(ok) + + local reason, status + ok, reason, status = proc:wait() + assert.falsy(ok) + assert.equals("signal", reason) + assert.equals(15, status) + + assert.is_false(process.exists(pid_file)) + end) + + it("returns nil+error for a non-existent process", function() + local ok, err + + for _ = 1, 1000 do + local pid = math.random(1000, 2^16) + ok, err = process.signal(pid, process.SIG_NONE) + if ok == nil and err == "No such process" then + break + end + end + + assert.is_nil(ok) + assert.is_string(err) + assert.equals("No such process", err) + end) + + it("accepts a signal name in place of signum", function() + spawn() + local pid = proc:pid() + + local ok, err = process.signal(pid, "INT") + assert.is_nil(err) + assert.truthy(ok) + + local reason, status + ok, reason, status = proc:wait() + assert.falsy(ok) + assert.equals("signal", reason) + assert.equals(2, status) + + assert.is_false(process.exists(pid)) + end) + + end) end) diff --git a/spec/helpers.lua b/spec/helpers.lua index 82f61cada58e..f92902990fc4 100644 --- a/spec/helpers.lua +++ b/spec/helpers.lua @@ -75,6 +75,8 @@ local https_server = require "spec.fixtures.https_server" local stress_generator = require "spec.fixtures.stress_generator" local resty_signal = require "resty.signal" local lfs = require "lfs" +local process = require "kong.cmd.utils.process" + ffi.cdef [[ int setenv(const char *name, const char *value, int overwrite); @@ -3636,7 +3638,7 @@ end -- @return true or nil+err local function stop_kong(prefix, preserve_prefix, preserve_dc, signal, nowait) prefix = prefix or conf.prefix - signal = signal or "TERM" + signal = signal or process.SIG_TERM local running_conf, err = get_running_conf(prefix) if not running_conf then @@ -4045,15 +4047,13 @@ end -- Only use in CLI tests from spec/02-integration/01-cmd kill_all = function(prefix, timeout) - local process = require "kong.cmd.utils.process" - local running_conf = get_running_conf(prefix) if not running_conf then return end -- kill kong_tests.conf service local pid_path = running_conf.nginx_pid if pl_path.exists(pid_path) then - process.kill(pid_path, "-TERM") + process.term(pid_path) wait_pid(pid_path, timeout) end end, @@ -4069,8 +4069,6 @@ end end, signal = function(prefix, signal, pid_path) - local process = require "kong.cmd.utils.process" - if not pid_path then local running_conf = get_running_conf(prefix) if not running_conf then @@ -4080,7 +4078,7 @@ end pid_path = running_conf.nginx_pid end - return process.kill(pid_path, signal) + return process.signal(pid_path, signal) end, -- send signal to all Nginx workers, not including the master signal_workers = function(prefix, signal, pid_path) From bd3d4289179ed9488bc272125a2876706834de78 Mon Sep 17 00:00:00 2001 From: Michael Martin Date: Wed, 9 Aug 2023 18:16:41 -0700 Subject: [PATCH 4/6] chore(cmd): remove redundant process check --- bin/kong-health | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/kong-health b/bin/kong-health index d7176014e44f..eb99fc251f6c 100755 --- a/bin/kong-health +++ b/bin/kong-health @@ -42,7 +42,6 @@ local function execute(args) print("") local pid_file = pl_path.join(prefix, "pids", "nginx.pid") - process.exists(pid_file) assert(process.exists(pid_file), "Kong is not running at " .. prefix) print("Kong is healthy at ", prefix) end From 1c6f3a05543643c6550ea5599d9f1bf9bf7da3aa Mon Sep 17 00:00:00 2001 From: Michael Martin Date: Mon, 14 Aug 2023 15:36:18 -0700 Subject: [PATCH 5/6] tests(helpers): fix flaky http_mock PID file test --- .../01-helpers/03-http_mock_spec.lua | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/spec/02-integration/01-helpers/03-http_mock_spec.lua b/spec/02-integration/01-helpers/03-http_mock_spec.lua index 6ffb0770cde8..bfd9de86b740 100644 --- a/spec/02-integration/01-helpers/03-http_mock_spec.lua +++ b/spec/02-integration/01-helpers/03-http_mock_spec.lua @@ -1,6 +1,6 @@ local http_mock = require "spec.helpers.http_mock" local tapping = require "spec.helpers.http_mock.tapping" -local pl_file = require "pl.file" +local process = require "kong.cmd.utils.process" for _, tls in ipairs {true, false} do describe("http_mock with " .. (tls and "https" or "http") , function() @@ -217,8 +217,16 @@ describe("http_mock config", function() end) local pid_filename = mock_prefix .. "/logs/nginx.pid" - - assert(pl_file.access_time(pid_filename) ~= nil, "mocking not in the correct place") + assert + .eventually(function() + local pid, err = process.pid_from_file(pid_filename) + if not pid then + return nil, "failed to get PID from " .. pid_filename .. ": " .. err + end + + return process.exists(pid) + end) + .is_truthy("mocking not in the correct place") end) it("init_by_lua_block inject", function () From 2d268adda33752f07cd68e4caa9fcc64d65370a0 Mon Sep 17 00:00:00 2001 From: Michael Martin Date: Tue, 15 Aug 2023 09:09:38 -0700 Subject: [PATCH 6/6] chore(cmd): return error on signal sending failures --- kong/cmd/utils/nginx_signals.lua | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kong/cmd/utils/nginx_signals.lua b/kong/cmd/utils/nginx_signals.lua index f3adaeeb759b..7b76b8845be6 100644 --- a/kong/cmd/utils/nginx_signals.lua +++ b/kong/cmd/utils/nginx_signals.lua @@ -63,8 +63,9 @@ local function send_signal(kong_conf, signal) log.verbose("sending %s signal to nginx running at %s", signal, kong_conf.nginx_pid) - if not process.signal(pid, signal) then - return nil, "could not send signal" + local ok, err = process.signal(pid, signal) + if not ok then + return nil, fmt("could not send signal: %s", err or "unknown error") end return true