Skip to content

Commit

Permalink
tests(integration): avoid passive health check flakiness (#10881)
Browse files Browse the repository at this point in the history
* style(spec): code formatting change

* tests(integration): avoid health check flakiness

* tests(integration): more health check flakiness fixes

* tests(integration): fix health check flakiness in stream test

(cherry picked from commit 9ceaddb)
  • Loading branch information
locao committed Jun 5, 2023
1 parent 59a21fc commit 41a7643
Showing 1 changed file with 70 additions and 28 deletions.
98 changes: 70 additions & 28 deletions spec/02-integration/05-proxy/10-balancer/01-healthchecks_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -1169,9 +1169,24 @@ for _, strategy in helpers.each_strategy() do

-- Go hit them with our test requests
local client_oks1, client_fails1 = bu.client_requests(bu.SLOTS, api_host)

-- set server2 unhealthy
assert(bu.direct_request(localhost, port2, "/unhealthy"))
assert
.with_timeout(5)
.eventually(function()
local client = helpers.http_client(localhost, port2)
local res = assert(client:send({
method = "GET",
path = "/status", })
)
client:close()
return res.status == 500
end)
.is_truthy()

-- Another test requests hit
local client_oks2, client_fails2 = bu.client_requests(bu.SLOTS, api_host)

local client_oks = client_oks1 + client_oks2
local client_fails = client_fails1 + client_fails2

Expand Down Expand Up @@ -1222,21 +1237,38 @@ for _, strategy in helpers.each_strategy() do

bu.end_testcase_setup(strategy, bp)

local requests = bu.SLOTS * 2 -- go round the balancer twice

-- setup target servers:
-- server2 will only respond for part of the test,
-- then server1 will take over.
local server2_oks = math.floor(requests / 4)
local server1 = https_server.new(port1, localhost)
local server2 = https_server.new(port2, localhost)
server1:start()
server2:start()

-- set test parameters
local req_burst = 10
local total_requests = req_burst * 2
local target2_reqs = req_burst / 2
local target1_reqs = total_requests - target2_reqs
local accepted_var = 0.3

-- Go hit them with our test requests
local client_oks1, client_fails1 = bu.client_requests(bu.SLOTS, api_host)
local client_oks1, client_fails1 = bu.client_requests(req_burst, api_host)
assert(bu.direct_request(localhost, port2, "/unhealthy"))
local client_oks2, client_fails2 = bu.client_requests(bu.SLOTS, api_host)
assert
.with_timeout(5)
.eventually(function()
local client = helpers.http_client(localhost, port2)
local res = assert(client:send({
method = "GET",
path = "/status", })
)
client:close()
return res.status == 500
end)
.is_truthy()

local client_oks2, client_fails2 = bu.client_requests(req_burst, api_host)

local client_oks = client_oks1 + client_oks2
local client_fails = client_fails1 + client_fails2
Expand All @@ -1246,12 +1278,12 @@ for _, strategy in helpers.each_strategy() do
local count2 = server2:shutdown()

-- verify
assert.are.equal(requests - server2_oks - nfails, count1.ok)
assert.are.equal(server2_oks, count2.ok)
assert.near(target1_reqs, count1.ok, target1_reqs * accepted_var)
assert.near(target2_reqs, count2.ok, target2_reqs * accepted_var)
assert.are.equal(0, count1.fail)
assert.are.equal(nfails, count2.fail)

assert.are.equal(client_oks, requests)
assert.are.equal(client_oks, total_requests)
assert.are.equal(0, client_fails)
end
end)
Expand Down Expand Up @@ -1815,14 +1847,14 @@ for _, strategy in helpers.each_strategy() do
})
bu.end_testcase_setup(strategy, bp)

-- 1) server1 and server2 take requests
-- 1. server1 and server2 take requests
local oks, fails = bu.client_requests(bu.SLOTS, api_host)
-- server2 goes unhealthy
assert(bu.direct_request(localhost, port2, "/unhealthy", protocol, hostname))
-- Wait until healthchecker detects
bu.poll_wait_address_health(upstream_id, hostname, port1, localhost, port2, "UNHEALTHY")

-- 2) server1 takes all requests
-- 2. server1 takes all requests
do
local o, f = bu.client_requests(bu.SLOTS, api_host)
oks = oks + o
Expand All @@ -1834,7 +1866,7 @@ for _, strategy in helpers.each_strategy() do
-- Give time for healthchecker to detect
bu.poll_wait_address_health(upstream_id, hostname, port1, localhost, port2, "HEALTHY")

-- 3) server1 and server2 take requests again
-- 3. server1 and server2 take requests again
do
local o, f = bu.client_requests(bu.SLOTS, api_host)
oks = oks + o
Expand Down Expand Up @@ -1921,17 +1953,24 @@ for _, strategy in helpers.each_strategy() do

bu.end_testcase_setup(strategy, bp)

-- 1) target1 and target2 take requests
local oks, fails = bu.client_requests(bu.SLOTS, api_host)
-- set test parameters
local req_burst = 10
local total_requests = req_burst * 3
local target1_reqs = req_burst * 2
local target2_reqs = req_burst
local accepted_var = 0.3

-- 1. target1 and target2 take requests
local oks, fails = bu.client_requests(req_burst, api_host)

-- target2 goes unhealthy
assert(bu.direct_request(localhost, port1, "/unhealthy", protocol, "target2.test"))
-- Wait until healthchecker detects
bu.poll_wait_health(upstream_id, "target2.test", port1, "UNHEALTHY")

-- 2) target1 takes all requests
-- 2. target1 takes all requests
do
local o, f = bu.client_requests(bu.SLOTS, api_host)
local o, f = bu.client_requests(req_burst, api_host)
oks = oks + o
fails = fails + f
end
Expand All @@ -1941,21 +1980,21 @@ for _, strategy in helpers.each_strategy() do
-- Give time for healthchecker to detect
bu.poll_wait_health(upstream_id, "target2.test", port1, "HEALTHY")

-- 3) server1 and server2 take requests again
-- 3. server1 and server2 take requests again
do
local o, f = bu.client_requests(bu.SLOTS, api_host)
local o, f = bu.client_requests(req_burst, api_host)
oks = oks + o
fails = fails + f
end

-- collect server results; hitcount
local results = server1:shutdown()
---- verify
assert.are.equal(bu.SLOTS * 2, results["target1.test"].ok)
assert.are.equal(bu.SLOTS, results["target2.test"].ok)
assert.near(target1_reqs, results["target1.test"].ok, target1_reqs * accepted_var)
assert.near(target2_reqs, results["target2.test"].ok, target2_reqs * accepted_var)
assert.are.equal(0, results["target1.test"].fail)
assert.are.equal(0, results["target1.test"].fail)
assert.are.equal(bu.SLOTS * 3, oks)
assert.are.equal(total_requests, oks)
assert.are.equal(0, fails)
end
end)
Expand Down Expand Up @@ -2132,7 +2171,7 @@ for _, strategy in helpers.each_strategy() do
server1:start()
server2:start()

-- 1) server1 and server2 take requests
-- 1. server1 and server2 take requests
local oks, fails = bu.client_requests(bu.SLOTS, api_host)

-- manually bring it down using the endpoint
Expand All @@ -2145,7 +2184,7 @@ for _, strategy in helpers.each_strategy() do
bu.poll_wait_health(upstream_id, localhost, port2, "UNHEALTHY")
end

-- 2) server1 takes all requests
-- 2. server1 takes all requests
do
local o, f = bu.client_requests(bu.SLOTS, api_host)
oks = oks + o
Expand All @@ -2162,7 +2201,7 @@ for _, strategy in helpers.each_strategy() do
bu.poll_wait_health(upstream_id, localhost, port2, "HEALTHY")
end

-- 3) server1 and server2 take requests again
-- 3. server1 and server2 take requests again
do
local o, f = bu.client_requests(bu.SLOTS, api_host)
oks = oks + o
Expand Down Expand Up @@ -2218,12 +2257,12 @@ for _, strategy in helpers.each_strategy() do

ngx.sleep(bu.CONSISTENCY_FREQ) -- wait for proxy state consistency timer

-- 1) server1 and server2 take requests
-- 1. server1 and server2 take requests
local oks, fails = bu.client_requests(bu.SLOTS, api_host)

assert(bu.direct_request(localhost, port2, "/timeout"))

-- 2) server1 takes all requests once server2 produces
-- 2. server1 takes all requests once server2 produces
-- `nfails` failures (even though server2 will be ready
-- to respond 200 again after `nfails`)
do
Expand Down Expand Up @@ -2671,6 +2710,7 @@ for _, strategy in helpers.each_strategy() do
-- Then server1 will take over.
local server1_oks = bu.SLOTS * 1.5
local server2_oks = bu.SLOTS / 2
local accepted_var = 0.3
local server1 = helpers.tcp_server(port1, {
requests = server1_oks,
prefix = "1 ",
Expand All @@ -2689,8 +2729,10 @@ for _, strategy in helpers.each_strategy() do
server2:join()

-- verify
assert.are.equal(server1_oks, ok1)
assert.are.equal(server2_oks, ok2)
-- we are not testing the ring balancer, but the health check. It's OK
-- to have some variance in the number of requests each server responds.
assert.near(server1_oks, ok1, server1_oks * accepted_var)
assert.near(server2_oks, ok2, server2_oks * accepted_var)
assert.are.equal(0, fails)
end)

Expand Down

0 comments on commit 41a7643

Please sign in to comment.