From a61fe269cdb4eb1b61c46cc796231b1c54c8d541 Mon Sep 17 00:00:00 2001 From: Janko Date: Wed, 29 Sep 2021 18:33:49 +0800 Subject: [PATCH 01/16] bugfix: etcd cluster single node failure APISIX startup failure --- apisix/cli/etcd.lua | 18 ++++++++--- t/cli/test_etcd_ha.sh | 73 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 t/cli/test_etcd_ha.sh diff --git a/apisix/cli/etcd.lua b/apisix/cli/etcd.lua index 3cdaaa805876..7b3149b2f3ac 100644 --- a/apisix/cli/etcd.lua +++ b/apisix/cli/etcd.lua @@ -32,6 +32,7 @@ local tonumber = tonumber local str_format = string.format local str_sub = string.sub local table_concat = table.concat +local table_insert = table.insert local _M = {} @@ -187,6 +188,7 @@ function _M.init(env, args) end -- check the etcd cluster version + local etcd_healthy_hosts = {} for index, host in ipairs(yaml_conf.etcd.host) do local version_url = host .. "/version" local errmsg @@ -207,8 +209,8 @@ function _M.init(env, args) end if not res then - errmsg = str_format("request etcd endpoint \'%s\' error, %s\n", version_url, err) - util.die(errmsg) + print(str_format("request etcd endpoint \'%s\' error, %s\n", version_url, err)) + goto continue end local body, _, err = dkjson.decode(res) @@ -225,10 +227,18 @@ function _M.init(env, args) env.min_etcd_version, ", please upgrade your etcd cluster\n") end + + table_insert(etcd_healthy_hosts, host) + + :: continue :: + end + + if host_count >= 2 and #etcd_healthy_hosts < 2 then + util.die("etcd cluster must have two or more healthy nodes\n") end local etcd_ok = false - for index, host in ipairs(yaml_conf.etcd.host) do + for index, host in ipairs(etcd_healthy_hosts) do local is_success = true local errmsg @@ -358,7 +368,7 @@ function _M.init(env, args) end if not etcd_ok then - util.die("none of the configured etcd works well") + util.die("none of the configured etcd works well\n") end end diff --git a/t/cli/test_etcd_ha.sh b/t/cli/test_etcd_ha.sh new file mode 100644 index 000000000000..d1d6a14dde77 --- /dev/null +++ b/t/cli/test_etcd_ha.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +. ./t/cli/common.sh + +ETCD_NAME_0=etcd0 +ETCD_NAME_1=etcd1 +ETCD_NAME_2=etcd2 + +echo ' +etcd: + host: + - "http://127.0.0.1:23790" + - "http://127.0.0.1:23791" + - "http://127.0.0.1:23792" +' > conf/config.yaml + +docker-compose -f ./t/cli/docker-compose-etcd-cluster.yaml up -d + +# case 1: stop one etcd nodes +docker stop ${ETCD_NAME_0} + +make init && make run + +if [ "$?" != 0 ]; then + echo "FAIL: stop only one etcd node APISIX should start normally" + exit 1 +fi + +echo "OK: APISIX successfully to start, stop only one etcd node" + +make stop + +# case 2: stop two etcd nodes (cluster must have two or more nodes) +docker stop ${ETCD_NAME_1} + +make init && make run + +if [ "$?" == 0 ]; then + echo "FAIL: etcd has stopped two nodes, APISIX should fail to start" + exit 1 +fi + +echo "OK: APISIX failed to start, etcd cluster must have two or more healthy nodes" + + +# case 3: stop all etcd nodes +docker stop ${ETCD_NAME_2} + +make init && make run + +if [ "$?" == 0 ]; then + echo "FAIL: all etcd nodes have stopped, APISIX should not be able to start" + exit 1 +fi + +echo "OK: APISIX failed to start, all etcd nodes have stopped" From 6ed385a0915c68311b11cb67248e97692956972a Mon Sep 17 00:00:00 2001 From: Janko Date: Wed, 29 Sep 2021 18:47:38 +0800 Subject: [PATCH 02/16] test: update case description --- t/cli/test_etcd_ha.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/t/cli/test_etcd_ha.sh b/t/cli/test_etcd_ha.sh index d1d6a14dde77..c1121d806ac6 100644 --- a/t/cli/test_etcd_ha.sh +++ b/t/cli/test_etcd_ha.sh @@ -33,7 +33,7 @@ etcd: docker-compose -f ./t/cli/docker-compose-etcd-cluster.yaml up -d -# case 1: stop one etcd nodes +# case 1: stop one etcd nodes (result: start successful) docker stop ${ETCD_NAME_0} make init && make run @@ -47,7 +47,7 @@ echo "OK: APISIX successfully to start, stop only one etcd node" make stop -# case 2: stop two etcd nodes (cluster must have two or more nodes) +# case 2: stop two etcd nodes (result: start failure) docker stop ${ETCD_NAME_1} make init && make run @@ -60,7 +60,7 @@ fi echo "OK: APISIX failed to start, etcd cluster must have two or more healthy nodes" -# case 3: stop all etcd nodes +# case 3: stop all etcd nodes (result: start failure) docker stop ${ETCD_NAME_2} make init && make run @@ -71,3 +71,6 @@ if [ "$?" == 0 ]; then fi echo "OK: APISIX failed to start, all etcd nodes have stopped" + +# stop etcd docker container +docker-compose down From 5b8c11ac974851d16f391151c74d19f0336b73be Mon Sep 17 00:00:00 2001 From: Janko Date: Wed, 29 Sep 2021 19:39:48 +0800 Subject: [PATCH 03/16] style: modify file execution permissions --- t/cli/test_etcd_ha.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 t/cli/test_etcd_ha.sh diff --git a/t/cli/test_etcd_ha.sh b/t/cli/test_etcd_ha.sh old mode 100644 new mode 100755 From bfe7b2c343dfbc029fab31033c9d97275d81e072 Mon Sep 17 00:00:00 2001 From: Janko Date: Wed, 29 Sep 2021 22:41:41 +0800 Subject: [PATCH 04/16] test: update test case --- apisix/cli/etcd.lua | 4 ++++ t/cli/test_etcd_ha.sh | 36 +++++++++++++++--------------------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/apisix/cli/etcd.lua b/apisix/cli/etcd.lua index 7b3149b2f3ac..f279ccce878d 100644 --- a/apisix/cli/etcd.lua +++ b/apisix/cli/etcd.lua @@ -233,6 +233,10 @@ function _M.init(env, args) :: continue :: end + if #etcd_healthy_hosts <= 0 then + util.die("all etcd nodes are unavailable\n") + end + if host_count >= 2 and #etcd_healthy_hosts < 2 then util.die("etcd cluster must have two or more healthy nodes\n") end diff --git a/t/cli/test_etcd_ha.sh b/t/cli/test_etcd_ha.sh index c1121d806ac6..a6e73f2941c2 100755 --- a/t/cli/test_etcd_ha.sh +++ b/t/cli/test_etcd_ha.sh @@ -36,41 +36,35 @@ docker-compose -f ./t/cli/docker-compose-etcd-cluster.yaml up -d # case 1: stop one etcd nodes (result: start successful) docker stop ${ETCD_NAME_0} -make init && make run - -if [ "$?" != 0 ]; then - echo "FAIL: stop only one etcd node APISIX should start normally" +out=$(make init 2>&1) +if echo "$out" | grep "23790" | grep "connection refused"; then + echo "passed: APISIX successfully to start, stop only one etcd node" +else + echo "failed: stop only one etcd node APISIX should start normally" exit 1 fi -echo "OK: APISIX successfully to start, stop only one etcd node" - -make stop - # case 2: stop two etcd nodes (result: start failure) docker stop ${ETCD_NAME_1} -make init && make run - -if [ "$?" == 0 ]; then - echo "FAIL: etcd has stopped two nodes, APISIX should fail to start" +out=$(make init 2>&1) +if echo "$out" | grep "etcd cluster must have two or more healthy nodes"; then + echo "passed: APISIX failed to start, etcd cluster must have two or more healthy nodes" +else + echo "failed: etcd has stopped two nodes, APISIX should fail to start" exit 1 fi -echo "OK: APISIX failed to start, etcd cluster must have two or more healthy nodes" - - # case 3: stop all etcd nodes (result: start failure) docker stop ${ETCD_NAME_2} -make init && make run - -if [ "$?" == 0 ]; then - echo "FAIL: all etcd nodes have stopped, APISIX should not be able to start" +out=$(make init 2>&1) +if echo "$out" | grep "all etcd nodes are unavailable"; then + echo "passed: APISIX failed to start, all etcd nodes have stopped" +else + echo "failed: all etcd nodes have stopped, APISIX should not be able to start" exit 1 fi -echo "OK: APISIX failed to start, all etcd nodes have stopped" - # stop etcd docker container docker-compose down From 2bb38c8b8fced6e113e71db11ebf5d1a2bd5ecce Mon Sep 17 00:00:00 2001 From: Janko Date: Wed, 29 Sep 2021 23:10:32 +0800 Subject: [PATCH 05/16] fix: test case ci execute exception --- t/cli/test_etcd_ha.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/t/cli/test_etcd_ha.sh b/t/cli/test_etcd_ha.sh index a6e73f2941c2..b3860ba1a133 100755 --- a/t/cli/test_etcd_ha.sh +++ b/t/cli/test_etcd_ha.sh @@ -17,8 +17,6 @@ # limitations under the License. # -. ./t/cli/common.sh - ETCD_NAME_0=etcd0 ETCD_NAME_1=etcd1 ETCD_NAME_2=etcd2 From 8b9d467d0f1cb9286566cfa371b5c48c50e252e9 Mon Sep 17 00:00:00 2001 From: Janko Date: Wed, 29 Sep 2021 23:22:20 +0800 Subject: [PATCH 06/16] test: add sleep command to etcd ha case --- t/cli/test_etcd_ha.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/t/cli/test_etcd_ha.sh b/t/cli/test_etcd_ha.sh index b3860ba1a133..6037511105dd 100755 --- a/t/cli/test_etcd_ha.sh +++ b/t/cli/test_etcd_ha.sh @@ -33,6 +33,7 @@ docker-compose -f ./t/cli/docker-compose-etcd-cluster.yaml up -d # case 1: stop one etcd nodes (result: start successful) docker stop ${ETCD_NAME_0} +sleep 3 out=$(make init 2>&1) if echo "$out" | grep "23790" | grep "connection refused"; then @@ -44,6 +45,7 @@ fi # case 2: stop two etcd nodes (result: start failure) docker stop ${ETCD_NAME_1} +sleep 3 out=$(make init 2>&1) if echo "$out" | grep "etcd cluster must have two or more healthy nodes"; then @@ -55,6 +57,7 @@ fi # case 3: stop all etcd nodes (result: start failure) docker stop ${ETCD_NAME_2} +sleep 3 out=$(make init 2>&1) if echo "$out" | grep "all etcd nodes are unavailable"; then From 644e7029bb72f79ad0bcb48a7731378a7710826c Mon Sep 17 00:00:00 2001 From: Janko Date: Wed, 29 Sep 2021 23:38:20 +0800 Subject: [PATCH 07/16] test: update etcd ha case --- t/cli/test_etcd_ha.sh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/t/cli/test_etcd_ha.sh b/t/cli/test_etcd_ha.sh index 6037511105dd..501b9646cea0 100755 --- a/t/cli/test_etcd_ha.sh +++ b/t/cli/test_etcd_ha.sh @@ -33,7 +33,6 @@ docker-compose -f ./t/cli/docker-compose-etcd-cluster.yaml up -d # case 1: stop one etcd nodes (result: start successful) docker stop ${ETCD_NAME_0} -sleep 3 out=$(make init 2>&1) if echo "$out" | grep "23790" | grep "connection refused"; then @@ -45,10 +44,9 @@ fi # case 2: stop two etcd nodes (result: start failure) docker stop ${ETCD_NAME_1} -sleep 3 out=$(make init 2>&1) -if echo "$out" | grep "etcd cluster must have two or more healthy nodes"; then +if echo "$out" | grep "23791" | grep "connection refused"; then echo "passed: APISIX failed to start, etcd cluster must have two or more healthy nodes" else echo "failed: etcd has stopped two nodes, APISIX should fail to start" @@ -57,10 +55,9 @@ fi # case 3: stop all etcd nodes (result: start failure) docker stop ${ETCD_NAME_2} -sleep 3 out=$(make init 2>&1) -if echo "$out" | grep "all etcd nodes are unavailable"; then +if echo "$out" | grep "23792" | grep "connection refused"; then echo "passed: APISIX failed to start, all etcd nodes have stopped" else echo "failed: all etcd nodes have stopped, APISIX should not be able to start" From 01a30e044bfff154a3d7cac9a2823c30f9b1cd65 Mon Sep 17 00:00:00 2001 From: Janko Date: Wed, 29 Sep 2021 23:45:56 +0800 Subject: [PATCH 08/16] test: add uninstall etcd docker images for etcd ha case --- t/cli/test_etcd_ha.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/cli/test_etcd_ha.sh b/t/cli/test_etcd_ha.sh index 501b9646cea0..2be59d31d30e 100755 --- a/t/cli/test_etcd_ha.sh +++ b/t/cli/test_etcd_ha.sh @@ -65,4 +65,4 @@ else fi # stop etcd docker container -docker-compose down +docker-compose -f ./t/cli/docker-compose-etcd-cluster.yaml down From 26dfaa6285e721a4587e4fb680fdc93be58f558d Mon Sep 17 00:00:00 2001 From: Janko Date: Thu, 30 Sep 2021 09:22:16 +0800 Subject: [PATCH 09/16] ci: compatible with Lua 5.1 syntax --- apisix/cli/etcd.lua | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/apisix/cli/etcd.lua b/apisix/cli/etcd.lua index f279ccce878d..af5dab264155 100644 --- a/apisix/cli/etcd.lua +++ b/apisix/cli/etcd.lua @@ -208,29 +208,26 @@ function _M.init(env, args) version_url, err, retry_time)) end - if not res then - print(str_format("request etcd endpoint \'%s\' error, %s\n", version_url, err)) - goto continue - end + if res then + local body, _, err = dkjson.decode(res) + if err or (body and not body["etcdcluster"]) then + errmsg = str_format("got malformed version message: \"%s\" from etcd \"%s\"\n", res, + version_url) + util.die(errmsg) + end - local body, _, err = dkjson.decode(res) - if err or (body and not body["etcdcluster"]) then - errmsg = str_format("got malformed version message: \"%s\" from etcd \"%s\"\n", res, - version_url) - util.die(errmsg) - end + local cluster_version = body["etcdcluster"] + if compare_semantic_version(cluster_version, env.min_etcd_version) then + util.die("etcd cluster version ", cluster_version, + " is less than the required version ", + env.min_etcd_version, + ", please upgrade your etcd cluster\n") + end - local cluster_version = body["etcdcluster"] - if compare_semantic_version(cluster_version, env.min_etcd_version) then - util.die("etcd cluster version ", cluster_version, - " is less than the required version ", - env.min_etcd_version, - ", please upgrade your etcd cluster\n") + table_insert(etcd_healthy_hosts, host) + else + print(str_format("request etcd endpoint \'%s\' error, %s\n", version_url, err)) end - - table_insert(etcd_healthy_hosts, host) - - :: continue :: end if #etcd_healthy_hosts <= 0 then From bcfddf9f575562fb974267968f1bfce7f3e87f4d Mon Sep 17 00:00:00 2001 From: Janko Date: Wed, 13 Oct 2021 11:32:32 +0800 Subject: [PATCH 10/16] chore: upgrade etcd from 1.5.5 to 1.6.0 --- apisix/cli/etcd.lua | 13 ++++++------- rockspec/apisix-master-0.rockspec | 2 +- t/cli/test_etcd_ha.sh | 4 ++-- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/apisix/cli/etcd.lua b/apisix/cli/etcd.lua index af5dab264155..1b1d52184235 100644 --- a/apisix/cli/etcd.lua +++ b/apisix/cli/etcd.lua @@ -33,6 +33,7 @@ local str_format = string.format local str_sub = string.sub local table_concat = table.concat local table_insert = table.insert +local io_stderr = io.stderr local _M = {} @@ -218,15 +219,13 @@ function _M.init(env, args) local cluster_version = body["etcdcluster"] if compare_semantic_version(cluster_version, env.min_etcd_version) then - util.die("etcd cluster version ", cluster_version, - " is less than the required version ", - env.min_etcd_version, - ", please upgrade your etcd cluster\n") + util.die("etcd cluster version ", cluster_version, " is less than the required version ", + env.min_etcd_version, ", please upgrade your etcd cluster\n") end table_insert(etcd_healthy_hosts, host) else - print(str_format("request etcd endpoint \'%s\' error, %s\n", version_url, err)) + io_stderr:write(str_format("request etcd endpoint \'%s\' error, %s\n", version_url, err)) end end @@ -234,8 +233,8 @@ function _M.init(env, args) util.die("all etcd nodes are unavailable\n") end - if host_count >= 2 and #etcd_healthy_hosts < 2 then - util.die("etcd cluster must have two or more healthy nodes\n") + if host_count >= 2 and (#etcd_healthy_hosts / host_count * 100) < 50 then + util.die("the etcd cluster needs at least 50% and above healthy nodes\n") end local etcd_ok = false diff --git a/rockspec/apisix-master-0.rockspec b/rockspec/apisix-master-0.rockspec index d81a2de8eff9..1a0df5abd10b 100644 --- a/rockspec/apisix-master-0.rockspec +++ b/rockspec/apisix-master-0.rockspec @@ -34,7 +34,7 @@ dependencies = { "lua-resty-ctxdump = 0.1-0", "lua-resty-dns-client = 5.2.0", "lua-resty-template = 2.0", - "lua-resty-etcd = 1.5.5", + "lua-resty-etcd = 1.6.0", "api7-lua-resty-http = 0.2.0", "lua-resty-balancer = 0.02rc5", "lua-resty-ngxvar = 0.5.2", diff --git a/t/cli/test_etcd_ha.sh b/t/cli/test_etcd_ha.sh index 2be59d31d30e..f8004fd11f95 100755 --- a/t/cli/test_etcd_ha.sh +++ b/t/cli/test_etcd_ha.sh @@ -31,7 +31,7 @@ etcd: docker-compose -f ./t/cli/docker-compose-etcd-cluster.yaml up -d -# case 1: stop one etcd nodes (result: start successful) +# case 1: stop one etcd node (result: start successful) docker stop ${ETCD_NAME_0} out=$(make init 2>&1) @@ -49,7 +49,7 @@ out=$(make init 2>&1) if echo "$out" | grep "23791" | grep "connection refused"; then echo "passed: APISIX failed to start, etcd cluster must have two or more healthy nodes" else - echo "failed: etcd has stopped two nodes, APISIX should fail to start" + echo "failed: two etcd nodes have been stopped, APISIX should fail to start" exit 1 fi From 4f6afdd64a680a25a4777131c21964e7da5847c5 Mon Sep 17 00:00:00 2001 From: Janko Date: Wed, 13 Oct 2021 11:51:28 +0800 Subject: [PATCH 11/16] fix: code lint --- apisix/cli/etcd.lua | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/apisix/cli/etcd.lua b/apisix/cli/etcd.lua index 1b1d52184235..75a2d4b45fe3 100644 --- a/apisix/cli/etcd.lua +++ b/apisix/cli/etcd.lua @@ -219,13 +219,15 @@ function _M.init(env, args) local cluster_version = body["etcdcluster"] if compare_semantic_version(cluster_version, env.min_etcd_version) then - util.die("etcd cluster version ", cluster_version, " is less than the required version ", - env.min_etcd_version, ", please upgrade your etcd cluster\n") + errmsg = str_format("etcd cluster version %s is less than the required version %s, + please upgrade your etcd cluster\n", cluster_version, env.min_etcd_version) + util.die(errmsg) end table_insert(etcd_healthy_hosts, host) else - io_stderr:write(str_format("request etcd endpoint \'%s\' error, %s\n", version_url, err)) + io_stderr:write(str_format("request etcd endpoint \'%s\' error, %s\n", version_url, + err)) end end From a7b238ad16f672287c2529bbbf7b73e0e425c3bb Mon Sep 17 00:00:00 2001 From: Janko Date: Wed, 13 Oct 2021 12:05:12 +0800 Subject: [PATCH 12/16] fix: update code indent format --- apisix/cli/etcd.lua | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apisix/cli/etcd.lua b/apisix/cli/etcd.lua index 75a2d4b45fe3..7af249db3f2d 100644 --- a/apisix/cli/etcd.lua +++ b/apisix/cli/etcd.lua @@ -219,9 +219,9 @@ function _M.init(env, args) local cluster_version = body["etcdcluster"] if compare_semantic_version(cluster_version, env.min_etcd_version) then - errmsg = str_format("etcd cluster version %s is less than the required version %s, - please upgrade your etcd cluster\n", cluster_version, env.min_etcd_version) - util.die(errmsg) + util.die("etcd cluster version ", cluster_version, + " is less than the required version ", env.min_etcd_version, + ", please upgrade your etcd cluster\n") end table_insert(etcd_healthy_hosts, host) From 2b75586664b6ae84f72ae30bf69fe4455b4e0c63 Mon Sep 17 00:00:00 2001 From: Janko Date: Wed, 13 Oct 2021 18:31:53 +0800 Subject: [PATCH 13/16] chore: update health check config --- apisix/core/config_etcd.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apisix/core/config_etcd.lua b/apisix/core/config_etcd.lua index f3ac321760f2..ad1edf03876a 100644 --- a/apisix/core/config_etcd.lua +++ b/apisix/core/config_etcd.lua @@ -518,7 +518,7 @@ local function _automatic_fetch(premature, self) return end - if not health_check.conf then + if not health_check.conf or not health_check.conf.shm_name then local _, err = health_check.init({ shm_name = health_check_shm_name, fail_timeout = self.health_check_timeout, From 315aba07d67ea53a1b78183c5ebdcf4a2b9eb51a Mon Sep 17 00:00:00 2001 From: Janko Date: Wed, 13 Oct 2021 22:39:30 +0800 Subject: [PATCH 14/16] chore: update etcd node calculation --- apisix/cli/etcd.lua | 6 +++--- apisix/core/config_etcd.lua | 2 +- rockspec/apisix-master-0.rockspec | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/apisix/cli/etcd.lua b/apisix/cli/etcd.lua index 7af249db3f2d..54b1879be2a1 100644 --- a/apisix/cli/etcd.lua +++ b/apisix/cli/etcd.lua @@ -220,8 +220,8 @@ function _M.init(env, args) local cluster_version = body["etcdcluster"] if compare_semantic_version(cluster_version, env.min_etcd_version) then util.die("etcd cluster version ", cluster_version, - " is less than the required version ", env.min_etcd_version, - ", please upgrade your etcd cluster\n") + " is less than the required version ", + env.min_etcd_version, ", please upgrade your etcd cluster\n") end table_insert(etcd_healthy_hosts, host) @@ -235,7 +235,7 @@ function _M.init(env, args) util.die("all etcd nodes are unavailable\n") end - if host_count >= 2 and (#etcd_healthy_hosts / host_count * 100) < 50 then + if (#etcd_healthy_hosts / host_count * 100) <= 50 then util.die("the etcd cluster needs at least 50% and above healthy nodes\n") end diff --git a/apisix/core/config_etcd.lua b/apisix/core/config_etcd.lua index ad1edf03876a..f3ac321760f2 100644 --- a/apisix/core/config_etcd.lua +++ b/apisix/core/config_etcd.lua @@ -518,7 +518,7 @@ local function _automatic_fetch(premature, self) return end - if not health_check.conf or not health_check.conf.shm_name then + if not health_check.conf then local _, err = health_check.init({ shm_name = health_check_shm_name, fail_timeout = self.health_check_timeout, diff --git a/rockspec/apisix-master-0.rockspec b/rockspec/apisix-master-0.rockspec index 1a0df5abd10b..d81a2de8eff9 100644 --- a/rockspec/apisix-master-0.rockspec +++ b/rockspec/apisix-master-0.rockspec @@ -34,7 +34,7 @@ dependencies = { "lua-resty-ctxdump = 0.1-0", "lua-resty-dns-client = 5.2.0", "lua-resty-template = 2.0", - "lua-resty-etcd = 1.6.0", + "lua-resty-etcd = 1.5.5", "api7-lua-resty-http = 0.2.0", "lua-resty-balancer = 0.02rc5", "lua-resty-ngxvar = 0.5.2", From 31abe9725ca76b677994134b8ed129395fefa0e6 Mon Sep 17 00:00:00 2001 From: Janko Date: Thu, 14 Oct 2021 11:19:04 +0800 Subject: [PATCH 15/16] chore: move case to health check and fix bad indent --- apisix/cli/etcd.lua | 4 +- t/cli/test_etcd_ha.sh | 68 ---------------------------------- t/cli/test_etcd_healthcheck.sh | 42 +++++++++++++++++++-- 3 files changed, 41 insertions(+), 73 deletions(-) delete mode 100755 t/cli/test_etcd_ha.sh diff --git a/apisix/cli/etcd.lua b/apisix/cli/etcd.lua index 54b1879be2a1..4595ec5938a5 100644 --- a/apisix/cli/etcd.lua +++ b/apisix/cli/etcd.lua @@ -220,8 +220,8 @@ function _M.init(env, args) local cluster_version = body["etcdcluster"] if compare_semantic_version(cluster_version, env.min_etcd_version) then util.die("etcd cluster version ", cluster_version, - " is less than the required version ", - env.min_etcd_version, ", please upgrade your etcd cluster\n") + " is less than the required version ", env.min_etcd_version, + ", please upgrade your etcd cluster\n") end table_insert(etcd_healthy_hosts, host) diff --git a/t/cli/test_etcd_ha.sh b/t/cli/test_etcd_ha.sh deleted file mode 100755 index f8004fd11f95..000000000000 --- a/t/cli/test_etcd_ha.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -ETCD_NAME_0=etcd0 -ETCD_NAME_1=etcd1 -ETCD_NAME_2=etcd2 - -echo ' -etcd: - host: - - "http://127.0.0.1:23790" - - "http://127.0.0.1:23791" - - "http://127.0.0.1:23792" -' > conf/config.yaml - -docker-compose -f ./t/cli/docker-compose-etcd-cluster.yaml up -d - -# case 1: stop one etcd node (result: start successful) -docker stop ${ETCD_NAME_0} - -out=$(make init 2>&1) -if echo "$out" | grep "23790" | grep "connection refused"; then - echo "passed: APISIX successfully to start, stop only one etcd node" -else - echo "failed: stop only one etcd node APISIX should start normally" - exit 1 -fi - -# case 2: stop two etcd nodes (result: start failure) -docker stop ${ETCD_NAME_1} - -out=$(make init 2>&1) -if echo "$out" | grep "23791" | grep "connection refused"; then - echo "passed: APISIX failed to start, etcd cluster must have two or more healthy nodes" -else - echo "failed: two etcd nodes have been stopped, APISIX should fail to start" - exit 1 -fi - -# case 3: stop all etcd nodes (result: start failure) -docker stop ${ETCD_NAME_2} - -out=$(make init 2>&1) -if echo "$out" | grep "23792" | grep "connection refused"; then - echo "passed: APISIX failed to start, all etcd nodes have stopped" -else - echo "failed: all etcd nodes have stopped, APISIX should not be able to start" - exit 1 -fi - -# stop etcd docker container -docker-compose -f ./t/cli/docker-compose-etcd-cluster.yaml down diff --git a/t/cli/test_etcd_healthcheck.sh b/t/cli/test_etcd_healthcheck.sh index f94b8f6f5cef..464433e2d6b9 100755 --- a/t/cli/test_etcd_healthcheck.sh +++ b/t/cli/test_etcd_healthcheck.sh @@ -40,7 +40,7 @@ etcd: docker-compose -f ./t/cli/docker-compose-etcd-cluster.yaml up -d -# Check apisix not got effected when one etcd node disconnected +# case 1: Check apisix not got effected when one etcd node disconnected make init && make run docker stop ${ETCD_NAME_0} @@ -63,7 +63,7 @@ make stop echo "passed: apisix not got effected when one etcd node disconnected" -# Check when all etcd nodes disconnected, apisix trying to reconnect with backoff, and could successfully recover when reconnected +# case 2: Check when all etcd nodes disconnected, apisix trying to reconnect with backoff, and could successfully recover when reconnected make init && make run docker stop ${ETCD_NAME_0} && docker stop ${ETCD_NAME_1} && docker stop ${ETCD_NAME_2} @@ -78,7 +78,7 @@ fi docker start ${ETCD_NAME_0} && docker start ${ETCD_NAME_1} && docker start ${ETCD_NAME_2} -# sleep till etcd health check try to check again +# case 3: sleep till etcd health check try to check again current_time=$(date +%s) sleep_seconds=$(( $sleep_till - $current_time + 3)) if [ "$sleep_seconds" -gt 0 ]; then @@ -96,3 +96,39 @@ fi make stop echo "passed: when all etcd nodes disconnected, apisix trying to reconnect with backoff, and could successfully recover when reconnected" + +# case 4: stop one etcd node (result: start successful) +docker stop ${ETCD_NAME_0} + +out=$(make init 2>&1) +if echo "$out" | grep "23790" | grep "connection refused"; then + echo "passed: APISIX successfully to start, stop only one etcd node" +else + echo "failed: stop only one etcd node APISIX should start normally" + exit 1 +fi + +# case 5: stop two etcd nodes (result: start failure) +docker stop ${ETCD_NAME_1} + +out=$(make init 2>&1) +if echo "$out" | grep "23791" | grep "connection refused"; then + echo "passed: APISIX failed to start, etcd cluster must have two or more healthy nodes" +else + echo "failed: two etcd nodes have been stopped, APISIX should fail to start" + exit 1 +fi + +# case 6: stop all etcd nodes (result: start failure) +docker stop ${ETCD_NAME_2} + +out=$(make init 2>&1) +if echo "$out" | grep "23792" | grep "connection refused"; then + echo "passed: APISIX failed to start, all etcd nodes have stopped" +else + echo "failed: all etcd nodes have stopped, APISIX should not be able to start" + exit 1 +fi + +# stop etcd docker container +docker-compose -f ./t/cli/docker-compose-etcd-cluster.yaml down From ed65d4488b6a3aeb564bfec5b4003912e4b45570 Mon Sep 17 00:00:00 2001 From: Janko Date: Thu, 14 Oct 2021 11:51:51 +0800 Subject: [PATCH 16/16] fix: test case execute failure --- t/cli/test_etcd_healthcheck.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/cli/test_etcd_healthcheck.sh b/t/cli/test_etcd_healthcheck.sh index 464433e2d6b9..b98cd4037491 100755 --- a/t/cli/test_etcd_healthcheck.sh +++ b/t/cli/test_etcd_healthcheck.sh @@ -111,7 +111,7 @@ fi # case 5: stop two etcd nodes (result: start failure) docker stop ${ETCD_NAME_1} -out=$(make init 2>&1) +out=$(make init 2>&1 || true) if echo "$out" | grep "23791" | grep "connection refused"; then echo "passed: APISIX failed to start, etcd cluster must have two or more healthy nodes" else @@ -122,7 +122,7 @@ fi # case 6: stop all etcd nodes (result: start failure) docker stop ${ETCD_NAME_2} -out=$(make init 2>&1) +out=$(make init 2>&1 || true) if echo "$out" | grep "23792" | grep "connection refused"; then echo "passed: APISIX failed to start, all etcd nodes have stopped" else