From 93873708a93868dbd079a8e57c919c1acb4b0d59 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Sat, 15 Jun 2024 19:24:46 +0200 Subject: [PATCH 1/2] Upgrade gofail for sleep to not block deactivation Signed-off-by: Marek Siarkowicz --- go.mod | 2 +- go.sum | 4 ++-- tests/framework/e2e/etcd_process.go | 3 +-- tests/go.mod | 2 +- tests/go.sum | 4 ++-- tools/mod/go.mod | 2 +- tools/mod/go.sum | 2 ++ 7 files changed, 10 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index 28bfa5269ab..33a4aa10c6d 100644 --- a/go.mod +++ b/go.mod @@ -77,7 +77,7 @@ require ( github.com/spf13/pflag v1.0.5 // indirect github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect - go.etcd.io/gofail v0.1.0 // indirect + go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 // indirect go.opentelemetry.io/otel v1.27.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.27.0 // indirect diff --git a/go.sum b/go.sum index 7b7e9109b14..f39ea94d0d5 100644 --- a/go.sum +++ b/go.sum @@ -134,8 +134,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.etcd.io/bbolt v1.4.0-alpha.1 h1:3yrqQzbRRPFPdOMWS/QQIVxVnzSkAZQYeWlZFv1kbj4= go.etcd.io/bbolt v1.4.0-alpha.1/go.mod h1:S/Z/Nm3iuOnyO1W4XuFfPci51Gj6F1Hv0z8hisyYYOw= -go.etcd.io/gofail v0.1.0 h1:XItAMIhOojXFQMgrxjnd2EIIHun/d5qL0Pf7FzVTkFg= -go.etcd.io/gofail v0.1.0/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M= +go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390 h1:GGzKGOClkyeDNcshzpNHh7hyou+ErMhThPLYZ1qUhFs= +go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390/go.mod h1:d0hc7ZE3PPyYmNnpCX+sFVBzMUznSvNkmJmzUNDiDaA= go.etcd.io/raft/v3 v3.6.0-alpha.0 h1:cMmjAEjCKMGiQPowjSWM43Y5ZnBEeNP8RSYcm3ewtns= go.etcd.io/raft/v3 v3.6.0-alpha.0/go.mod h1:QpxpKeYmocQQFHP75LxNrdJTukZmqQig9lotwYLsUJY= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 h1:vS1Ao/R55RNV4O7TA2Qopok8yN+X0LIP6RVWLFkprck= diff --git a/tests/framework/e2e/etcd_process.go b/tests/framework/e2e/etcd_process.go index 31aac3a55f7..f913efffeec 100644 --- a/tests/framework/e2e/etcd_process.go +++ b/tests/framework/e2e/etcd_process.go @@ -400,8 +400,7 @@ func (f *BinaryFailpoints) DeactivateHTTP(ctx context.Context, failpoint string) return err } httpClient := http.Client{ - // TODO: Decrease after deactivate is not blocked by sleep https://github.com/etcd-io/gofail/issues/64 - Timeout: 2 * time.Second, + Timeout: time.Second, } if f.clientTimeout != 0 { httpClient.Timeout = f.clientTimeout diff --git a/tests/go.mod b/tests/go.mod index 4e851428502..f7512f39034 100644 --- a/tests/go.mod +++ b/tests/go.mod @@ -35,7 +35,7 @@ require ( go.etcd.io/etcd/etcdutl/v3 v3.6.0-alpha.0 go.etcd.io/etcd/pkg/v3 v3.6.0-alpha.0 go.etcd.io/etcd/server/v3 v3.6.0-alpha.0 - go.etcd.io/gofail v0.1.0 + go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390 go.etcd.io/raft/v3 v3.6.0-alpha.0 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 go.opentelemetry.io/otel v1.27.0 diff --git a/tests/go.sum b/tests/go.sum index 60b2b291297..d8c67da2a72 100644 --- a/tests/go.sum +++ b/tests/go.sum @@ -138,8 +138,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.etcd.io/bbolt v1.4.0-alpha.1 h1:3yrqQzbRRPFPdOMWS/QQIVxVnzSkAZQYeWlZFv1kbj4= go.etcd.io/bbolt v1.4.0-alpha.1/go.mod h1:S/Z/Nm3iuOnyO1W4XuFfPci51Gj6F1Hv0z8hisyYYOw= -go.etcd.io/gofail v0.1.0 h1:XItAMIhOojXFQMgrxjnd2EIIHun/d5qL0Pf7FzVTkFg= -go.etcd.io/gofail v0.1.0/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M= +go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390 h1:GGzKGOClkyeDNcshzpNHh7hyou+ErMhThPLYZ1qUhFs= +go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390/go.mod h1:d0hc7ZE3PPyYmNnpCX+sFVBzMUznSvNkmJmzUNDiDaA= go.etcd.io/raft/v3 v3.6.0-alpha.0 h1:cMmjAEjCKMGiQPowjSWM43Y5ZnBEeNP8RSYcm3ewtns= go.etcd.io/raft/v3 v3.6.0-alpha.0/go.mod h1:QpxpKeYmocQQFHP75LxNrdJTukZmqQig9lotwYLsUJY= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 h1:vS1Ao/R55RNV4O7TA2Qopok8yN+X0LIP6RVWLFkprck= diff --git a/tools/mod/go.mod b/tools/mod/go.mod index e3833c5d705..75f86188a09 100644 --- a/tools/mod/go.mod +++ b/tools/mod/go.mod @@ -14,7 +14,7 @@ require ( github.com/google/addlicense v1.1.1 github.com/google/yamlfmt v0.11.0 github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 - go.etcd.io/gofail v0.1.0 + go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390 go.etcd.io/protodoc v0.0.0-20180829002748-484ab544e116 go.etcd.io/raft/v3 v3.6.0-alpha.0 gotest.tools/gotestsum v1.11.0 diff --git a/tools/mod/go.sum b/tools/mod/go.sum index 4343149a558..8b5784f740f 100644 --- a/tools/mod/go.sum +++ b/tools/mod/go.sum @@ -501,6 +501,8 @@ go-simpler.org/sloglint v0.7.0 h1:rMZRxD9MbaGoRFobIOicMxZzum7AXNFDlez6xxJs5V4= go-simpler.org/sloglint v0.7.0/go.mod h1:g9SXiSWY0JJh4LS39/Q0GxzP/QX2cVcbTOYhDpXrJEs= go.etcd.io/gofail v0.1.0 h1:XItAMIhOojXFQMgrxjnd2EIIHun/d5qL0Pf7FzVTkFg= go.etcd.io/gofail v0.1.0/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M= +go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390 h1:GGzKGOClkyeDNcshzpNHh7hyou+ErMhThPLYZ1qUhFs= +go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390/go.mod h1:d0hc7ZE3PPyYmNnpCX+sFVBzMUznSvNkmJmzUNDiDaA= go.etcd.io/protodoc v0.0.0-20180829002748-484ab544e116 h1:QQiUXlqz+d96jyNG71NE+IGTgOK6Xlhdx+PzvfbLHlQ= go.etcd.io/protodoc v0.0.0-20180829002748-484ab544e116/go.mod h1:F9kog+iVAuvPJucb1dkYcDcbV0g4uyGEHllTP5NrXiw= go.etcd.io/raft/v3 v3.6.0-alpha.0 h1:cMmjAEjCKMGiQPowjSWM43Y5ZnBEeNP8RSYcm3ewtns= From 5e42ed9b22b47dea9f61f1a8102b0467af54d46d Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Mon, 1 Apr 2024 18:09:59 +0200 Subject: [PATCH 2/2] Reproduce issue #17529 Signed-off-by: Marek Siarkowicz --- tests/robustness/failpoint/failpoint.go | 1 + tests/robustness/failpoint/gofail.go | 4 +--- tests/robustness/makefile.mk | 20 +++++++++++++++++++ .../beforeSendWatchResponse/build.patch | 9 +++++++++ .../beforeSendWatchResponse/watch.patch | 12 +++++++++++ tests/robustness/scenarios.go | 11 ++++++++++ tests/robustness/traffic/traffic.go | 2 +- 7 files changed, 55 insertions(+), 4 deletions(-) create mode 100644 tests/robustness/patches/beforeSendWatchResponse/build.patch create mode 100644 tests/robustness/patches/beforeSendWatchResponse/watch.patch diff --git a/tests/robustness/failpoint/failpoint.go b/tests/robustness/failpoint/failpoint.go index 14e6ddf7e94..bfe63fcdeba 100644 --- a/tests/robustness/failpoint/failpoint.go +++ b/tests/robustness/failpoint/failpoint.go @@ -50,6 +50,7 @@ var ( RaftBeforeSaveSleep, RaftAfterSaveSleep, ApplyBeforeOpenSnapshot, + SleepBeforeSendWatchResponse, } ) diff --git a/tests/robustness/failpoint/gofail.go b/tests/robustness/failpoint/gofail.go index b6218edb9af..be54faa01f2 100644 --- a/tests/robustness/failpoint/gofail.go +++ b/tests/robustness/failpoint/gofail.go @@ -59,6 +59,7 @@ var ( BeforeApplyOneConfChangeSleep Failpoint = killAndGofailSleep{"beforeApplyOneConfChange", time.Second} RaftBeforeSaveSleep Failpoint = gofailSleepAndDeactivate{"raftBeforeSave", time.Second} RaftAfterSaveSleep Failpoint = gofailSleepAndDeactivate{"raftAfterSave", time.Second} + SleepBeforeSendWatchResponse Failpoint = gofailSleepAndDeactivate{"beforeSendWatchResponse", time.Second} ) type goPanicFailpoint struct { @@ -238,9 +239,6 @@ func (f gofailSleepAndDeactivate) Name() string { } func (f gofailSleepAndDeactivate) Available(config e2e.EtcdProcessClusterConfig, member e2e.EtcdProcess) bool { - if config.ClusterSize == 1 { - return false - } memberFailpoints := member.Failpoints() if memberFailpoints == nil { return false diff --git a/tests/robustness/makefile.mk b/tests/robustness/makefile.mk index 65c373e897d..2920b3753d9 100644 --- a/tests/robustness/makefile.mk +++ b/tests/robustness/makefile.mk @@ -39,6 +39,11 @@ test-robustness-issue15271: /tmp/etcd-v3.5.7-failpoints/bin GO_TEST_FLAGS='-v --run=TestRobustnessRegression/Issue15271 --count 100 --failfast --bin-dir=/tmp/etcd-v3.5.7-failpoints/bin' make test-robustness && \ echo "Failed to reproduce" || echo "Successful reproduction" +.PHONY: test-robustness-issue17529 +test-robustness-issue17529: /tmp/etcd-v3.5.12-beforeSendWatchResponse/bin + GO_TEST_FLAGS='-v --run=TestRobustnessRegression/Issue17529 --count 100 --failfast --bin-dir=/tmp/etcd-v3.5.12-beforeSendWatchResponse/bin' make test-robustness && \ + echo "Failed to reproduce" || echo "Successful reproduction" + # Failpoints GOPATH = $(shell go env GOPATH) @@ -98,6 +103,21 @@ $(GOPATH)/bin/gofail: tools/mod/go.mod tools/mod/go.sum (cd etcdutl; go get go.etcd.io/gofail@${GOFAIL_VERSION}); \ FAILPOINTS=true ./build; +/tmp/etcd-v3.5.12-beforeSendWatchResponse/bin: $(GOPATH)/bin/gofail + rm -rf /tmp/etcd-v3.5.12-beforeSendWatchResponse/ + mkdir -p /tmp/etcd-v3.5.12-beforeSendWatchResponse/ + git clone --depth 1 --branch v3.5.12 https://github.com/etcd-io/etcd.git /tmp/etcd-v3.5.12-beforeSendWatchResponse/ + cp -r ./tests/robustness/patches/beforeSendWatchResponse /tmp/etcd-v3.5.12-beforeSendWatchResponse/ + cd /tmp/etcd-v3.5.12-beforeSendWatchResponse/; \ + patch -l server/etcdserver/api/v3rpc/watch.go ./beforeSendWatchResponse/watch.patch; \ + patch -l build.sh ./beforeSendWatchResponse/build.patch; \ + go get go.etcd.io/gofail@${GOFAIL_VERSION}; \ + (cd server; go get go.etcd.io/gofail@${GOFAIL_VERSION}); \ + (cd etcdctl; go get go.etcd.io/gofail@${GOFAIL_VERSION}); \ + (cd etcdutl; go get go.etcd.io/gofail@${GOFAIL_VERSION}); \ + (cd tools/mod; go get go.etcd.io/gofail@${GOFAIL_VERSION}); \ + FAILPOINTS=true ./build; + /tmp/etcd-release-3.5-failpoints/bin: $(GOPATH)/bin/gofail rm -rf /tmp/etcd-release-3.5-failpoints/ mkdir -p /tmp/etcd-release-3.5-failpoints/ diff --git a/tests/robustness/patches/beforeSendWatchResponse/build.patch b/tests/robustness/patches/beforeSendWatchResponse/build.patch new file mode 100644 index 00000000000..af93a7e5a90 --- /dev/null +++ b/tests/robustness/patches/beforeSendWatchResponse/build.patch @@ -0,0 +1,9 @@ +@@ -25,7 +26,7 @@ GOFAIL_VERSION=$(cd tools/mod && go list -m -f {{.Version}} go.etcd.io/gofail) + toggle_failpoints() { + mode="$1" + if command -v gofail >/dev/null 2>&1; then +- run gofail "$mode" server/etcdserver/ server/mvcc/ server/wal/ server/mvcc/backend/ ++ run gofail "$mode" server/etcdserver/ server/mvcc/ server/wal/ server/mvcc/backend/ server/etcdserver/api/v3rpc/ + if [[ "$mode" == "enable" ]]; then + go get go.etcd.io/gofail@${GOFAIL_VERSION} + cd ./server && go get go.etcd.io/gofail@${GOFAIL_VERSION} diff --git a/tests/robustness/patches/beforeSendWatchResponse/watch.patch b/tests/robustness/patches/beforeSendWatchResponse/watch.patch new file mode 100644 index 00000000000..4ae45f06b40 --- /dev/null +++ b/tests/robustness/patches/beforeSendWatchResponse/watch.patch @@ -0,0 +1,12 @@ +diff --git a/server/etcdserver/api/v3rpc/watch.go b/server/etcdserver/api/v3rpc/watch.go +index cd834aa..e6aaf2b 100644 +--- a/server/etcdserver/api/v3rpc/watch.go ++++ b/server/etcdserver/api/v3rpc/watch.go +@@ -460,6 +460,7 @@ func (sws *serverWatchStream) sendLoop() { + sws.mu.RUnlock() + + var serr error ++ // gofail: var beforeSendWatchResponse struct{} + if !fragmented && !ok { + serr = sws.gRPCStream.Send(wr) + } else { diff --git a/tests/robustness/scenarios.go b/tests/robustness/scenarios.go index 57e0bd20443..9a1dfaed073 100644 --- a/tests/robustness/scenarios.go +++ b/tests/robustness/scenarios.go @@ -191,6 +191,17 @@ func regressionScenarios(t *testing.T) []testScenario { e2e.WithClusterSize(1), ), }) + scenarios = append(scenarios, testScenario{ + name: "Issue17529", + profile: traffic.HighTrafficProfile, + traffic: traffic.Kubernetes, + failpoint: failpoint.SleepBeforeSendWatchResponse, + cluster: *e2e.NewConfig( + e2e.WithClusterSize(1), + e2e.WithGoFailEnabled(true), + options.WithSnapshotCount(100), + ), + }) if v.Compare(version.V3_5) >= 0 { opts := []e2e.EPClusterOption{ e2e.WithSnapshotCount(100), diff --git a/tests/robustness/traffic/traffic.go b/tests/robustness/traffic/traffic.go index e7f293a14c6..31fec7fa760 100644 --- a/tests/robustness/traffic/traffic.go +++ b/tests/robustness/traffic/traffic.go @@ -34,7 +34,7 @@ import ( var ( DefaultLeaseTTL int64 = 7200 RequestTimeout = 200 * time.Millisecond - WatchTimeout = 400 * time.Millisecond + WatchTimeout = time.Second MultiOpTxnOpCount = 4 LowTraffic = Profile{