Add gofail failpoint support to the linearizability tests.

CI builds etcd with FAILPOINTS=true, every member gets its own GOFAIL_HTTP
endpoint when EtcdProcessClusterConfig.GoFailEnabled is set, and the new
RaftBeforeSavePanic failpoint is exercised by the "Issue14370" test case.
Whitespace inside the hunks was reconstructed; verify it against the tree
before applying.

diff --git a/.github/workflows/linearizability.yaml b/.github/workflows/linearizability.yaml
index 1a1a2feafc9f..53f53d02ae47 100644
--- a/.github/workflows/linearizability.yaml
+++ b/.github/workflows/linearizability.yaml
@@ -9,7 +9,9 @@ jobs:
       with:
         go-version: "1.19.1"
     - run: |
+        go install go.etcd.io/gofail
         mkdir -p /tmp/linearizability
+        FAILPOINTS=true make build
         EXPECT_DEBUG=true GO_TEST_FLAGS=-v RESULTS_DIR=/tmp/linearizability make test-linearizability
     - uses: actions/upload-artifact@v2
       if: always()
diff --git a/Makefile b/Makefile
index 4c3bcfab52b8..6f8a27f8c170 100644
--- a/Makefile
+++ b/Makefile
@@ -30,8 +30,8 @@ test-e2e-release: build
 	PASSES="release e2e" ./scripts/test.sh $(GO_TEST_FLAGS)
 
 .PHONY: test-linearizability
-test-linearizability: build
-	PASSES="linearizability" ./scripts/test.sh $(GO_TEST_FLAGS)
+test-linearizability:
+	FAILPOINTS=true PASSES="linearizability" ./scripts/test.sh $(GO_TEST_FLAGS)
 
 # Static analysis
 
diff --git a/go.mod b/go.mod
index 74956a2eb692..2e494c705edb 100644
--- a/go.mod
+++ b/go.mod
@@ -80,6 +80,7 @@ require (
 	github.com/stretchr/testify v1.7.2 // indirect
 	github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect
 	github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect
+	go.etcd.io/gofail v0.0.0-20220826035847-d0d2a96a6ef0 // indirect
 	go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.32.0 // indirect
 	go.opentelemetry.io/otel v1.7.0 // indirect
 	go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.7.0 // indirect
diff --git a/go.sum b/go.sum
index 70338cde9b5f..f1131e508d62 100644
--- a/go.sum
+++ b/go.sum
@@ -327,6 +327,8 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec
 github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
 go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU=
 go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4=
+go.etcd.io/gofail v0.0.0-20220826035847-d0d2a96a6ef0 h1:TcXBU/YdVROXQ7FUowVK1ih9gu2yi3YMLE+tQb9q964=
+go.etcd.io/gofail v0.0.0-20220826035847-d0d2a96a6ef0/go.mod h1:bOzzUWJ5bNHifkNkoIN6Ydf/z/UPT0bYuPghFYVC8+4=
 go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
 go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
 go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
diff --git a/scripts/build_lib.sh b/scripts/build_lib.sh
index ad5c732a5491..9db3f47f9d85 100755
--- a/scripts/build_lib.sh
+++ b/scripts/build_lib.sh
@@ -19,6 +19,14 @@ toggle_failpoints() {
   mode="$1"
   if command -v gofail >/dev/null 2>&1; then
     run gofail "$mode" server/etcdserver/ server/storage/backend/
+    # Fetch go.etcd.io/gofail/runtime from inside each of these module directories.
+    local module
+    for module in server etcdutl etcdctl tests; do
+      (
+        cd "./${module}"
+        run go get go.etcd.io/gofail/runtime
+      )
+    done
   elif [[ "$mode" != "disable" ]]; then
     log_error "FAILPOINTS set but gofail not found"
     exit 1
diff --git a/tests/framework/e2e/cluster.go b/tests/framework/e2e/cluster.go
index 5096df714f51..ff57f35ebee4 100644
--- a/tests/framework/e2e/cluster.go
+++ b/tests/framework/e2e/cluster.go
@@ -182,6 +182,8 @@ type EtcdProcessClusterConfig struct {
 	CorruptCheckTime        time.Duration
 	CompactHashCheckEnabled bool
 	CompactHashCheckTime    time.Duration
+	// GoFailEnabled sets GOFAIL_HTTP in each member's environment to a per-member 127.0.0.1 address.
+	GoFailEnabled bool
 }
 
 // NewEtcdProcessCluster launches a new cluster from etcd processes, returning
@@ -388,12 +390,22 @@ func (cfg *EtcdProcessClusterConfig) EtcdServerProcessConfig(tb testing.TB, i in
 	if cfg.CompactHashCheckTime != 0 {
 		args = append(args, "--experimental-compact-hash-check-time", cfg.CompactHashCheckTime.String())
 	}
+	// Copy the environment so the per-member GOFAIL_HTTP entry is not written into cfg.EnvVars.
+	envVars := make(map[string]string, len(cfg.EnvVars)+1)
+	for key, value := range cfg.EnvVars {
+		envVars[key] = value
+	}
+	if cfg.GoFailEnabled {
+		// Member i listens on (i+1)*10000 + 2381, i.e. 12381 for the first member.
+		gofailPort := (i+1)*10000 + 2381
+		envVars["GOFAIL_HTTP"] = fmt.Sprintf("127.0.0.1:%d", gofailPort)
+	}
 
 	return &EtcdServerProcessConfig{
 		lg:           cfg.Logger,
 		ExecPath:     cfg.ExecPath,
 		Args:         args,
-		EnvVars:      cfg.EnvVars,
+		EnvVars:      envVars,
 		TlsArgs:      cfg.TlsArgs(),
 		DataDirPath:  dataDirPath,
 		KeepDataDir:  cfg.KeepDataDir,
diff --git a/tests/linearizability/failpoints.go b/tests/linearizability/failpoints.go
index eddbd53c8f3a..0c85342dc523 100644
--- a/tests/linearizability/failpoints.go
+++ b/tests/linearizability/failpoints.go
@@ -15,14 +15,20 @@
 package linearizability
 
 import (
+	"bytes"
 	"context"
+	"fmt"
 	"math/rand"
+	"net/http"
+	"time"
 
 	"go.etcd.io/etcd/tests/v3/framework/e2e"
 )
 
 var (
 	KillFailpoint Failpoint = killFailpoint{}
+	// RaftBeforeSavePanic sets the raftBeforeSave failpoint to the "panic" payload.
+	RaftBeforeSavePanic Failpoint = goFailpoint{failpoint: "etcd.io/etcd/server/etcdserver/raftBeforeSave", payload: "panic"}
 )
 
 type Failpoint interface {
@@ -47,3 +53,48 @@ func (f killFailpoint) Trigger(ctx context.Context, clus *e2e.EtcdProcessCluster
 	}
 	return nil
 }
+
+// goFailpoint is a Failpoint that sets a gofail failpoint over HTTP and then
+// restarts the first cluster member once its process has exited.
+type goFailpoint struct {
+	failpoint string
+	payload   string
+}
+
+// Trigger sets the failpoint, waits for the first member's process to exit
+// and starts that member again.
+func (f goFailpoint) Trigger(ctx context.Context, clus *e2e.EtcdProcessCluster) error {
+	if err := triggerGoFailpoint(ctx, f.failpoint, f.payload); err != nil {
+		return fmt.Errorf("failed to trigger failpoint %q, err: %w", f.failpoint, err)
+	}
+	member := clus.Procs[0]
+	if err := member.Wait(); err != nil {
+		return err
+	}
+	return member.Start(ctx)
+}
+
+// triggerGoFailpoint PUTs payload to the gofail HTTP endpoint for the named
+// failpoint and expects a 204 No Content reply. The request is bound to ctx.
+func triggerGoFailpoint(ctx context.Context, failpoint, payload string) error {
+	// TODO: Send failpoints to different members
+	r, err := http.NewRequestWithContext(ctx, http.MethodPut, "http://127.0.0.1:12381/"+failpoint, bytes.NewBufferString(payload))
+	if err != nil {
+		return err
+	}
+	resp, err := httpClient.Do(r)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusNoContent {
+		return fmt.Errorf("bad status code: %d", resp.StatusCode)
+	}
+	return nil
+}
+
+// httpClient is shared by all failpoint requests. The timeout bounds the whole
+// exchange; one second leaves headroom on a loaded machine.
+var httpClient = http.Client{
+	Timeout: time.Second,
+}
diff --git a/tests/linearizability/linearizability_test.go b/tests/linearizability/linearizability_test.go
index 20911695bacc..494441d43395 100644
--- a/tests/linearizability/linearizability_test.go
+++ b/tests/linearizability/linearizability_test.go
@@ -60,6 +60,14 @@ func TestLinearizability(t *testing.T) {
 				ClusterSize: 3,
 			},
 		},
+		{
+			name:      "Issue14370",
+			failpoint: RaftBeforeSavePanic,
+			config: e2e.EtcdProcessClusterConfig{
+				ClusterSize:   1,
+				GoFailEnabled: true,
+			},
+		},
 	}
 	for _, tc := range tcs {
 		t.Run(tc.name, func(t *testing.T) {
diff --git a/tools/mod/tools.go b/tools/mod/tools.go
index ccc3338eb85d..c2d1ce5819ec 100644
--- a/tools/mod/tools.go
+++ b/tools/mod/tools.go
@@ -34,6 +34,7 @@ import (
 	_ "github.com/mdempsky/unconvert"
 	_ "github.com/mgechev/revive"
 	_ "github.com/mikefarah/yq/v4"
+	_ "go.etcd.io/gofail"
 	_ "go.etcd.io/protodoc"
 	_ "gotest.tools/gotestsum"
 	_ "gotest.tools/v3"