Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.5] member replace e2e test #17123

Merged
merged 1 commit into from
Jan 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions tests/e2e/corrupt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,15 +218,10 @@ func TestPeriodicCheckDetectsCorruption(t *testing.T) {
assert.NoError(t, err, "error on put")
}

members, err := cc.MemberList()
_, found, err := getMemberIdByName(context.Background(), cc, epc.Procs[0].Config().Name)
assert.NoError(t, err, "error on member list")
var memberID uint64
for _, m := range members.Members {
if m.Name == epc.Procs[0].Config().Name {
memberID = m.ID
}
}
assert.NotZero(t, memberID, "member not found")
assert.Equal(t, found, true, "member not found")

epc.Procs[0].Stop()
err = testutil.CorruptBBolt(datadir.ToBackendFileName(epc.Procs[0].Config().DataDirPath))
assert.NoError(t, err)
Expand Down
98 changes: 98 additions & 0 deletions tests/e2e/ctl_v3_member_no_proxy_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// Copyright 2023 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !cluster_proxy

package e2e

import (
"context"
"math/rand"
"os"
"testing"
"time"

"github.com/stretchr/testify/require"

"go.etcd.io/etcd/server/v3/etcdserver"
"go.etcd.io/etcd/tests/v3/framework/e2e"
)

func TestMemberReplace(t *testing.T) {
e2e.BeforeTest(t)
ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
defer cancel()

epc, err := e2e.NewEtcdProcessCluster(t, &e2e.EtcdProcessClusterConfig{
ClusterSize: 3,
KeepDataDir: true,
CorruptCheckTime: time.Second,
})
require.NoError(t, err)
defer epc.Close()

memberIdx := rand.Int() % len(epc.Procs)
member := epc.Procs[memberIdx]
memberName := member.Config().Name
var endpoints []string
for i := 1; i < len(epc.Procs); i++ {
endpoints = append(endpoints, epc.Procs[(memberIdx+i)%len(epc.Procs)].EndpointsGRPC()...)
}
cc := NewEtcdctl(endpoints, e2e.ClientNonTLS, false, false)

memberID, found, err := getMemberIdByName(ctx, cc, memberName)
require.NoError(t, err)
require.Equal(t, found, true, "Member not found")

// Need to wait health interval for cluster to accept member changes
time.Sleep(etcdserver.HealthInterval)

t.Logf("Removing member %s", memberName)
_, err = cc.MemberRemove(memberID)
require.NoError(t, err)
_, found, err = getMemberIdByName(ctx, cc, memberName)
require.NoError(t, err)
require.Equal(t, found, false, "Expected member to be removed")
for member.IsRunning() {
member.Close()
time.Sleep(10 * time.Millisecond)
Copy link
Member

@fuweid fuweid Dec 20, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the 3.5 release, there is no background goroutine acting as a reaper for zombie processes.
I think we can just export Close() here. Otherwise, the zombie process keeps running and member.IsRunning stays true until the timeout.

// Close waits for the expect process to exit.
// Close currently does not return error if process exited with !=0 status.
// TODO: Close should expose underlying process failure by default.
func (ep *ExpectProcess) Close() error { return ep.close(false) }
ps -ef | grep 2975576
fuwei    2975576 2973518  0 15:35 pts/1    00:00:03 /tmp/go-build2055634301/b001/e2e.test -test.testlogfile=/tmp/go-build2055634301/b001/testlog.txt -test.paniconexit0 -test.v=true -test.timeout=30m0s -test.run=TestMemberReplace
fuwei    2975587 2975576  0 15:35 ?        00:00:00 [etcd] <defunct>
fuwei    2975588 2975576  0 15:35 pts/39   00:00:02 /home/fuwei/go/src/go.etcd.io/etcd/bin/etcd --name test-0 --listen-client-urls http://localhost:20000 --advertise-client-urls http://localhost:20000 --listen-peer-urls http://localhost:20001 --initial-advertise-peer-urls http://localhost:20001 --initial-cluster-token  --data-dir /tmp/TestMemberReplace206448044/002 --snapshot-count 100000 --experimental-corrupt-check-time 1s --initial-cluster test-0=http://localhost:20001,test-1=http://localhost:20006,test-2=http://localhost:20011
fuwei    2975589 2975576  0 15:35 pts/40   00:00:02 /home/fuwei/go/src/go.etcd.io/etcd/bin/etcd --name test-1 --listen-client-urls http://localhost:20005 --advertise-client-urls http://localhost:20005 --listen-peer-urls http://localhost:20006 --initial-advertise-peer-urls http://localhost:20006 --initial-cluster-token  --data-dir /tmp/TestMemberReplace206448044/003 --snapshot-count 100000 --experimental-corrupt-check-time 1s --initial-cluster test-0=http://localhost:20001,test-1=http://localhost:20006,test-2=http://localhost:20011
fuwei    2975630 2975576  0 15:35 ?        00:00:00 [etcdctl] <defunct>
fuwei    2975657 2975576  0 15:35 ?        00:00:00 [etcdctl] <defunct>
fuwei    2975667 2975576  0 15:35 ?        00:00:00 [etcdctl] <defunct>
fuwei    2982980 2975823  0 15:46 pts/44   00:00:00 grep --color=auto --exclude-dir=.bzr --exclude-dir=CVS --exclude-dir=.git --exclude-dir=.hg --exclude-dir=.svn --exclude-dir=.idea --exclude-dir=.tox 2975576

https://github.com/etcd-io/etcd/blob/5b572f15162d9b61979fb5eed65e65b026917464/pkg/expect/expect.go#L88C8-L88C23

}

t.Logf("Removing member %s data", memberName)
err = os.RemoveAll(member.Config().DataDirPath)
require.NoError(t, err)

t.Logf("Adding member %s back", memberName)
removedMemberPeerUrl := member.Config().Purl.String()
_, err = cc.MemberAdd(memberName, []string{removedMemberPeerUrl})
require.NoError(t, err)
member.Config().Args = patchArgs(member.Config().Args, "initial-cluster-state", "existing")
require.NoError(t, err)

// Sleep 100ms to bypass the known issue https://github.com/etcd-io/etcd/issues/16687.
time.Sleep(100 * time.Millisecond)
t.Logf("Starting member %s", memberName)
err = member.Start()
require.NoError(t, err)
e2e.ExecuteUntil(ctx, t, func() {
for {
_, found, err := getMemberIdByName(ctx, cc, memberName)
if err != nil || !found {
time.Sleep(10 * time.Millisecond)
continue
}
break
}
})
}
18 changes: 18 additions & 0 deletions tests/e2e/etcdctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,24 @@ func (ctl *Etcdctl) MemberList() (*clientv3.MemberListResponse, error) {
return &resp, err
}

// MemberAdd runs "member add <name> --peer-urls <urls>" through the JSON
// command helper and decodes the output into a MemberAddResponse.
// peerURLs are passed as a single comma-separated value.
// It panics when the Etcdctl is configured for v2.
func (ctl *Etcdctl) MemberAdd(name string, peerURLs []string) (*clientv3.MemberAddResponse, error) {
	if ctl.v2 {
		panic("Unsupported method for v2")
	}
	joinedURLs := strings.Join(peerURLs, ",")
	out := new(clientv3.MemberAddResponse)
	cmdErr := ctl.spawnJsonCmd(out, "member", "add", name, "--peer-urls", joinedURLs)
	return out, cmdErr
}

// MemberRemove runs "member remove <id>" through the JSON command helper and
// decodes the output into a MemberRemoveResponse. The id is rendered as
// lowercase hexadecimal without a prefix.
// It panics when the Etcdctl is configured for v2.
func (ctl *Etcdctl) MemberRemove(id uint64) (*clientv3.MemberRemoveResponse, error) {
	if ctl.v2 {
		panic("Unsupported method for v2")
	}
	hexID := fmt.Sprintf("%x", id)
	out := new(clientv3.MemberRemoveResponse)
	cmdErr := ctl.spawnJsonCmd(out, "member", "remove", hexID)
	return out, cmdErr
}

func (ctl *Etcdctl) Compact(rev int64) (*clientv3.CompactResponse, error) {
if ctl.v2 {
panic("Unsupported method for v2")
Expand Down
27 changes: 27 additions & 0 deletions tests/e2e/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package e2e
import (
"context"
"fmt"
"strings"
"testing"
"time"

Expand Down Expand Up @@ -114,3 +115,29 @@ func fillEtcdWithData(ctx context.Context, c *clientv3.Client, dbSize int) error
}
return g.Wait()
}

// getMemberIdByName lists the cluster members via c.MemberList and returns the
// ID of the first member whose Name equals name.
//
// found reports whether such a member exists; when it does not, id is 0 and
// err is nil. A failure of the member list call is returned as err with
// found == false.
//
// NOTE: ctx is accepted but not used by this implementation.
func getMemberIdByName(ctx context.Context, c *Etcdctl, name string) (id uint64, found bool, err error) {
	list, listErr := c.MemberList()
	if listErr != nil {
		return 0, false, listErr
	}
	for i := range list.Members {
		if m := list.Members[i]; m.Name == name {
			return m.ID, true, nil
		}
	}
	return 0, false, nil
}

// patchArgs sets the command-line flag named flag (given without leading
// dashes) to newValue and returns the resulting argument slice. The input
// slice may be modified in place, so callers must use the returned slice.
//
// This differs from the main branch implementation, which returns an error
// when the flag is missing: the 3.5 e2e test framework does not pass
// "initial-cluster-state" as a default argument, so a missing flag is
// appended instead of being treated as an error.
//
// Only the first occurrence of the flag is patched. The flag name must match
// exactly, so patching "initial-cluster" does not touch
// "--initial-cluster-state=...". Both "--flag=value" and the two-token
// "--flag value" form are recognized, with one or two leading dashes; in the
// two-token form the stale value token is dropped.
func patchArgs(args []string, flag, newValue string) []string {
	patched := fmt.Sprintf("--%s=%s", flag, newValue)
	for i, arg := range args {
		var name string
		switch {
		case strings.HasPrefix(arg, "--"):
			name = arg[2:]
		case strings.HasPrefix(arg, "-"):
			name = arg[1:]
		default:
			// Not a flag token (e.g. the value of a preceding flag).
			continue
		}

		if strings.HasPrefix(name, flag+"=") {
			args[i] = patched
			return args
		}
		if name == flag {
			args[i] = patched
			// Two-token form: the next token is the old value unless it is
			// itself a flag; remove it so it does not become a stray argument.
			if i+1 < len(args) && !strings.HasPrefix(args[i+1], "-") {
				args = append(args[:i+1], args[i+2:]...)
			}
			return args
		}
	}
	return append(args, patched)
}
Comment on lines +134 to +143
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the implementation different from the main branch?

etcd/tests/e2e/utils.go

Lines 139 to 147 in 93530f6

// patchArgs replaces the first element of args that contains flag as a
// substring with "--<flag>=<newValue>", modifying args in place.
// It returns an error if no element of args contains flag.
func patchArgs(args []string, flag, newValue string) error {
for i, arg := range args {
if strings.Contains(arg, flag) {
args[i] = fmt.Sprintf("--%s=%s", flag, newValue)
return nil
}
}
return fmt.Errorf("--%s flag not found", flag)
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

3.5 does not have "initial-cluster-state" as a default argument in the e2e framework, so we need to append it to the args.

func (cfg *EtcdProcessClusterConfig) SetInitialOrDiscovery(serverCfg *EtcdServerProcessConfig, initialCluster []string, initialClusterState string) {
if cfg.Discovery == "" && len(cfg.ServerConfig.DiscoveryCfg.Endpoints) == 0 {
serverCfg.InitialCluster = strings.Join(initialCluster, ",")
serverCfg.Args = append(serverCfg.Args, "--initial-cluster="+serverCfg.InitialCluster)
serverCfg.Args = append(serverCfg.Args, "--initial-cluster-state="+initialClusterState)
}

First introduced in commit 6f63f4b

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should backport #16707 to 3.5 if possible.

Please add a comment to explain the difference for now.

Loading