diff --git a/pkg/member/member.go b/pkg/member/member.go index 8d0eb978c50..af7c1dfe50e 100644 --- a/pkg/member/member.go +++ b/pkg/member/member.go @@ -204,7 +204,7 @@ func (m *EmbeddedEtcdMember) KeepLeader(ctx context.Context) { // PreCheckLeader does some pre-check before checking whether it's the leader. func (m *EmbeddedEtcdMember) PreCheckLeader() error { - if m.GetEtcdLeader() == 0 { + if m.GetEtcdLeaderID() == 0 { return errs.ErrEtcdLeaderNotFound } return nil @@ -281,7 +281,7 @@ func (m *EmbeddedEtcdMember) ResetLeader() { // CheckPriority checks whether the etcd leader should be moved according to the priority. func (m *EmbeddedEtcdMember) CheckPriority(ctx context.Context) { - etcdLeader := m.GetEtcdLeader() + etcdLeader := m.GetEtcdLeaderID() if etcdLeader == m.ID() || etcdLeader == 0 { return } @@ -318,8 +318,8 @@ func (m *EmbeddedEtcdMember) MoveEtcdLeader(ctx context.Context, old, new uint64 return nil } -// GetEtcdLeader returns the etcd leader ID. -func (m *EmbeddedEtcdMember) GetEtcdLeader() uint64 { +// GetEtcdLeaderID returns the etcd leader ID. 
+func (m *EmbeddedEtcdMember) GetEtcdLeaderID() uint64 { return m.etcd.Server.Lead() } diff --git a/server/api/member.go b/server/api/member.go index df8c0aee622..0cae90726ab 100644 --- a/server/api/member.go +++ b/server/api/member.go @@ -87,7 +87,7 @@ func getMembers(svr *server.Server) (*pdpb.GetMembersResponse, error) { if e != nil { log.Error("failed to load deploy path", zap.Uint64("member", m.GetMemberId()), errs.ZapError(e)) } - if svr.GetMember().GetEtcdLeader() == 0 { + if svr.GetMember().GetEtcdLeaderID() == 0 { log.Warn("no etcd leader, skip get leader priority", zap.Uint64("member", m.GetMemberId())) continue } diff --git a/server/grpc_service.go b/server/grpc_service.go index ef7020f7fee..36814200709 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -473,9 +473,9 @@ func (s *GrpcServer) GetMembers(context.Context, *pdpb.GetMembersRequest) (*pdpb } var etcdLeader, pdLeader *pdpb.Member - leaderID := s.member.GetEtcdLeader() + etcdLeaderID := s.member.GetEtcdLeaderID() for _, m := range members { - if m.MemberId == leaderID { + if m.MemberId == etcdLeaderID { etcdLeader = m break } @@ -499,6 +499,15 @@ func (s *GrpcServer) GetMembers(context.Context, *pdpb.GetMembersRequest) (*pdpb break } } + failpoint.Inject("noLeaderInMembers", func() { + pdLeader = nil + }) + // If the leader is not in the member list, we should fall back to + // the `leader` set in the PD server to gain better availability. + // See https://github.com/tikv/pd/issues/7752 for more details. + if pdLeader == nil && leader.GetMemberId() == etcdLeaderID { + pdLeader = leader + } return &pdpb.GetMembersResponse{ Header: s.header(), diff --git a/server/server.go b/server/server.go index be886a56712..a65c474b80a 100644 --- a/server/server.go +++ b/server/server.go @@ -1645,7 +1645,7 @@ func (s *Server) leaderLoop() { } // To make sure the etcd leader and PD leader are on the same server. 
- etcdLeader := s.member.GetEtcdLeader() + etcdLeader := s.member.GetEtcdLeaderID() if etcdLeader != s.member.ID() { if s.member.GetLeader() == nil { lastUpdated := s.member.GetLastLeaderUpdatedTime() @@ -1800,7 +1800,7 @@ func (s *Server) campaignLeader() { } }) - etcdLeader := s.member.GetEtcdLeader() + etcdLeader := s.member.GetEtcdLeaderID() if etcdLeader != s.member.ID() { log.Info("etcd leader changed, resigns pd leadership", zap.String("old-pd-leader-name", s.Name())) return diff --git a/tests/server/cluster/cluster_test.go b/tests/server/cluster/cluster_test.go index 67c798d7f69..f582b76779d 100644 --- a/tests/server/cluster/cluster_test.go +++ b/tests/server/cluster/cluster_test.go @@ -543,11 +543,19 @@ func TestGetPDMembers(t *testing.T) { grpcPDClient := testutil.MustNewGrpcClient(re, leaderServer.GetAddr()) clusterID := leaderServer.GetClusterID() req := &pdpb.GetMembersRequest{Header: testutil.NewRequestHeader(clusterID)} - resp, err := grpcPDClient.GetMembers(context.Background(), req) + resp, err := grpcPDClient.GetMembers(ctx, req) re.NoError(err) re.Equal(pdpb.ErrorType_OK, resp.GetHeader().GetError().GetType()) - // A more strict test can be found at api/member_test.go re.NotEmpty(resp.GetMembers()) + re.Equal(leaderServer.GetLeader(), resp.GetLeader()) + // Test the member list does not include the PD leader. + re.NoError(failpoint.Enable("github.com/tikv/pd/server/noLeaderInMembers", `return(true)`)) + resp, err = grpcPDClient.GetMembers(ctx, req) + re.NoError(err) + re.Equal(pdpb.ErrorType_OK, resp.GetHeader().GetError().GetType()) + re.NotEmpty(resp.GetMembers()) + re.Equal(leaderServer.GetLeader(), resp.GetLeader()) + re.NoError(failpoint.Disable("github.com/tikv/pd/server/noLeaderInMembers")) } func TestNotLeader(t *testing.T) {