From 71621d3cc296190f11cba4532bc4afcb73e314c6 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 10 Nov 2023 17:42:43 +0800 Subject: [PATCH] checker: refactor fixOrphanPeers (#7342) ref tikv/pd#4399 Signed-off-by: lhy1024 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- pkg/schedule/checker/rule_checker.go | 42 +++++++++++++---------- pkg/schedule/checker/rule_checker_test.go | 1 + server/cluster/cluster_test.go | 2 +- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/pkg/schedule/checker/rule_checker.go b/pkg/schedule/checker/rule_checker.go index c4e7c242dea..08ef5f7b45c 100644 --- a/pkg/schedule/checker/rule_checker.go +++ b/pkg/schedule/checker/rule_checker.go @@ -449,23 +449,31 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg return nil, nil } - isUnhealthyPeer := func(id uint64) bool { - for _, downPeer := range region.GetDownPeers() { - if downPeer.Peer.GetId() == id { + isPendingPeer := func(id uint64) bool { + for _, pendingPeer := range region.GetPendingPeers() { + if pendingPeer.GetId() == id { return true } } - for _, pendingPeer := range region.GetPendingPeers() { - if pendingPeer.GetId() == id { + return false + } + + isDownPeer := func(id uint64) bool { + for _, downPeer := range region.GetDownPeers() { + if downPeer.Peer.GetId() == id { return true } } return false } - isDisconnectedPeer := func(p *metapb.Peer) bool { + isUnhealthyPeer := func(id uint64) bool { + return isPendingPeer(id) || isDownPeer(id) + } + + isInDisconnectedStore := func(p *metapb.Peer) bool { // avoid to meet down store when fix orphan peers, - // Isdisconnected is more strictly than IsUnhealthy. + // isInDisconnectedStore is usually more strictly than IsUnhealthy. store := c.cluster.GetStore(p.GetStoreId()) if store == nil { return true @@ -475,16 +483,12 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg checkDownPeer := func(peers []*metapb.Peer) (*metapb.Peer, bool) { for _, p := range peers { - if isUnhealthyPeer(p.GetId()) { - // make sure is down peer. - if region.GetDownPeer(p.GetId()) != nil { - return p, true - } - return nil, true - } - if isDisconnectedPeer(p) { + if isInDisconnectedStore(p) || isDownPeer(p.GetId()) { return p, true } + if isPendingPeer(p.GetId()) { + return nil, true + } } return nil, false } @@ -517,7 +521,7 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg continue } // make sure the orphan peer is healthy. - if isUnhealthyPeer(orphanPeer.GetId()) || isDisconnectedPeer(orphanPeer) { + if isUnhealthyPeer(orphanPeer.GetId()) || isInDisconnectedStore(orphanPeer) { continue } // no consider witness in this path. @@ -525,7 +529,7 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg continue } // pinDownPeer's store should be disconnected, because we use more strict judge before. - if !isDisconnectedPeer(pinDownPeer) { + if !isInDisconnectedStore(pinDownPeer) { continue } // check if down peer can replace with orphan peer. @@ -539,7 +543,7 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg return operator.CreatePromoteLearnerOperatorAndRemovePeer("replace-down-peer-with-orphan-peer", c.cluster, region, orphanPeer, pinDownPeer) case orphanPeerRole == metapb.PeerRole_Voter && destRole == metapb.PeerRole_Learner: return operator.CreateDemoteLearnerOperatorAndRemovePeer("replace-down-peer-with-orphan-peer", c.cluster, region, orphanPeer, pinDownPeer) - case orphanPeerRole == destRole && isDisconnectedPeer(pinDownPeer) && !dstStore.IsDisconnected(): + case orphanPeerRole == destRole && isInDisconnectedStore(pinDownPeer) && !dstStore.IsDisconnected(): return operator.CreateRemovePeerOperator("remove-replaced-orphan-peer", c.cluster, 0, region, pinDownPeer.GetStoreId()) default: // destRole should not same with orphanPeerRole. if role is same, it fit with orphanPeer should be better than now. @@ -557,7 +561,7 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg hasHealthPeer := false var disconnectedPeer *metapb.Peer for _, orphanPeer := range fit.OrphanPeers { - if isDisconnectedPeer(orphanPeer) { + if isInDisconnectedStore(orphanPeer) { disconnectedPeer = orphanPeer break } diff --git a/pkg/schedule/checker/rule_checker_test.go b/pkg/schedule/checker/rule_checker_test.go index eb357f302b7..4185ce6c167 100644 --- a/pkg/schedule/checker/rule_checker_test.go +++ b/pkg/schedule/checker/rule_checker_test.go @@ -1052,6 +1052,7 @@ func (suite *ruleCheckerTestSuite) TestPriorityFitHealthWithDifferentRole1() { suite.Equal("replace-down-peer-with-orphan-peer", op.Desc()) // set peer3 only pending + suite.cluster.GetStore(3).GetMeta().LastHeartbeat = time.Now().UnixNano() r1 = r1.Clone(core.WithDownPeers(nil)) suite.cluster.PutRegion(r1) op = suite.rc.Check(suite.cluster.GetRegion(1)) diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index 89c9ea32f19..70782e27cd3 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -2792,7 +2792,7 @@ func TestReplica(t *testing.T) { re.NoError(dispatchHeartbeat(co, region, stream)) waitNoResponse(re, stream) - // Remove peer from store 4. + // Remove peer from store 3. re.NoError(tc.addLeaderRegion(2, 1, 2, 3, 4)) region = tc.GetRegion(2) re.NoError(dispatchHeartbeat(co, region, stream))