From a22710c52b50985d9901f922d874bac08f41199e Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Wed, 8 Nov 2023 19:59:11 +0800 Subject: [PATCH] checker: reduces the probability of deleting normal peers when the store becomes unavailable (#7249) (#7330) close tikv/pd#7249 Signed-off-by: lhy1024 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- pkg/schedule/checker/rule_checker.go | 70 +++-- pkg/schedule/checker/rule_checker_test.go | 304 +++++++++++++++++++++- server/cluster/cluster_test.go | 2 +- 3 files changed, 358 insertions(+), 18 deletions(-) diff --git a/pkg/schedule/checker/rule_checker.go b/pkg/schedule/checker/rule_checker.go index b0537bf9ce4..c4e7c242dea 100644 --- a/pkg/schedule/checker/rule_checker.go +++ b/pkg/schedule/checker/rule_checker.go @@ -78,6 +78,7 @@ var ( ruleCheckerSkipRemoveOrphanPeerCounter = checkerCounter.WithLabelValues(ruleChecker, "skip-remove-orphan-peer") ruleCheckerRemoveOrphanPeerCounter = checkerCounter.WithLabelValues(ruleChecker, "remove-orphan-peer") ruleCheckerReplaceOrphanPeerCounter = checkerCounter.WithLabelValues(ruleChecker, "replace-orphan-peer") + ruleCheckerReplaceOrphanPeerNoFitCounter = checkerCounter.WithLabelValues(ruleChecker, "replace-orphan-peer-no-fit") ) // RuleChecker fix/improve region by placement rules. @@ -447,7 +448,7 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg if len(fit.OrphanPeers) == 0 { return nil, nil } - var pinDownPeer *metapb.Peer + isUnhealthyPeer := func(id uint64) bool { for _, downPeer := range region.GetDownPeers() { if downPeer.Peer.GetId() == id { @@ -461,24 +462,45 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg } return false } + + isDisconnectedPeer := func(p *metapb.Peer) bool { + // avoid to meet down store when fix orphan peers, + // Isdisconnected is more strictly than IsUnhealthy. + store := c.cluster.GetStore(p.GetStoreId()) + if store == nil { + return true + } + return store.IsDisconnected() + } + + checkDownPeer := func(peers []*metapb.Peer) (*metapb.Peer, bool) { + for _, p := range peers { + if isUnhealthyPeer(p.GetId()) { + // make sure is down peer. + if region.GetDownPeer(p.GetId()) != nil { + return p, true + } + return nil, true + } + if isDisconnectedPeer(p) { + return p, true + } + } + return nil, false + } + // remove orphan peers only when all rules are satisfied (count+role) and all peers selected // by RuleFits is not pending or down. + var pinDownPeer *metapb.Peer hasUnhealthyFit := false -loopFits: for _, rf := range fit.RuleFits { if !rf.IsSatisfied() { hasUnhealthyFit = true break } - for _, p := range rf.Peers { - if isUnhealthyPeer(p.GetId()) { - // make sure is down peer. - if region.GetDownPeer(p.GetId()) != nil { - pinDownPeer = p - } - hasUnhealthyFit = true - break loopFits - } + pinDownPeer, hasUnhealthyFit = checkDownPeer(rf.Peers) + if hasUnhealthyFit { + break } } @@ -491,16 +513,19 @@ loopFits: // try to use orphan peers to replace unhealthy down peers. for _, orphanPeer := range fit.OrphanPeers { if pinDownPeer != nil { + if pinDownPeer.GetId() == orphanPeer.GetId() { + continue + } // make sure the orphan peer is healthy. - if isUnhealthyPeer(orphanPeer.GetId()) { + if isUnhealthyPeer(orphanPeer.GetId()) || isDisconnectedPeer(orphanPeer) { continue } // no consider witness in this path. if pinDownPeer.GetIsWitness() || orphanPeer.GetIsWitness() { continue } - // down peer's store should be down. - if !c.isStoreDownTimeHitMaxDownTime(pinDownPeer.GetStoreId()) { + // pinDownPeer's store should be disconnected, because we use more strict judge before. + if !isDisconnectedPeer(pinDownPeer) { continue } // check if down peer can replace with orphan peer. @@ -514,10 +539,14 @@ loopFits: return operator.CreatePromoteLearnerOperatorAndRemovePeer("replace-down-peer-with-orphan-peer", c.cluster, region, orphanPeer, pinDownPeer) case orphanPeerRole == metapb.PeerRole_Voter && destRole == metapb.PeerRole_Learner: return operator.CreateDemoteLearnerOperatorAndRemovePeer("replace-down-peer-with-orphan-peer", c.cluster, region, orphanPeer, pinDownPeer) + case orphanPeerRole == destRole && isDisconnectedPeer(pinDownPeer) && !dstStore.IsDisconnected(): + return operator.CreateRemovePeerOperator("remove-replaced-orphan-peer", c.cluster, 0, region, pinDownPeer.GetStoreId()) default: // destRole should not same with orphanPeerRole. if role is same, it fit with orphanPeer should be better than now. // destRole never be leader, so we not consider it. } + } else { + ruleCheckerReplaceOrphanPeerNoFitCounter.Inc() } } } @@ -526,14 +555,25 @@ loopFits: // Ref https://github.com/tikv/pd/issues/4045 if len(fit.OrphanPeers) >= 2 { hasHealthPeer := false + var disconnectedPeer *metapb.Peer + for _, orphanPeer := range fit.OrphanPeers { + if isDisconnectedPeer(orphanPeer) { + disconnectedPeer = orphanPeer + break + } + } for _, orphanPeer := range fit.OrphanPeers { if isUnhealthyPeer(orphanPeer.GetId()) { ruleCheckerRemoveOrphanPeerCounter.Inc() - return operator.CreateRemovePeerOperator("remove-orphan-peer", c.cluster, 0, region, orphanPeer.StoreId) + return operator.CreateRemovePeerOperator("remove-unhealthy-orphan-peer", c.cluster, 0, region, orphanPeer.StoreId) } if hasHealthPeer { // there already exists a healthy orphan peer, so we can remove other orphan Peers. ruleCheckerRemoveOrphanPeerCounter.Inc() + // if there exists a disconnected orphan peer, we will pick it to remove firstly. + if disconnectedPeer != nil { + return operator.CreateRemovePeerOperator("remove-orphan-peer", c.cluster, 0, region, disconnectedPeer.StoreId) + } return operator.CreateRemovePeerOperator("remove-orphan-peer", c.cluster, 0, region, orphanPeer.StoreId) } hasHealthPeer = true diff --git a/pkg/schedule/checker/rule_checker_test.go b/pkg/schedule/checker/rule_checker_test.go index 8ee3b1eccfa..d75841bcf83 100644 --- a/pkg/schedule/checker/rule_checker_test.go +++ b/pkg/schedule/checker/rule_checker_test.go @@ -225,7 +225,6 @@ func (suite *ruleCheckerTestSuite) TestFixToManyOrphanPeers() { suite.NotNil(op) suite.Equal("remove-orphan-peer", op.Desc()) suite.Equal(uint64(5), op.Step(0).(operator.RemovePeer).FromStore) - // Case2: // store 4, 5, 6 are orphan peers, and peer on store 3 is down peer. and peer on store 4, 5 are pending. region = suite.cluster.GetRegion(1) @@ -235,6 +234,91 @@ func (suite *ruleCheckerTestSuite) TestFixToManyOrphanPeers() { suite.cluster.PutRegion(region) op = suite.rc.Check(suite.cluster.GetRegion(1)) suite.NotNil(op) + suite.Equal("remove-unhealthy-orphan-peer", op.Desc()) + suite.Equal(uint64(4), op.Step(0).(operator.RemovePeer).FromStore) + // Case3: + // store 4, 5, 6 are orphan peers, and peer on one of stores is disconnect peer + // we should remove disconnect peer first. + for i := uint64(4); i <= 6; i++ { + region = suite.cluster.GetRegion(1) + suite.cluster.SetStoreDisconnect(i) + region = region.Clone( + core.WithDownPeers([]*pdpb.PeerStats{{Peer: region.GetStorePeer(3), DownSeconds: 60000}}), + core.WithPendingPeers([]*metapb.Peer{region.GetStorePeer(3)})) + suite.cluster.PutRegion(region) + op = suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Equal("remove-orphan-peer", op.Desc()) + suite.Equal(i, op.Step(0).(operator.RemovePeer).FromStore) + suite.cluster.SetStoreUp(i) + } + // Case4: + // store 4, 5, 6 are orphan peers, and peer on two of stores is disconnect peer + // we should remove disconnect peer first. + for i := uint64(4); i <= 6; i++ { + region = suite.cluster.GetRegion(1) + suite.cluster.SetStoreDisconnect(4) + suite.cluster.SetStoreDisconnect(5) + suite.cluster.SetStoreDisconnect(6) + suite.cluster.SetStoreUp(i) + region = region.Clone( + core.WithDownPeers([]*pdpb.PeerStats{{Peer: region.GetStorePeer(3), DownSeconds: 60000}}), + core.WithPendingPeers([]*metapb.Peer{region.GetStorePeer(3)})) + suite.cluster.PutRegion(region) + op = suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Equal("remove-orphan-peer", op.Desc()) + removedPeerStoreID := op.Step(0).(operator.RemovePeer).FromStore + suite.NotEqual(i, removedPeerStoreID) + region = suite.cluster.GetRegion(1) + newRegion := region.Clone(core.WithRemoveStorePeer(removedPeerStoreID)) + suite.cluster.PutRegion(newRegion) + op = suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Equal("remove-orphan-peer", op.Desc()) + removedPeerStoreID = op.Step(0).(operator.RemovePeer).FromStore + suite.NotEqual(i, removedPeerStoreID) + suite.cluster.PutRegion(region) + } +} + +func (suite *ruleCheckerTestSuite) TestFixToManyOrphanPeers2() { + suite.cluster.AddLeaderStore(1, 1) + suite.cluster.AddLeaderStore(2, 1) + suite.cluster.AddLeaderStore(3, 1) + suite.cluster.AddLeaderStore(4, 1) + suite.cluster.AddLeaderStore(5, 1) + suite.cluster.AddRegionWithLearner(1, 1, []uint64{2, 3}, []uint64{4, 5}) + + // Case1: + // store 4, 5 are orphan peers, and peer on one of stores is disconnect peer + // we should remove disconnect peer first. + for i := uint64(4); i <= 5; i++ { + region := suite.cluster.GetRegion(1) + suite.cluster.SetStoreDisconnect(i) + region = region.Clone( + core.WithDownPeers([]*pdpb.PeerStats{{Peer: region.GetStorePeer(3), DownSeconds: 60000}}), + core.WithPendingPeers([]*metapb.Peer{region.GetStorePeer(3)})) + suite.cluster.PutRegion(region) + op := suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Equal("remove-orphan-peer", op.Desc()) + suite.Equal(i, op.Step(0).(operator.RemovePeer).FromStore) + suite.cluster.SetStoreUp(i) + } + + // Case2: + // store 4, 5 are orphan peers, and they are disconnect peers + // we should remove the peer on disconnect stores at least. + region := suite.cluster.GetRegion(1) + suite.cluster.SetStoreDisconnect(4) + suite.cluster.SetStoreDisconnect(5) + region = region.Clone( + core.WithDownPeers([]*pdpb.PeerStats{{Peer: region.GetStorePeer(3), DownSeconds: 60000}}), + core.WithPendingPeers([]*metapb.Peer{region.GetStorePeer(3)})) + suite.cluster.PutRegion(region) + op := suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) suite.Equal("remove-orphan-peer", op.Desc()) suite.Equal(uint64(4), op.Step(0).(operator.RemovePeer).FromStore) } @@ -702,7 +786,7 @@ func (suite *ruleCheckerTestSuite) TestPriorityFixOrphanPeer() { suite.cluster.PutRegion(testRegion) op = suite.rc.Check(suite.cluster.GetRegion(1)) suite.NotNil(op) - suite.Equal("remove-orphan-peer", op.Desc()) + suite.Equal("remove-unhealthy-orphan-peer", op.Desc()) suite.IsType(remove, op.Step(0)) // Ref #3521 suite.cluster.SetStoreOffline(2) @@ -723,6 +807,222 @@ func (suite *ruleCheckerTestSuite) TestPriorityFixOrphanPeer() { suite.Equal("remove-orphan-peer", op.Desc()) } +// Ref https://github.com/tikv/pd/issues/7249 https://github.com/tikv/tikv/issues/15799 +func (suite *ruleCheckerTestSuite) TestFixOrphanPeerWithDisconnectedStoreAndRuleChanged() { + // disconnect any two stores and change rule to 3 replicas + stores := []uint64{1, 2, 3, 4, 5} + testCases := [][]uint64{} + for i := 0; i < len(stores); i++ { + for j := i + 1; j < len(stores); j++ { + testCases = append(testCases, []uint64{stores[i], stores[j]}) + } + } + for _, leader := range stores { + var followers []uint64 + for i := 0; i < len(stores); i++ { + if stores[i] != leader { + followers = append(followers, stores[i]) + } + } + + for _, testCase := range testCases { + // init cluster with 5 replicas + suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host2"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host3"}) + suite.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4"}) + suite.cluster.AddLabelsStore(5, 1, map[string]string{"host": "host5"}) + suite.cluster.AddLeaderRegionWithRange(1, "", "", leader, followers...) + rule := &placement.Rule{ + GroupID: "pd", + ID: "default", + Role: placement.Voter, + Count: 5, + StartKey: []byte{}, + EndKey: []byte{}, + } + err := suite.ruleManager.SetRule(rule) + suite.NoError(err) + op := suite.rc.Check(suite.cluster.GetRegion(1)) + suite.Nil(op) + + // set two stores to disconnected + suite.cluster.SetStoreDisconnect(testCase[0]) + suite.cluster.SetStoreDisconnect(testCase[1]) + + // change rule to 3 replicas + rule = &placement.Rule{ + GroupID: "pd", + ID: "default", + Role: placement.Voter, + Count: 3, + StartKey: []byte{}, + EndKey: []byte{}, + Override: true, + } + suite.ruleManager.SetRule(rule) + + // remove peer from region 1 + for j := 1; j <= 2; j++ { + r1 := suite.cluster.GetRegion(1) + op = suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Contains(op.Desc(), "orphan") + var removedPeerStoreID uint64 + newLeaderStoreID := r1.GetLeader().GetStoreId() + for i := 0; i < op.Len(); i++ { + if s, ok := op.Step(i).(operator.RemovePeer); ok { + removedPeerStoreID = s.FromStore + } + if s, ok := op.Step(i).(operator.TransferLeader); ok { + newLeaderStoreID = s.ToStore + } + } + suite.NotZero(removedPeerStoreID) + r1 = r1.Clone( + core.WithLeader(r1.GetStorePeer(newLeaderStoreID)), + core.WithRemoveStorePeer(removedPeerStoreID)) + suite.cluster.PutRegion(r1) + r1 = suite.cluster.GetRegion(1) + suite.Len(r1.GetPeers(), 5-j) + } + + r1 := suite.cluster.GetRegion(1) + for _, p := range r1.GetPeers() { + suite.NotEqual(p.GetStoreId(), testCase[0]) + suite.NotEqual(p.GetStoreId(), testCase[1]) + } + suite.TearDownTest() + suite.SetupTest() + } + } +} + +// Ref https://github.com/tikv/pd/issues/7249 https://github.com/tikv/tikv/issues/15799 +func (suite *ruleCheckerTestSuite) TestFixOrphanPeerWithDisconnectedStoreAndRuleChangedWithLearner() { + // disconnect any three stores and change rule to 3 replicas + // and there is a learner in the disconnected store. + stores := []uint64{1, 2, 3, 4, 5, 6} + testCases := [][]uint64{} + for i := 0; i < len(stores); i++ { + for j := i + 1; j < len(stores); j++ { + for k := j + 1; k < len(stores); k++ { + testCases = append(testCases, []uint64{stores[i], stores[j], stores[k]}) + } + } + } + for _, leader := range stores { + var followers []uint64 + for i := 0; i < len(stores); i++ { + if stores[i] != leader { + followers = append(followers, stores[i]) + } + } + + for _, testCase := range testCases { + for _, learnerStore := range testCase { + if learnerStore == leader { + continue + } + voterFollowers := []uint64{} + for _, follower := range followers { + if follower != learnerStore { + voterFollowers = append(voterFollowers, follower) + } + } + // init cluster with 5 voters and 1 learner + suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host2"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host3"}) + suite.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4"}) + suite.cluster.AddLabelsStore(5, 1, map[string]string{"host": "host5"}) + suite.cluster.AddLabelsStore(6, 1, map[string]string{"host": "host6"}) + suite.cluster.AddLeaderRegionWithRange(1, "", "", leader, voterFollowers...) + err := suite.ruleManager.SetRules([]*placement.Rule{ + { + GroupID: "pd", + ID: "default", + Index: 100, + Override: true, + Role: placement.Voter, + Count: 5, + IsWitness: false, + }, + { + GroupID: "pd", + ID: "r1", + Index: 100, + Override: false, + Role: placement.Learner, + Count: 1, + IsWitness: false, + LabelConstraints: []placement.LabelConstraint{ + {Key: "host", Op: "in", Values: []string{"host" + strconv.FormatUint(learnerStore, 10)}}, + }, + }, + }) + suite.NoError(err) + r1 := suite.cluster.GetRegion(1) + r1 = r1.Clone(core.WithAddPeer(&metapb.Peer{Id: 12, StoreId: learnerStore, Role: metapb.PeerRole_Learner})) + suite.cluster.PutRegion(r1) + op := suite.rc.Check(suite.cluster.GetRegion(1)) + suite.Nil(op) + + // set three stores to disconnected + suite.cluster.SetStoreDisconnect(testCase[0]) + suite.cluster.SetStoreDisconnect(testCase[1]) + suite.cluster.SetStoreDisconnect(testCase[2]) + + // change rule to 3 replicas + suite.ruleManager.DeleteRule("pd", "r1") + suite.ruleManager.SetRule(&placement.Rule{ + GroupID: "pd", + ID: "default", + Role: placement.Voter, + Count: 3, + StartKey: []byte{}, + EndKey: []byte{}, + Override: true, + }) + + // remove peer from region 1 + for j := 1; j <= 3; j++ { + r1 := suite.cluster.GetRegion(1) + op = suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Contains(op.Desc(), "orphan") + var removedPeerStroeID uint64 + newLeaderStoreID := r1.GetLeader().GetStoreId() + for i := 0; i < op.Len(); i++ { + if s, ok := op.Step(i).(operator.RemovePeer); ok { + removedPeerStroeID = s.FromStore + } + if s, ok := op.Step(i).(operator.TransferLeader); ok { + newLeaderStoreID = s.ToStore + } + } + suite.NotZero(removedPeerStroeID) + r1 = r1.Clone( + core.WithLeader(r1.GetStorePeer(newLeaderStoreID)), + core.WithRemoveStorePeer(removedPeerStroeID)) + suite.cluster.PutRegion(r1) + r1 = suite.cluster.GetRegion(1) + suite.Len(r1.GetPeers(), 6-j) + } + + r1 = suite.cluster.GetRegion(1) + for _, p := range r1.GetPeers() { + suite.NotEqual(p.GetStoreId(), testCase[0]) + suite.NotEqual(p.GetStoreId(), testCase[1]) + suite.NotEqual(p.GetStoreId(), testCase[2]) + } + suite.TearDownTest() + suite.SetupTest() + } + } + } +} + func (suite *ruleCheckerTestSuite) TestPriorityFitHealthWithDifferentRole1() { suite.cluster.SetEnableUseJointConsensus(true) suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index b7b9dcfb736..89c9ea32f19 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -2796,7 +2796,7 @@ func TestReplica(t *testing.T) { re.NoError(tc.addLeaderRegion(2, 1, 2, 3, 4)) region = tc.GetRegion(2) re.NoError(dispatchHeartbeat(co, region, stream)) - region = waitRemovePeer(re, stream, region, 4) + region = waitRemovePeer(re, stream, region, 3) // store3 is down, we should remove it firstly. re.NoError(dispatchHeartbeat(co, region, stream)) waitNoResponse(re, stream)