diff --git a/server/schedule/checker/rule_checker.go b/server/schedule/checker/rule_checker.go index 4ed4820bdf9..755d1acd009 100644 --- a/server/schedule/checker/rule_checker.go +++ b/server/schedule/checker/rule_checker.go @@ -61,6 +61,11 @@ func (c *RuleChecker) Check(region *core.RegionInfo) *operator.Operator { // multiple rules. return c.fixRange(region) } + op, err := c.fixOrphanPeers(region, fit) + if err == nil && op != nil { + return op + } + log.Debug("fail to fix orphan peer", errs.ZapError(err)) for _, rf := range fit.RuleFits { op, err := c.fixRulePeer(region, fit, rf) if err != nil { @@ -71,12 +76,7 @@ func (c *RuleChecker) Check(region *core.RegionInfo) *operator.Operator { return op } } - op, err := c.fixOrphanPeers(region, fit) - if err != nil { - log.Debug("fail to fix orphan peer", errs.ZapError(err)) - return nil - } - return op + return nil } func (c *RuleChecker) fixRange(region *core.RegionInfo) *operator.Operator { diff --git a/server/schedule/checker/rule_checker_test.go b/server/schedule/checker/rule_checker_test.go index 466a8a7c2ff..41d99006982 100644 --- a/server/schedule/checker/rule_checker_test.go +++ b/server/schedule/checker/rule_checker_test.go @@ -299,3 +299,34 @@ func (s *testRuleCheckerSuite) TestIssue2419(c *C) { c.Assert(op.Step(1).(operator.PromoteLearner).ToStore, Equals, uint64(4)) c.Assert(op.Step(2).(operator.RemovePeer).FromStore, Equals, uint64(3)) } + +// Ref https://github.com/tikv/pd/issues/3521 +// The problem is when offline a store, we may add learner multiple times if +// the operator is timeout. +func (s *testRuleCheckerSuite) TestIssue3521_PriorityFixOrphanPeer(c *C) { + s.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) + s.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host1"}) + s.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host2"}) + s.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4"}) + s.cluster.AddLabelsStore(5, 1, map[string]string{"host": "host5"}) + s.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 3) + op := s.rc.Check(s.cluster.GetRegion(1)) + c.Assert(op, IsNil) + var add operator.AddLearner + var remove operator.RemovePeer + s.cluster.SetStoreOffline(2) + op = s.rc.Check(s.cluster.GetRegion(1)) + c.Assert(op, NotNil) + c.Assert(op.Step(0), FitsTypeOf, add) + c.Assert(op.Desc(), Equals, "replace-rule-offline-peer") + r := s.cluster.GetRegion(1).Clone(core.WithAddPeer( + &metapb.Peer{ + Id: 5, + StoreId: 4, + IsLearner: true, + })) + s.cluster.PutRegion(r) + op = s.rc.Check(s.cluster.GetRegion(1)) + c.Assert(op.Step(0), FitsTypeOf, remove) + c.Assert(op.Desc(), Equals, "remove-orphan-peer") +}