From 9db232b7599fef94708b365cb82e64f883780edb Mon Sep 17 00:00:00 2001
From: Andy Lok
Date: Mon, 6 Sep 2021 11:10:57 +0800
Subject: [PATCH] This is an automated cherry-pick of #4067

Signed-off-by: ti-chi-bot
---
 server/schedule/checker/rule_checker.go      | 17 ++++-
 server/schedule/checker/rule_checker_test.go | 70 ++++++++++++++++++++
 2 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/server/schedule/checker/rule_checker.go b/server/schedule/checker/rule_checker.go
index 3b1b95137d4..370a67cafc7 100644
--- a/server/schedule/checker/rule_checker.go
+++ b/server/schedule/checker/rule_checker.go
@@ -269,12 +269,27 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg
 	if len(fit.OrphanPeers) == 0 {
 		return nil, nil
 	}
-	// remove orphan peers only when all rules are satisfied (count+role)
+	// remove orphan peers only when all rules are satisfied (count+role) and all peers selected
+	// by RuleFits is not pending or down.
 	for _, rf := range fit.RuleFits {
 		if !rf.IsSatisfied() {
 			checkerCounter.WithLabelValues("rule_checker", "skip-remove-orphan-peer").Inc()
 			return nil, nil
 		}
+		for _, p := range rf.Peers {
+			for _, pendingPeer := range region.GetPendingPeers() {
+				if pendingPeer.Id == p.Id {
+					checkerCounter.WithLabelValues("rule_checker", "skip-remove-orphan-peer").Inc()
+					return nil, nil
+				}
+			}
+			for _, downPeer := range region.GetDownPeers() {
+				if downPeer.Peer.Id == p.Id {
+					checkerCounter.WithLabelValues("rule_checker", "skip-remove-orphan-peer").Inc()
+					return nil, nil
+				}
+			}
+		}
 	}
 	checkerCounter.WithLabelValues("rule_checker", "remove-orphan-peer").Inc()
 	peer := fit.OrphanPeers[0]
diff --git a/server/schedule/checker/rule_checker_test.go b/server/schedule/checker/rule_checker_test.go
index 9893508fb76..38cc2ebdad8 100644
--- a/server/schedule/checker/rule_checker_test.go
+++ b/server/schedule/checker/rule_checker_test.go
@@ -600,3 +600,73 @@ func (s *testRuleCheckerSuite) TestFixOfflinePeer(c *C) {
 	s.ruleManager.SetRule(rule)
 	c.Assert(s.rc.Check(region), IsNil)
 }
+<<<<<<< HEAD
+=======
+
+func (s *testRuleCheckerSerialSuite) TestRuleCache(c *C) {
+	s.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1"})
+	s.cluster.AddLabelsStore(2, 1, map[string]string{"zone": "z1"})
+	s.cluster.AddLabelsStore(3, 1, map[string]string{"zone": "z2"})
+	s.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z3"})
+	s.cluster.AddLabelsStore(5, 1, map[string]string{"zone": "z3"})
+	s.cluster.AddLeaderRegion(1, 1, 3, 4)
+	rule := &placement.Rule{
+		GroupID:        "pd",
+		ID:             "test",
+		Index:          100,
+		Override:       true,
+		Role:           placement.Voter,
+		Count:          3,
+		LocationLabels: []string{"zone"},
+	}
+	s.ruleManager.SetRule(rule)
+	region := s.cluster.GetRegion(1)
+	c.Assert(s.rc.Check(region), IsNil)
+
+	c.Assert(failpoint.Enable("github.com/tikv/pd/server/schedule/checker/assertCache", ""), IsNil)
+	c.Assert(s.rc.Check(region), IsNil)
+	c.Assert(failpoint.Disable("github.com/tikv/pd/server/schedule/checker/assertCache"), IsNil)
+}
+
+// Ref https://github.com/tikv/pd/issues/4045
+func (s *testRuleCheckerSuite) TestSkipFixOrphanPeerIfSelectedPeerisPendingOrDown(c *C) {
+	s.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"})
+	s.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host1"})
+	s.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host2"})
+	s.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4"})
+	s.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 3, 4)
+
+	// set peer3 and peer4 to pending
+	r1 := s.cluster.GetRegion(1)
+	r1 = r1.Clone(core.WithPendingPeers([]*metapb.Peer{r1.GetStorePeer(3), r1.GetStorePeer(4)}))
+	s.cluster.PutRegion(r1)
+
+	// should not remove extra peer
+	op := s.rc.Check(s.cluster.GetRegion(1))
+	c.Assert(op, IsNil)
+
+	// set peer3 to down-peer
+	r1 = r1.Clone(core.WithPendingPeers([]*metapb.Peer{r1.GetStorePeer(4)}))
+	r1 = r1.Clone(core.WithDownPeers([]*pdpb.PeerStats{
+		{
+			Peer:        r1.GetStorePeer(3),
+			DownSeconds: 42,
+		},
+	}))
+	s.cluster.PutRegion(r1)
+
+	// should not remove extra peer
+	op = s.rc.Check(s.cluster.GetRegion(1))
+	c.Assert(op, IsNil)
+
+	// set peer3 to normal
+	r1 = r1.Clone(core.WithDownPeers(nil))
+	s.cluster.PutRegion(r1)
+
+	// should remove extra peer now
+	var remove operator.RemovePeer
+	op = s.rc.Check(s.cluster.GetRegion(1))
+	c.Assert(op.Step(0), FitsTypeOf, remove)
+	c.Assert(op.Desc(), Equals, "remove-orphan-peer")
+}
+>>>>>>> 53530bf84 (placement: do not delete orphan peers if some peers selected by RuleFit is down or pending (#4067))
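
As an illustration of the guard that fixOrphanPeers gains in this patch, here is a minimal, self-contained Go sketch. It is not PD's actual code: peer, regionInfo, ruleFit, and orphanPeerToRemove are simplified stand-ins for PD's metapb.Peer, core.RegionInfo, placement.RuleFit, and the checker logic. It restates the rule the diff adds: an orphan peer is removed only when every rule fit is satisfied and none of the peers selected by the rule fits is pending or down.

// Standalone sketch of the orphan-peer guard; all types here are
// simplified assumptions, not PD's real API.
package main

import "fmt"

type peer struct{ ID uint64 }

type regionInfo struct {
	pendingPeers []peer
	downPeers    []peer
	orphanPeers  []peer
}

type ruleFit struct {
	satisfied bool
	peers     []peer
}

// orphanPeerToRemove mirrors the patched logic: an orphan peer may only be
// removed when every rule is satisfied and none of the peers selected by
// the rule fits is pending or down.
func orphanPeerToRemove(region regionInfo, fits []ruleFit) (peer, bool) {
	if len(region.orphanPeers) == 0 {
		return peer{}, false
	}
	for _, rf := range fits {
		if !rf.satisfied {
			return peer{}, false // skip-remove-orphan-peer: rule not satisfied
		}
		for _, p := range rf.peers {
			for _, pending := range region.pendingPeers {
				if pending.ID == p.ID {
					return peer{}, false // skip: a selected peer is pending
				}
			}
			for _, down := range region.downPeers {
				if down.ID == p.ID {
					return peer{}, false // skip: a selected peer is down
				}
			}
		}
	}
	return region.orphanPeers[0], true // safe to remove the first orphan peer
}

func main() {
	// Peer 3 is pending, so the orphan peer (4) must be kept.
	region := regionInfo{
		pendingPeers: []peer{{ID: 3}},
		orphanPeers:  []peer{{ID: 4}},
	}
	fits := []ruleFit{{satisfied: true, peers: []peer{{ID: 1}, {ID: 2}, {ID: 3}}}}

	if _, ok := orphanPeerToRemove(region, fits); !ok {
		fmt.Println("kept orphan peer: a selected peer is pending or down")
	}
}

Running this with `go run` prints the "kept orphan peer" line, mirroring the skip-remove-orphan-peer branch exercised by TestSkipFixOrphanPeerIfSelectedPeerisPendingOrDown above; clearing pendingPeers makes it return the orphan peer for removal, matching the "should remove extra peer now" step of the test.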