diff --git a/server/cluster/coordinator_test.go b/server/cluster/coordinator_test.go
index a4cefb9cd92..93e33e0131a 100644
--- a/server/cluster/coordinator_test.go
+++ b/server/cluster/coordinator_test.go
@@ -991,7 +991,7 @@ func (s *testOperatorControllerSuite) TestDownStoreLimit(c *C) {
 	tc, co, cleanup := prepare(nil, nil, nil, c)
 	defer cleanup()
 	oc := co.opController
-	rc := co.checkers.GetRuleChecker()
+	rc := co.checkers.GetReplicaChecker()
 
 	tc.addRegionStore(1, 100)
 	tc.addRegionStore(2, 100)
@@ -1001,14 +1001,25 @@ func (s *testOperatorControllerSuite) TestDownStoreLimit(c *C) {
 	region := tc.GetRegion(1)
 	tc.setStoreDown(1)
 	tc.SetStoreLimit(1, storelimit.RemovePeer, 1)
+
 	region = region.Clone(core.WithDownPeers([]*pdpb.PeerStats{
 		{
 			Peer:        region.GetStorePeer(1),
 			DownSeconds: 24 * 60 * 60,
 		},
-	}))
+	}), core.SetApproximateSize(1))
+	tc.putRegion(region)
+	for i := uint64(1); i < 20; i++ {
+		tc.addRegionStore(i+3, 100)
+		op := rc.Check(region)
+		c.Assert(op, NotNil)
+		c.Assert(oc.AddOperator(op), IsTrue)
+		oc.RemoveOperator(op)
+	}
 
-	for i := uint64(1); i <= 5; i++ {
+	region = region.Clone(core.SetApproximateSize(100))
+	tc.putRegion(region)
+	for i := uint64(20); i < 25; i++ {
 		tc.addRegionStore(i+3, 100)
 		op := rc.Check(region)
 		c.Assert(op, NotNil)
diff --git a/server/schedule/checker/rule_checker.go b/server/schedule/checker/rule_checker.go
index 2ef2fb922a2..266a633fecf 100644
--- a/server/schedule/checker/rule_checker.go
+++ b/server/schedule/checker/rule_checker.go
@@ -53,7 +53,6 @@ func (c *RuleChecker) GetType() string {
 // fix it.
 func (c *RuleChecker) Check(region *core.RegionInfo) *operator.Operator {
 	checkerCounter.WithLabelValues("rule_checker", "check").Inc()
-	fit := c.cluster.FitRegion(region)
 	fit := c.cluster.FitRegion(region)
 	if len(fit.RuleFits) == 0 {
 		checkerCounter.WithLabelValues("rule_checker", "fix-range").Inc()
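A note on the rule_checker.go hunk: judging by the hunk counts (7 lines before, 6 after), the cherry-pick appears to have left two identical `fit` assignments back to back, and the second one cannot compile, since `:=` requires at least one new variable on its left-hand side:

    fit := c.cluster.FitRegion(region)
    fit := c.cluster.FitRegion(region) // compile error: no new variables on left side of :=

Dropping one copy restores the single lookup that `fit.RuleFits` depends on.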
"github.com/pingcap/check" "github.com/pingcap/kvproto/pkg/metapb" @@ -337,321 +333,3 @@ func (s *testRuleCheckerSuite) TestIssue3521_PriorityFixOrphanPeer(c *C) { c.Assert(op.Step(0), FitsTypeOf, remove) c.Assert(op.Desc(), Equals, "remove-orphan-peer") } -<<<<<<< HEAD -======= - -func (s *testRuleCheckerSuite) TestIssue3293(c *C) { - s.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) - s.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host1"}) - s.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host2"}) - s.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4"}) - s.cluster.AddLabelsStore(5, 1, map[string]string{"host": "host5"}) - s.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2) - err := s.ruleManager.SetRule(&placement.Rule{ - GroupID: "TiDB_DDL_51", - ID: "0", - Role: placement.Follower, - Count: 1, - LabelConstraints: []placement.LabelConstraint{ - { - Key: "host", - Values: []string{ - "host5", - }, - Op: placement.In, - }, - }, - }) - c.Assert(err, IsNil) - s.cluster.DeleteStore(s.cluster.GetStore(5)) - err = s.ruleManager.SetRule(&placement.Rule{ - GroupID: "TiDB_DDL_51", - ID: "default", - Role: placement.Voter, - Count: 3, - }) - c.Assert(err, IsNil) - err = s.ruleManager.DeleteRule("pd", "default") - c.Assert(err, IsNil) - op := s.rc.Check(s.cluster.GetRegion(1)) - c.Assert(op, NotNil) - c.Assert(op.Desc(), Equals, "add-rule-peer") -} - -func (s *testRuleCheckerSuite) TestIssue3299(c *C) { - s.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) - s.cluster.AddLabelsStore(2, 1, map[string]string{"dc": "sh"}) - s.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2) - - testCases := []struct { - constraints []placement.LabelConstraint - err string - }{ - { - constraints: []placement.LabelConstraint{ - { - Key: "host", - Values: []string{"host5"}, - Op: placement.In, - }, - }, - err: ".*can not match any store", - }, - { - constraints: []placement.LabelConstraint{ - { - Key: "ho", - Values: []string{"sh"}, - Op: placement.In, - }, - }, - err: ".*can not match any store", - }, - { - constraints: []placement.LabelConstraint{ - { - Key: "host", - Values: []string{"host1"}, - Op: placement.In, - }, - { - Key: "host", - Values: []string{"host1"}, - Op: placement.NotIn, - }, - }, - err: ".*can not match any store", - }, - { - constraints: []placement.LabelConstraint{ - { - Key: "host", - Values: []string{"host1"}, - Op: placement.In, - }, - { - Key: "host", - Values: []string{"host3"}, - Op: placement.In, - }, - }, - err: ".*can not match any store", - }, - { - constraints: []placement.LabelConstraint{ - { - Key: "host", - Values: []string{"host1"}, - Op: placement.In, - }, - { - Key: "host", - Values: []string{"host1"}, - Op: placement.In, - }, - }, - err: "", - }, - } - - for _, t := range testCases { - err := s.ruleManager.SetRule(&placement.Rule{ - GroupID: "p", - ID: "0", - Role: placement.Follower, - Count: 1, - LabelConstraints: t.constraints, - }) - if t.err != "" { - c.Assert(err, ErrorMatches, t.err) - } else { - c.Assert(err, IsNil) - } - } -} - -// See issue: https://github.com/tikv/pd/issues/3705 -func (s *testRuleCheckerSuite) TestFixDownPeer(c *C) { - s.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1"}) - s.cluster.AddLabelsStore(2, 1, map[string]string{"zone": "z1"}) - s.cluster.AddLabelsStore(3, 1, map[string]string{"zone": "z2"}) - s.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z3"}) - s.cluster.AddLabelsStore(5, 1, map[string]string{"zone": "z3"}) - s.cluster.AddLeaderRegion(1, 1, 3, 4) - rule 
diff --git a/server/schedule/checker_controller.go b/server/schedule/checker_controller.go
index 6880f939bb7..c2857dbafcc 100644
--- a/server/schedule/checker_controller.go
+++ b/server/schedule/checker_controller.go
@@ -93,36 +93,8 @@ func (c *CheckerController) CheckRegion(region *core.RegionInfo) (bool, []*opera
 func (c *CheckerController) GetMergeChecker() *checker.MergeChecker {
 	return c.mergeChecker
 }
-<<<<<<< HEAD
-=======
 
-// GetRuleChecker returns the rule checker.
-func (c *CheckerController) GetRuleChecker() *checker.RuleChecker {
-	return c.ruleChecker
+// GetReplicaChecker returns the replica checker.
+func (c *CheckerController) GetReplicaChecker() *checker.ReplicaChecker {
+	return c.replicaChecker
 }
-
-// GetWaitingRegions returns the regions in the waiting list.
-func (c *CheckerController) GetWaitingRegions() []*cache.Item {
-	return c.regionWaitingList.Elems()
-}
-
-// AddWaitingRegion returns the regions in the waiting list.
-func (c *CheckerController) AddWaitingRegion(region *core.RegionInfo) {
-	c.regionWaitingList.Put(region.GetID(), nil)
-}
-
-// RemoveWaitingRegion removes the region from the waiting list.
-func (c *CheckerController) RemoveWaitingRegion(id uint64) {
-	c.regionWaitingList.Remove(id)
-}
-
-// GetPriorityRegions returns the region in priority queue
-func (c *CheckerController) GetPriorityRegions() []uint64 {
-	return c.priorityChecker.GetPriorityRegions()
-}
-
-// RemovePriorityRegions removes priority region from priority queue
-func (c *CheckerController) RemovePriorityRegions(id uint64) {
-	c.priorityChecker.RemovePriorityRegion(id)
-}
->>>>>>> 1a7caa95c (schedule: not limit remove peer of the down store (#4097))
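This accessor pairs with the test change in coordinator_test.go: on this branch callers obtain the checker as

    rc := co.checkers.GetReplicaChecker()
    op := rc.Check(region) // nil when the region needs no fixing

whereas master's #4097 exposed GetRuleChecker, placement rules being the primary path there. The waiting-list and priority-queue helpers in the removed block belong to master-only machinery and have no counterpart on this branch.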
"return(true)"), IsNil) - s.rc.Check(testcase.region) - c.Assert(failpoint.Disable("github.com/tikv/pd/server/schedule/checker/assertShouldCache"), IsNil) - } else { - c.Assert(failpoint.Enable("github.com/tikv/pd/server/schedule/checker/assertShouldNotCache", "return(true)"), IsNil) - s.rc.Check(testcase.region) - c.Assert(failpoint.Disable("github.com/tikv/pd/server/schedule/checker/assertShouldNotCache"), IsNil) - } - } -} - -// Ref https://github.com/tikv/pd/issues/4045 -func (s *testRuleCheckerSuite) TestSkipFixOrphanPeerIfSelectedPeerisPendingOrDown(c *C) { - s.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) - s.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host1"}) - s.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host2"}) - s.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4"}) - s.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 3, 4) - - // set peer3 and peer4 to pending - r1 := s.cluster.GetRegion(1) - r1 = r1.Clone(core.WithPendingPeers([]*metapb.Peer{r1.GetStorePeer(3), r1.GetStorePeer(4)})) - s.cluster.PutRegion(r1) - - // should not remove extra peer - op := s.rc.Check(s.cluster.GetRegion(1)) - c.Assert(op, IsNil) - - // set peer3 to down-peer - r1 = r1.Clone(core.WithPendingPeers([]*metapb.Peer{r1.GetStorePeer(4)})) - r1 = r1.Clone(core.WithDownPeers([]*pdpb.PeerStats{ - { - Peer: r1.GetStorePeer(3), - DownSeconds: 42, - }, - })) - s.cluster.PutRegion(r1) - - // should not remove extra peer - op = s.rc.Check(s.cluster.GetRegion(1)) - c.Assert(op, IsNil) - - // set peer3 to normal - r1 = r1.Clone(core.WithDownPeers(nil)) - s.cluster.PutRegion(r1) - - // should remove extra peer now - var remove operator.RemovePeer - op = s.rc.Check(s.cluster.GetRegion(1)) - c.Assert(op.Step(0), FitsTypeOf, remove) - c.Assert(op.Desc(), Equals, "remove-orphan-peer") -} ->>>>>>> 1a7caa95c (schedule: not limit remove peer of the down store (#4097)) diff --git a/server/schedule/checker_controller.go b/server/schedule/checker_controller.go index 6880f939bb7..c2857dbafcc 100644 --- a/server/schedule/checker_controller.go +++ b/server/schedule/checker_controller.go @@ -93,36 +93,8 @@ func (c *CheckerController) CheckRegion(region *core.RegionInfo) (bool, []*opera func (c *CheckerController) GetMergeChecker() *checker.MergeChecker { return c.mergeChecker } -<<<<<<< HEAD -======= -// GetRuleChecker returns the rule checker. -func (c *CheckerController) GetRuleChecker() *checker.RuleChecker { - return c.ruleChecker +// GetReplicaChecker returns the replica checker. +func (c *CheckerController) GetReplicaChecker() *checker.ReplicaChecker { + return c.replicaChecker } - -// GetWaitingRegions returns the regions in the waiting list. -func (c *CheckerController) GetWaitingRegions() []*cache.Item { - return c.regionWaitingList.Elems() -} - -// AddWaitingRegion returns the regions in the waiting list. -func (c *CheckerController) AddWaitingRegion(region *core.RegionInfo) { - c.regionWaitingList.Put(region.GetID(), nil) -} - -// RemoveWaitingRegion removes the region from the waiting list. 
diff --git a/server/schedule/operator/influence.go b/server/schedule/operator/influence.go
index 1e02a388aa5..a3028920a77 100644
--- a/server/schedule/operator/influence.go
+++ b/server/schedule/operator/influence.go
@@ -80,7 +80,7 @@ func (s *StoreInfluence) addStepCost(limitType storelimit.Type, cost int64) {
 func (s *StoreInfluence) AdjustStepCost(limitType storelimit.Type, regionSize int64) {
 	if regionSize > storelimit.SmallRegionThreshold {
 		s.addStepCost(limitType, storelimit.RegionInfluence[limitType])
-	} else if regionSize <= storelimit.SmallRegionThreshold && regionSize > core.EmptyRegionApproximateSize {
+	} else if regionSize > core.EmptyRegionApproximateSize {
 		s.addStepCost(limitType, storelimit.SmallRegionInfluence[limitType])
 	}
 }
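The dropped `regionSize <= storelimit.SmallRegionThreshold` clause was redundant: control only reaches the `else if` when the first comparison has already failed, so only the `> core.EmptyRegionApproximateSize` half carries information. The resulting three-way split, with the numeric thresholds assumed here as stand-ins for the real constants:

    package main

    import "fmt"

    // bucket mirrors AdjustStepCost's three-way split; 20 and 1 stand in for
    // storelimit.SmallRegionThreshold and core.EmptyRegionApproximateSize.
    func bucket(regionSize int64) string {
        switch {
        case regionSize > 20:
            return "full RegionInfluence cost"
        case regionSize > 1:
            return "reduced SmallRegionInfluence cost"
        default:
            return "no cost (empty region)"
        }
    }

    func main() {
        fmt.Println(bucket(100), "/", bucket(10), "/", bucket(1))
    }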
diff --git a/server/schedule/operator/step.go b/server/schedule/operator/step.go
index b70cee00299..18b113589dd 100644
--- a/server/schedule/operator/step.go
+++ b/server/schedule/operator/step.go
@@ -220,12 +220,8 @@ func (pl PromoteLearner) Influence(opInfluence OpInfluence, region *core.RegionI
 
 // RemovePeer is an OpStep that removes a region peer.
 type RemovePeer struct {
-<<<<<<< HEAD
-	FromStore uint64
-=======
-	FromStore, PeerID uint64
-	IsDownStore       bool
->>>>>>> 1a7caa95c (schedule: not limit remove peer of the down store (#4097))
+	FromStore   uint64
+	IsDownStore bool
 }
 
 // ConfVerChanged returns true if the conf version has been changed by this step
@@ -257,9 +253,9 @@ func (rp RemovePeer) Influence(opInfluence OpInfluence, region *core.RegionInfo)
 	regionSize := region.GetApproximateSize()
 	from.RegionSize -= regionSize
 	from.RegionCount--
-	if rp.IsDownStore {
-		from.AdjustStepCost(storelimit.RemovePeer, storelimit.SmallRegionThreshold)
-		return
+
+	if rp.IsDownStore && regionSize > storelimit.SmallRegionThreshold {
+		regionSize = storelimit.SmallRegionThreshold
 	}
 	from.AdjustStepCost(storelimit.RemovePeer, regionSize)
 }
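Putting the pieces together, RemovePeer.Influence now caps a down store's charge at the small-region rate instead of returning early with a flat small-region charge. A compact, runnable sketch of the combined Influence + AdjustStepCost costing rule; the constant values are assumed stand-ins for the storelimit and core packages:

    package main

    import "fmt"

    const (
        smallRegionThreshold       = 20   // assumed: storelimit.SmallRegionThreshold
        emptyRegionApproximateSize = 1    // assumed: core.EmptyRegionApproximateSize
        regionInfluence            = 1000 // assumed: storelimit.RegionInfluence[RemovePeer]
        smallRegionInfluence       = 200  // assumed: storelimit.SmallRegionInfluence[RemovePeer]
    )

    // removePeerCost follows the patched Influence/AdjustStepCost pair.
    func removePeerCost(regionSize int64, isDownStore bool) int64 {
        if isDownStore && regionSize > smallRegionThreshold {
            regionSize = smallRegionThreshold // the cap this patch introduces
        }
        switch {
        case regionSize > smallRegionThreshold:
            return regionInfluence
        case regionSize > emptyRegionApproximateSize:
            return smallRegionInfluence
        default:
            return 0
        }
    }

    func main() {
        fmt.Println(removePeerCost(100, false)) // 1000: healthy store pays the full rate
        fmt.Println(removePeerCost(100, true))  // 200: down store is capped at the small-region rate
        fmt.Println(removePeerCost(1, true))    // 0: empty regions on a down store cost nothing
    }

Compared with the removed early return, which always billed SmallRegionThreshold, the capped form also lets regions at or below EmptyRegionApproximateSize cost nothing on a down store; that is what allows TestDownStoreLimit to schedule nineteen size-1 removals under a remove-peer limit of 1.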