From 7cf8e69f3a8e2b313b1cfcc477d5e1af7b0fffee Mon Sep 17 00:00:00 2001 From: nolouch Date: Wed, 27 Mar 2019 16:08:40 +0800 Subject: [PATCH 1/7] scheduler: fix region scatter may transfer leader to removed peer Signed-off-by: nolouch --- server/schedule/operator.go | 21 +++++++++++++++++++++ server/schedule/region_scatterer.go | 6 ++++-- server/schedulers/scheduler_test.go | 7 ++++++- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/server/schedule/operator.go b/server/schedule/operator.go index ca78345d2e2..20d713b66a5 100644 --- a/server/schedule/operator.go +++ b/server/schedule/operator.go @@ -630,3 +630,24 @@ func getIntersectionStores(a []*metapb.Peer, b []*metapb.Peer) map[uint64]struct return intersection } + +// CheckOperatorValid checks if the operator is valid. +func CheckOperatorValid(op *Operator) bool { + removeStores := []uint64{} + for _, step := range op.steps { + if tr, ok := step.(TransferLeader); ok { + for _, store := range removeStores { + if store == tr.FromStore { + return false + } + if store == tr.ToStore { + return false + } + } + } + if rp, ok := step.(RemovePeer); ok { + removeStores = append(removeStores, rp.FromStore) + } + } + return true +} diff --git a/server/schedule/region_scatterer.go b/server/schedule/region_scatterer.go index 22bbdb8df88..192f892166d 100644 --- a/server/schedule/region_scatterer.go +++ b/server/schedule/region_scatterer.go @@ -96,6 +96,7 @@ func (r *RegionScatterer) scatterRegion(region *core.RegionInfo) *Operator { stores := r.collectAvailableStores(region) var kind OperatorKind + newRegion := region.Clone() for _, peer := range region.GetPeers() { if len(stores) == 0 { // Reset selected stores if we have no available stores. @@ -116,13 +117,14 @@ func (r *RegionScatterer) scatterRegion(region *core.RegionInfo) *Operator { delete(stores, newPeer.GetStoreId()) r.selected.put(newPeer.GetStoreId()) - op, err := CreateMovePeerOperator("scatter-peer", r.cluster, region, OpAdmin, + op, err := CreateMovePeerOperator("scatter-peer", r.cluster, newRegion, OpAdmin, peer.GetStoreId(), newPeer.GetStoreId(), newPeer.GetId()) if err != nil { continue } steps = append(steps, op.steps...) - steps = append(steps, TransferLeader{ToStore: newPeer.GetStoreId()}) + newRegion = newRegion.Clone(core.WithRemoveStorePeer(peer.GetStoreId()), core.WithAddPeer(newPeer)) + kind |= op.Kind() } diff --git a/server/schedulers/scheduler_test.go b/server/schedulers/scheduler_test.go index 3f90cec7243..7c9b393be25 100644 --- a/server/schedulers/scheduler_test.go +++ b/server/schedulers/scheduler_test.go @@ -174,6 +174,10 @@ func (s *testScatterRegionSuite) TestFiveStores(c *C) { s.scatter(c, 5, 5) } +func (s *testScatterRegionSuite) checkOperator(op *schedule.Operator, c *C) { + c.Assert(schedule.CheckOperatorValid(op), IsTrue) +} + func (s *testScatterRegionSuite) scatter(c *C, numStores, numRegions uint64) { opt := schedule.NewMockSchedulerOptions() tc := schedule.NewMockCluster(opt) @@ -184,7 +188,7 @@ func (s *testScatterRegionSuite) scatter(c *C, numStores, numRegions uint64) { } // Add regions 1~4. - seq := newSequencer(numStores) + seq := newSequencer(3) // Region 1 has the same distribution with the Region 2, which is used to test selectPeerToReplace. tc.AddLeaderRegion(1, 1, 2, 3) for i := uint64(2); i <= numRegions; i++ { @@ -196,6 +200,7 @@ func (s *testScatterRegionSuite) scatter(c *C, numStores, numRegions uint64) { for i := uint64(1); i <= numRegions; i++ { region := tc.GetRegion(i) if op := scatterer.Scatter(region); op != nil { + s.checkOperator(op, c) tc.ApplyOperator(op) } } From 2c3d01e23ab60c7476b1560166b4df376415280a Mon Sep 17 00:00:00 2001 From: nolouch Date: Thu, 28 Mar 2019 10:16:02 +0800 Subject: [PATCH 2/7] fix Signed-off-by: nolouch --- tests/cmd/pdctl_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/cmd/pdctl_test.go b/tests/cmd/pdctl_test.go index a9fd1457502..5c135622b79 100644 --- a/tests/cmd/pdctl_test.go +++ b/tests/cmd/pdctl_test.go @@ -1041,7 +1041,6 @@ func (s *cmdTestSuite) TestOperator(c *C) { args = []string{"-u", pdAddr, "operator", "show", "region"} _, output, err = executeCommandC(cmd, args...) c.Assert(err, IsNil) - c.Assert(strings.Contains(string(output), "transfer leader from store 0 to store 3"), IsTrue) } func (s *cmdTestSuite) TestMember(c *C) { From 69579a76facc435201a25436a6b953f77c96a0fa Mon Sep 17 00:00:00 2001 From: nolouch Date: Thu, 28 Mar 2019 14:42:06 +0800 Subject: [PATCH 3/7] --wip-- [skip ci] --- server/schedulers/scheduler_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/server/schedulers/scheduler_test.go b/server/schedulers/scheduler_test.go index 7c9b393be25..972b977392d 100644 --- a/server/schedulers/scheduler_test.go +++ b/server/schedulers/scheduler_test.go @@ -14,6 +14,8 @@ package schedulers import ( + "fmt" + . "github.com/pingcap/check" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/pd/pkg/testutil" @@ -199,7 +201,9 @@ func (s *testScatterRegionSuite) scatter(c *C, numStores, numRegions uint64) { for i := uint64(1); i <= numRegions; i++ { region := tc.GetRegion(i) + fmt.Printf("scatter region: %+v\n", region.GetMeta()) if op := scatterer.Scatter(region); op != nil { + fmt.Printf("got scatter operator: %s\n", op) s.checkOperator(op, c) tc.ApplyOperator(op) } From 2f0fb456000e7b27720183fbae2acb676c2953a7 Mon Sep 17 00:00:00 2001 From: nolouch Date: Mon, 1 Apr 2019 20:05:38 +0800 Subject: [PATCH 4/7] *: random pick leader Signed-off-by: nolouch --- server/grpc_service.go | 7 +- server/handler.go | 6 +- server/schedule/mockcluster.go | 3 + server/schedule/operator.go | 20 ++++-- server/schedule/region_scatterer.go | 101 ++++++++++++++++++++++------ server/schedulers/scheduler_test.go | 2 +- tests/cmd/pdctl_test.go | 1 + 7 files changed, 111 insertions(+), 29 deletions(-) diff --git a/server/grpc_service.go b/server/grpc_service.go index b0e343e5398..94f4c58e51c 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -602,10 +602,15 @@ func (s *Server) ScatterRegion(ctx context.Context, request *pdpb.ScatterRegionR } region = core.NewRegionInfo(request.GetRegion(), request.GetLeader()) } + cluster.RLock() defer cluster.RUnlock() co := cluster.coordinator - if op := co.regionScatterer.Scatter(region); op != nil { + op, err := co.regionScatterer.Scatter(region) + if err != nil { + return nil, err + } + if op != nil { co.opController.AddOperator(op) } diff --git a/server/handler.go b/server/handler.go index 26bb3c55629..eb6cad154dc 100644 --- a/server/handler.go +++ b/server/handler.go @@ -642,7 +642,11 @@ func (h *Handler) AddScatterRegionOperator(regionID uint64) error { return ErrRegionNotFound(regionID) } - op := c.regionScatterer.Scatter(region) + op, err := c.regionScatterer.Scatter(region) + if err != nil { + return err + } + if op == nil { return nil } diff --git a/server/schedule/mockcluster.go b/server/schedule/mockcluster.go index dc0c5e50d28..271512ffc73 100644 --- a/server/schedule/mockcluster.go +++ b/server/schedule/mockcluster.go @@ -398,6 +398,9 @@ func (mc *MockCluster) ApplyOperator(op *Operator) { if region.GetStorePeer(s.FromStore) == nil { panic("Remove peer that doesn't exist") } + if region.GetLeader().GetStoreId() == s.FromStore { + panic("Cannot remove the leader peer") + } region = region.Clone(core.WithRemoveStorePeer(s.FromStore)) case AddLearner: if region.GetStorePeer(s.ToStore) != nil { diff --git a/server/schedule/operator.go b/server/schedule/operator.go index 20d713b66a5..0926dcfa901 100644 --- a/server/schedule/operator.go +++ b/server/schedule/operator.go @@ -436,14 +436,10 @@ func CreateRemovePeerOperator(desc string, cluster Cluster, kind OperatorKind, r return NewOperator(desc, region.GetID(), region.GetRegionEpoch(), removeKind|kind, steps...), nil } -// CreateMovePeerOperator creates an Operator that replaces an old peer with a new peer. -func CreateMovePeerOperator(desc string, cluster Cluster, region *core.RegionInfo, kind OperatorKind, oldStore, newStore uint64, peerID uint64) (*Operator, error) { - removeKind, steps, err := removePeerSteps(cluster, region, oldStore, append(getRegionFollowerIDs(region), newStore)) - if err != nil { - return nil, err - } +// CreateMovePeerOperator creates an OperatorStep list that add a new Peer. +func CreateAddPeerSteps(newStore uint64, peerID uint64, isRaftLearnerEnabled bool) []OperatorStep { var st []OperatorStep - if cluster.IsRaftLearnerEnabled() { + if isRaftLearnerEnabled { st = []OperatorStep{ AddLearner{ToStore: newStore, PeerID: peerID}, PromoteLearner{ToStore: newStore, PeerID: peerID}, @@ -453,6 +449,16 @@ func CreateMovePeerOperator(desc string, cluster Cluster, region *core.RegionInf AddPeer{ToStore: newStore, PeerID: peerID}, } } + return st +} + +// CreateMovePeerOperator creates an Operator that replaces an old peer with a new peer. +func CreateMovePeerOperator(desc string, cluster Cluster, region *core.RegionInfo, kind OperatorKind, oldStore, newStore uint64, peerID uint64) (*Operator, error) { + removeKind, steps, err := removePeerSteps(cluster, region, oldStore, append(getRegionFollowerIDs(region), newStore)) + if err != nil { + return nil, err + } + st := CreateAddPeerSteps(newStore, peerID, cluster.IsRaftLearnerEnabled()) steps = append(st, steps...) return NewOperator(desc, region.GetID(), region.GetRegionEpoch(), removeKind|kind|OpRegion, steps...), nil } diff --git a/server/schedule/region_scatterer.go b/server/schedule/region_scatterer.go index 192f892166d..1b315dfa80c 100644 --- a/server/schedule/region_scatterer.go +++ b/server/schedule/region_scatterer.go @@ -20,6 +20,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/pd/server/core" "github.com/pingcap/pd/server/namespace" + "github.com/pkg/errors" ) type selectedStores struct { @@ -79,24 +80,28 @@ func NewRegionScatterer(cluster Cluster, classifier namespace.Classifier) *Regio } // Scatter relocates the region. -func (r *RegionScatterer) Scatter(region *core.RegionInfo) *Operator { +func (r *RegionScatterer) Scatter(region *core.RegionInfo) (*Operator, error) { if r.cluster.IsRegionHot(region.GetID()) { - return nil + return nil, errors.Errorf("region %d is a hot region", region.GetID()) } if len(region.GetPeers()) != r.cluster.GetMaxReplicas() { - return nil + return nil, errors.Errorf("the number replicas of region %d is not expected", region.GetID()) + } + + if region.GetLeader() == nil { + return nil, errors.Errorf("region %d has no leader", region.GetID()) } - return r.scatterRegion(region) + return r.scatterRegion(region), nil } func (r *RegionScatterer) scatterRegion(region *core.RegionInfo) *Operator { - steps := make([]OperatorStep, 0, len(region.GetPeers())) - stores := r.collectAvailableStores(region) - var kind OperatorKind - newRegion := region.Clone() + var ( + targetPeers []*metapb.Peer + replacedPeers []*metapb.Peer + ) for _, peer := range region.GetPeers() { if len(stores) == 0 { // Reset selected stores if we have no available stores. @@ -106,32 +111,90 @@ func (r *RegionScatterer) scatterRegion(region *core.RegionInfo) *Operator { if r.selected.put(peer.GetStoreId()) { delete(stores, peer.GetStoreId()) + targetPeers = append(targetPeers, peer) + replacedPeers = append(replacedPeers, peer) continue } newPeer := r.selectPeerToReplace(stores, region, peer) if newPeer == nil { + targetPeers = append(targetPeers, peer) + replacedPeers = append(replacedPeers, peer) continue } - // Remove it from stores and mark it as selected. delete(stores, newPeer.GetStoreId()) r.selected.put(newPeer.GetStoreId()) + targetPeers = append(targetPeers, newPeer) + replacedPeers = append(replacedPeers, peer) + } + return r.createOperator(region, replacedPeers, targetPeers) +} - op, err := CreateMovePeerOperator("scatter-peer", r.cluster, newRegion, OpAdmin, - peer.GetStoreId(), newPeer.GetStoreId(), newPeer.GetId()) - if err != nil { - continue +func (r *RegionScatterer) createOperator(origin *core.RegionInfo, replacedPeers, targetPeers []*metapb.Peer) *Operator { + // random pick a leader + i := rand.Intn(len(targetPeers)) + targetLeaderPeer := targetPeers[i] + + storeIDs := origin.GetStoreIds() + steps := make([]OperatorStep, 0, len(targetPeers)*2+1) + deferSteps := make([]OperatorStep, 0, 2) + var kind OperatorKind + // no need to do anything + sameLeader := targetLeaderPeer.GetStoreId() == origin.GetLeader().GetStoreId() + if sameLeader { + isSame := true + for _, peer := range targetPeers { + if _, ok := storeIDs[peer.GetStoreId()]; !ok { + isSame = false + break + } } - steps = append(steps, op.steps...) - newRegion = newRegion.Clone(core.WithRemoveStorePeer(peer.GetStoreId()), core.WithAddPeer(newPeer)) + if isSame { + return nil + } + } - kind |= op.Kind() + // create first step + if _, ok := storeIDs[targetLeaderPeer.GetStoreId()]; !ok { + st := CreateAddPeerSteps(targetLeaderPeer.GetStoreId(), targetLeaderPeer.GetId(), r.cluster.IsRaftLearnerEnabled()) + steps = append(steps, st...) + // do not transfer leader to newly added peer + deferSteps = append(deferSteps, TransferLeader{FromStore: origin.GetLeader().GetStoreId(), ToStore: targetLeaderPeer.GetStoreId()}) + deferSteps = append(deferSteps, RemovePeer{FromStore: replacedPeers[i].GetStoreId()}) + kind |= OpLeader + kind |= OpRegion + } else { + if !sameLeader { + steps = append(steps, TransferLeader{FromStore: origin.GetLeader().GetStoreId(), ToStore: targetLeaderPeer.GetStoreId()}) + kind |= OpLeader + } } - if len(steps) == 0 { - return nil + // for the other steps + for j, peer := range targetPeers { + if peer.GetId() == targetLeaderPeer.GetId() { + continue + } + if _, ok := storeIDs[peer.GetStoreId()]; ok { + continue + } + if replacedPeers[j].GetStoreId() == origin.GetLeader().GetStoreId() { + st := CreateAddPeerSteps(peer.GetStoreId(), peer.GetId(), r.cluster.IsRaftLearnerEnabled()) + st = append(st, RemovePeer{FromStore: replacedPeers[j].GetStoreId()}) + deferSteps = append(deferSteps, st...) + kind |= OpRegion | OpLeader + continue + } + st := CreateAddPeerSteps(peer.GetStoreId(), peer.GetId(), r.cluster.IsRaftLearnerEnabled()) + steps = append(steps, st...) + steps = append(steps, RemovePeer{FromStore: replacedPeers[j].GetStoreId()}) + kind |= OpRegion } - return NewOperator("scatter-region", region.GetID(), region.GetRegionEpoch(), kind, steps...) + + steps = append(steps, deferSteps...) + op := NewOperator("scatter-region", origin.GetID(), origin.GetRegionEpoch(), kind, steps...) + op.SetPriorityLevel(core.HighPriority) + return op } func (r *RegionScatterer) selectPeerToReplace(stores map[uint64]*core.StoreInfo, region *core.RegionInfo, oldPeer *metapb.Peer) *metapb.Peer { diff --git a/server/schedulers/scheduler_test.go b/server/schedulers/scheduler_test.go index 972b977392d..4432466f989 100644 --- a/server/schedulers/scheduler_test.go +++ b/server/schedulers/scheduler_test.go @@ -202,7 +202,7 @@ func (s *testScatterRegionSuite) scatter(c *C, numStores, numRegions uint64) { for i := uint64(1); i <= numRegions; i++ { region := tc.GetRegion(i) fmt.Printf("scatter region: %+v\n", region.GetMeta()) - if op := scatterer.Scatter(region); op != nil { + if op, _ := scatterer.Scatter(region); op != nil { fmt.Printf("got scatter operator: %s\n", op) s.checkOperator(op, c) tc.ApplyOperator(op) diff --git a/tests/cmd/pdctl_test.go b/tests/cmd/pdctl_test.go index 5c135622b79..f556ed88c43 100644 --- a/tests/cmd/pdctl_test.go +++ b/tests/cmd/pdctl_test.go @@ -1041,6 +1041,7 @@ func (s *cmdTestSuite) TestOperator(c *C) { args = []string{"-u", pdAddr, "operator", "show", "region"} _, output, err = executeCommandC(cmd, args...) c.Assert(err, IsNil) + c.Assert(strings.Contains(string(output), "scatter-region"), IsTrue) } func (s *cmdTestSuite) TestMember(c *C) { From 3ce5b2d7b27bfee00bac28e3e1ee5eea7a00e2ee Mon Sep 17 00:00:00 2001 From: nolouch Date: Mon, 1 Apr 2019 20:13:01 +0800 Subject: [PATCH 5/7] clean up Signed-off-by: nolouch --- server/schedule/region_scatterer.go | 10 +++++----- server/schedulers/scheduler_test.go | 4 ---- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/server/schedule/region_scatterer.go b/server/schedule/region_scatterer.go index 1b315dfa80c..0c652fcf4bd 100644 --- a/server/schedule/region_scatterer.go +++ b/server/schedule/region_scatterer.go @@ -131,7 +131,7 @@ func (r *RegionScatterer) scatterRegion(region *core.RegionInfo) *Operator { } func (r *RegionScatterer) createOperator(origin *core.RegionInfo, replacedPeers, targetPeers []*metapb.Peer) *Operator { - // random pick a leader + // Randomly pick a leader i := rand.Intn(len(targetPeers)) targetLeaderPeer := targetPeers[i] @@ -139,8 +139,8 @@ func (r *RegionScatterer) createOperator(origin *core.RegionInfo, replacedPeers, steps := make([]OperatorStep, 0, len(targetPeers)*2+1) deferSteps := make([]OperatorStep, 0, 2) var kind OperatorKind - // no need to do anything sameLeader := targetLeaderPeer.GetStoreId() == origin.GetLeader().GetStoreId() + // No need to do anything if sameLeader { isSame := true for _, peer := range targetPeers { @@ -154,11 +154,11 @@ func (r *RegionScatterer) createOperator(origin *core.RegionInfo, replacedPeers, } } - // create first step + // Creates the first step if _, ok := storeIDs[targetLeaderPeer.GetStoreId()]; !ok { st := CreateAddPeerSteps(targetLeaderPeer.GetStoreId(), targetLeaderPeer.GetId(), r.cluster.IsRaftLearnerEnabled()) steps = append(steps, st...) - // do not transfer leader to newly added peer + // Do not transfer leader to the newly added peer deferSteps = append(deferSteps, TransferLeader{FromStore: origin.GetLeader().GetStoreId(), ToStore: targetLeaderPeer.GetStoreId()}) deferSteps = append(deferSteps, RemovePeer{FromStore: replacedPeers[i].GetStoreId()}) kind |= OpLeader @@ -170,7 +170,7 @@ func (r *RegionScatterer) createOperator(origin *core.RegionInfo, replacedPeers, } } - // for the other steps + // For the other steps for j, peer := range targetPeers { if peer.GetId() == targetLeaderPeer.GetId() { continue diff --git a/server/schedulers/scheduler_test.go b/server/schedulers/scheduler_test.go index 4432466f989..c398ea0455b 100644 --- a/server/schedulers/scheduler_test.go +++ b/server/schedulers/scheduler_test.go @@ -14,8 +14,6 @@ package schedulers import ( - "fmt" - . "github.com/pingcap/check" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/pd/pkg/testutil" @@ -201,9 +199,7 @@ func (s *testScatterRegionSuite) scatter(c *C, numStores, numRegions uint64) { for i := uint64(1); i <= numRegions; i++ { region := tc.GetRegion(i) - fmt.Printf("scatter region: %+v\n", region.GetMeta()) if op, _ := scatterer.Scatter(region); op != nil { - fmt.Printf("got scatter operator: %s\n", op) s.checkOperator(op, c) tc.ApplyOperator(op) } From 4bf7f45633e0ea11214356aa2086ed4b4acdd70d Mon Sep 17 00:00:00 2001 From: nolouch Date: Mon, 1 Apr 2019 20:23:17 +0800 Subject: [PATCH 6/7] fix Signed-off-by: nolouch --- server/schedule/operator.go | 8 ++++---- server/schedule/region_scatterer.go | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/server/schedule/operator.go b/server/schedule/operator.go index 0926dcfa901..b2ad340b1bc 100644 --- a/server/schedule/operator.go +++ b/server/schedule/operator.go @@ -436,10 +436,10 @@ func CreateRemovePeerOperator(desc string, cluster Cluster, kind OperatorKind, r return NewOperator(desc, region.GetID(), region.GetRegionEpoch(), removeKind|kind, steps...), nil } -// CreateMovePeerOperator creates an OperatorStep list that add a new Peer. -func CreateAddPeerSteps(newStore uint64, peerID uint64, isRaftLearnerEnabled bool) []OperatorStep { +// CreateAddPeerSteps creates an OperatorStep list that add a new Peer. +func CreateAddPeerSteps(newStore uint64, peerID uint64, cluster Cluster) []OperatorStep { var st []OperatorStep - if isRaftLearnerEnabled { + if cluster.IsRaftLearnerEnabled() { st = []OperatorStep{ AddLearner{ToStore: newStore, PeerID: peerID}, PromoteLearner{ToStore: newStore, PeerID: peerID}, @@ -458,7 +458,7 @@ func CreateMovePeerOperator(desc string, cluster Cluster, region *core.RegionInf if err != nil { return nil, err } - st := CreateAddPeerSteps(newStore, peerID, cluster.IsRaftLearnerEnabled()) + st := CreateAddPeerSteps(newStore, peerID, cluster) steps = append(st, steps...) return NewOperator(desc, region.GetID(), region.GetRegionEpoch(), removeKind|kind|OpRegion, steps...), nil } diff --git a/server/schedule/region_scatterer.go b/server/schedule/region_scatterer.go index 0c652fcf4bd..2d3d6fe9a60 100644 --- a/server/schedule/region_scatterer.go +++ b/server/schedule/region_scatterer.go @@ -156,7 +156,7 @@ func (r *RegionScatterer) createOperator(origin *core.RegionInfo, replacedPeers, // Creates the first step if _, ok := storeIDs[targetLeaderPeer.GetStoreId()]; !ok { - st := CreateAddPeerSteps(targetLeaderPeer.GetStoreId(), targetLeaderPeer.GetId(), r.cluster.IsRaftLearnerEnabled()) + st := CreateAddPeerSteps(targetLeaderPeer.GetStoreId(), targetLeaderPeer.GetId(), r.cluster) steps = append(steps, st...) // Do not transfer leader to the newly added peer deferSteps = append(deferSteps, TransferLeader{FromStore: origin.GetLeader().GetStoreId(), ToStore: targetLeaderPeer.GetStoreId()}) @@ -179,13 +179,13 @@ func (r *RegionScatterer) createOperator(origin *core.RegionInfo, replacedPeers, continue } if replacedPeers[j].GetStoreId() == origin.GetLeader().GetStoreId() { - st := CreateAddPeerSteps(peer.GetStoreId(), peer.GetId(), r.cluster.IsRaftLearnerEnabled()) + st := CreateAddPeerSteps(peer.GetStoreId(), peer.GetId(), r.cluster) st = append(st, RemovePeer{FromStore: replacedPeers[j].GetStoreId()}) deferSteps = append(deferSteps, st...) kind |= OpRegion | OpLeader continue } - st := CreateAddPeerSteps(peer.GetStoreId(), peer.GetId(), r.cluster.IsRaftLearnerEnabled()) + st := CreateAddPeerSteps(peer.GetStoreId(), peer.GetId(), r.cluster) steps = append(steps, st...) steps = append(steps, RemovePeer{FromStore: replacedPeers[j].GetStoreId()}) kind |= OpRegion From cd8969c21c5fecb7daa28bc042ef855eddb97d1f Mon Sep 17 00:00:00 2001 From: nolouch Date: Tue, 2 Apr 2019 14:12:50 +0800 Subject: [PATCH 7/7] address comments Signed-off-by: nolouch --- server/schedule/region_scatterer.go | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/server/schedule/region_scatterer.go b/server/schedule/region_scatterer.go index 2d3d6fe9a60..8e205739ad7 100644 --- a/server/schedule/region_scatterer.go +++ b/server/schedule/region_scatterer.go @@ -134,17 +134,19 @@ func (r *RegionScatterer) createOperator(origin *core.RegionInfo, replacedPeers, // Randomly pick a leader i := rand.Intn(len(targetPeers)) targetLeaderPeer := targetPeers[i] + originLeaderStoreID := origin.GetLeader().GetStoreId() - storeIDs := origin.GetStoreIds() - steps := make([]OperatorStep, 0, len(targetPeers)*2+1) - deferSteps := make([]OperatorStep, 0, 2) + originStoreIDs := origin.GetStoreIds() + steps := make([]OperatorStep, 0, len(targetPeers)*3+1) + // deferSteps will append to the end of the steps + deferSteps := make([]OperatorStep, 0, 5) var kind OperatorKind - sameLeader := targetLeaderPeer.GetStoreId() == origin.GetLeader().GetStoreId() + sameLeader := targetLeaderPeer.GetStoreId() == originLeaderStoreID // No need to do anything if sameLeader { isSame := true for _, peer := range targetPeers { - if _, ok := storeIDs[peer.GetStoreId()]; !ok { + if _, ok := originStoreIDs[peer.GetStoreId()]; !ok { isSame = false break } @@ -155,17 +157,18 @@ func (r *RegionScatterer) createOperator(origin *core.RegionInfo, replacedPeers, } // Creates the first step - if _, ok := storeIDs[targetLeaderPeer.GetStoreId()]; !ok { + if _, ok := originStoreIDs[targetLeaderPeer.GetStoreId()]; !ok { st := CreateAddPeerSteps(targetLeaderPeer.GetStoreId(), targetLeaderPeer.GetId(), r.cluster) steps = append(steps, st...) // Do not transfer leader to the newly added peer - deferSteps = append(deferSteps, TransferLeader{FromStore: origin.GetLeader().GetStoreId(), ToStore: targetLeaderPeer.GetStoreId()}) + // Ref: https://github.com/tikv/tikv/issues/3819 + deferSteps = append(deferSteps, TransferLeader{FromStore: originLeaderStoreID, ToStore: targetLeaderPeer.GetStoreId()}) deferSteps = append(deferSteps, RemovePeer{FromStore: replacedPeers[i].GetStoreId()}) kind |= OpLeader kind |= OpRegion } else { if !sameLeader { - steps = append(steps, TransferLeader{FromStore: origin.GetLeader().GetStoreId(), ToStore: targetLeaderPeer.GetStoreId()}) + steps = append(steps, TransferLeader{FromStore: originLeaderStoreID, ToStore: targetLeaderPeer.GetStoreId()}) kind |= OpLeader } } @@ -175,10 +178,10 @@ func (r *RegionScatterer) createOperator(origin *core.RegionInfo, replacedPeers, if peer.GetId() == targetLeaderPeer.GetId() { continue } - if _, ok := storeIDs[peer.GetStoreId()]; ok { + if _, ok := originStoreIDs[peer.GetStoreId()]; ok { continue } - if replacedPeers[j].GetStoreId() == origin.GetLeader().GetStoreId() { + if replacedPeers[j].GetStoreId() == originLeaderStoreID { st := CreateAddPeerSteps(peer.GetStoreId(), peer.GetId(), r.cluster) st = append(st, RemovePeer{FromStore: replacedPeers[j].GetStoreId()}) deferSteps = append(deferSteps, st...)