From 0e8155a565ea15f9d7aa8a4c411e69e09bf5cda2 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Thu, 13 Oct 2022 14:43:51 +0800 Subject: [PATCH 01/67] schedule: fix priority bucket number (#5594) close tikv/pd#5595 Signed-off-by: Ryan Leung --- server/schedule/waiting_operator.go | 2 +- server/schedule/waiting_operator_test.go | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/server/schedule/waiting_operator.go b/server/schedule/waiting_operator.go index e10848bd4625..513f8edea1ae 100644 --- a/server/schedule/waiting_operator.go +++ b/server/schedule/waiting_operator.go @@ -22,7 +22,7 @@ import ( ) // PriorityWeight is used to represent the weight of different priorities of operators. -var PriorityWeight = []float64{1.0, 4.0, 9.0} +var PriorityWeight = []float64{1.0, 4.0, 9.0, 16.0} // WaitingOperator is an interface of waiting operators. type WaitingOperator interface { diff --git a/server/schedule/waiting_operator_test.go b/server/schedule/waiting_operator_test.go index 256af2264df6..8c639c1568ca 100644 --- a/server/schedule/waiting_operator_test.go +++ b/server/schedule/waiting_operator_test.go @@ -27,7 +27,7 @@ func TestRandBuckets(t *testing.T) { re := require.New(t) rb := NewRandBuckets() addOperators(rb) - for i := 0; i < 3; i++ { + for i := 0; i < len(PriorityWeight); i++ { op := rb.GetOperator() re.NotNil(op) } @@ -38,6 +38,7 @@ func addOperators(wop WaitingOperator) { op := operator.NewTestOperator(uint64(1), &metapb.RegionEpoch{}, operator.OpRegion, []operator.OpStep{ operator.RemovePeer{FromStore: uint64(1)}, }...) + op.SetPriorityLevel(core.Medium) wop.PutOperator(op) op = operator.NewTestOperator(uint64(2), &metapb.RegionEpoch{}, operator.OpRegion, []operator.OpStep{ operator.RemovePeer{FromStore: uint64(2)}, @@ -49,13 +50,18 @@ func addOperators(wop WaitingOperator) { }...) op.SetPriorityLevel(core.Low) wop.PutOperator(op) + op = operator.NewTestOperator(uint64(4), &metapb.RegionEpoch{}, operator.OpRegion, []operator.OpStep{ + operator.RemovePeer{FromStore: uint64(4)}, + }...) + op.SetPriorityLevel(core.Urgent) + wop.PutOperator(op) } func TestListOperator(t *testing.T) { re := require.New(t) rb := NewRandBuckets() addOperators(rb) - re.Len(rb.ListOperator(), 3) + re.Len(rb.ListOperator(), len(PriorityWeight)) } func TestRandomBucketsWithMergeRegion(t *testing.T) { From d70a685d3c4b144fae6d23d54a72a69393dfca8f Mon Sep 17 00:00:00 2001 From: Yongbo Jiang Date: Thu, 13 Oct 2022 17:11:53 +0800 Subject: [PATCH 02/67] schdeuler: Set resource with step for collecting plan (#5591) ref tikv/pd#5539 Signed-off-by: Cabinfever_B Co-authored-by: Ti Chi Robot --- server/schedule/plan/plan.go | 11 +++++++++++ server/schedulers/balance_plan.go | 5 +++++ server/schedulers/balance_plan_test.go | 24 ++++++++++++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/server/schedule/plan/plan.go b/server/schedule/plan/plan.go index 2cc0607187f8..fcd5102012c8 100644 --- a/server/schedule/plan/plan.go +++ b/server/schedule/plan/plan.go @@ -23,6 +23,10 @@ type Plan interface { Clone(ops ...Option) Plan // generate plan for clone option SetResource(interface{}) + // SetResourceWithStep is used to set resource for specific step. + // The meaning of step is different for different plans. 
+ // Such as balancePlan, pickSource = 0, pickRegion = 1, pickTarget = 2 + SetResourceWithStep(resource interface{}, step int) SetStatus(*Status) } @@ -83,3 +87,10 @@ func SetResource(resource interface{}) Option { plan.SetResource(resource) } } + +// SetResourceWithStep is used to generate Resource for plan +func SetResourceWithStep(resource interface{}, step int) Option { + return func(plan Plan) { + plan.SetResourceWithStep(resource, step) + } +} diff --git a/server/schedulers/balance_plan.go b/server/schedulers/balance_plan.go index 306c0e52a95d..421b24ab9acf 100644 --- a/server/schedulers/balance_plan.go +++ b/server/schedulers/balance_plan.go @@ -63,6 +63,11 @@ func (p *balanceSchedulerPlan) SetResource(resource interface{}) { } } +func (p *balanceSchedulerPlan) SetResourceWithStep(resource interface{}, step int) { + p.step = step + p.SetResource(resource) +} + func (p *balanceSchedulerPlan) GetResource(step int) uint64 { if p.step < step { return 0 diff --git a/server/schedulers/balance_plan_test.go b/server/schedulers/balance_plan_test.go index 6e6debffdef4..266dc22b60c8 100644 --- a/server/schedulers/balance_plan_test.go +++ b/server/schedulers/balance_plan_test.go @@ -248,3 +248,27 @@ func (suite *balanceSchedulerPlanAnalyzeTestSuite) TestAnalyzerResult5() { 5: plan.NewStatus(plan.StatusCreateOperatorFailed), })) } + +func (suite *balanceSchedulerPlanAnalyzeTestSuite) TestAnalyzerResult6() { + basePlan := NewBalanceSchedulerPlan() + collector := plan.NewCollector(basePlan) + collector.Collect(plan.SetResourceWithStep(suite.stores[0], 2), plan.SetStatus(plan.NewStatus(plan.StatusStoreDown))) + collector.Collect(plan.SetResourceWithStep(suite.stores[1], 2), plan.SetStatus(plan.NewStatus(plan.StatusStoreDown))) + collector.Collect(plan.SetResourceWithStep(suite.stores[2], 2), plan.SetStatus(plan.NewStatus(plan.StatusStoreDown))) + collector.Collect(plan.SetResourceWithStep(suite.stores[3], 2), plan.SetStatus(plan.NewStatus(plan.StatusStoreDown))) + collector.Collect(plan.SetResourceWithStep(suite.stores[4], 2), plan.SetStatus(plan.NewStatus(plan.StatusStoreDown))) + basePlan.source = suite.stores[0] + basePlan.step++ + collector.Collect(plan.SetResource(suite.regions[0]), plan.SetStatus(plan.NewStatus(plan.StatusRegionNoLeader))) + statuses, isNormal, err := BalancePlanSummary(collector.GetPlans()) + suite.NoError(err) + suite.False(isNormal) + suite.True(suite.check(statuses, + map[uint64]*plan.Status{ + 1: plan.NewStatus(plan.StatusStoreDown), + 2: plan.NewStatus(plan.StatusStoreDown), + 3: plan.NewStatus(plan.StatusStoreDown), + 4: plan.NewStatus(plan.StatusStoreDown), + 5: plan.NewStatus(plan.StatusStoreDown), + })) +} From 0a5b7b66d7a6e9bb85bba6cc189e36ad12149749 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Fri, 14 Oct 2022 14:47:51 +0800 Subject: [PATCH 03/67] simulator: the operator should share one limiter. 
(#5323) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close tikv/pd#5322 Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: 混沌DM --- tools/pd-simulator/simulator/node.go | 9 +- tools/pd-simulator/simulator/raft.go | 2 +- tools/pd-simulator/simulator/task.go | 210 ++++++++++++++++----------- 3 files changed, 132 insertions(+), 89 deletions(-) diff --git a/tools/pd-simulator/simulator/node.go b/tools/pd-simulator/simulator/node.go index c2e904dd85df..a6a9c4787351 100644 --- a/tools/pd-simulator/simulator/node.go +++ b/tools/pd-simulator/simulator/node.go @@ -24,6 +24,7 @@ import ( "github.com/docker/go-units" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" + "github.com/tikv/pd/pkg/ratelimit" "github.com/tikv/pd/tools/pd-simulator/simulator/cases" "github.com/tikv/pd/tools/pd-simulator/simulator/info" "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" @@ -48,7 +49,7 @@ type Node struct { ctx context.Context cancel context.CancelFunc raftEngine *RaftEngine - ioRate int64 + limiter *ratelimit.RateLimiter sizeMutex sync.Mutex } @@ -90,6 +91,8 @@ func NewNode(s *cases.Store, pdAddr string, config *SimConfig) (*Node, error) { cancel() return nil, err } + ratio := int64(time.Second) / config.SimTickInterval.Milliseconds() + speed := config.StoreIOMBPerSecond * units.MiB * ratio return &Node{ Store: store, stats: stats, @@ -98,7 +101,7 @@ func NewNode(s *cases.Store, pdAddr string, config *SimConfig) (*Node, error) { cancel: cancel, tasks: make(map[uint64]Task), receiveRegionHeartbeatCh: receiveRegionHeartbeatCh, - ioRate: config.StoreIOMBPerSecond * units.MiB, + limiter: ratelimit.NewRateLimiter(float64(speed), int(speed)), tick: uint64(rand.Intn(storeHeartBeatPeriod)), }, nil } @@ -155,7 +158,7 @@ func (n *Node) stepTask() { for _, task := range n.tasks { task.Step(n.raftEngine) if task.IsFinished() { - simutil.Logger.Debug("task finished", + simutil.Logger.Debug("task status", zap.Uint64("node-id", n.Id), zap.Uint64("region-id", task.RegionID()), zap.String("task", task.Desc())) diff --git a/tools/pd-simulator/simulator/raft.go b/tools/pd-simulator/simulator/raft.go index 76c8587431c9..cfd1e8719f03 100644 --- a/tools/pd-simulator/simulator/raft.go +++ b/tools/pd-simulator/simulator/raft.go @@ -139,7 +139,7 @@ func (r *RaftEngine) stepSplit(region *core.RegionInfo) { if r.useTiDBEncodedKey { splitKey, err = simutil.GenerateTiDBEncodedSplitKey(region.GetStartKey(), region.GetEndKey()) if err != nil { - simutil.Logger.Fatal("generate TiDB encoded split key failed", zap.Error(err)) + simutil.Logger.Fatal("Generate TiDB encoded split key failed", zap.Error(err)) } } else { splitKey = simutil.GenerateSplitKey(region.GetStartKey(), region.GetEndKey()) diff --git a/tools/pd-simulator/simulator/task.go b/tools/pd-simulator/simulator/task.go index e502b0be40d3..083d8b6774c8 100644 --- a/tools/pd-simulator/simulator/task.go +++ b/tools/pd-simulator/simulator/task.go @@ -17,7 +17,9 @@ package simulator import ( "bytes" "fmt" + "time" + "github.com/docker/go-units" "github.com/pingcap/kvproto/pkg/eraftpb" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" @@ -25,6 +27,28 @@ import ( "github.com/tikv/pd/tools/pd-analysis/analysis" ) +var ( + chunkSize = int64(4 * units.KiB) + maxSnapGeneratorPoolSize = uint32(2) + maxSnapReceivePoolSize = uint32(4) + compressionRatio = int64(2) +) + +type snapAction int + +const ( + generate = iota + receive +) + +type snapStatus int + +const ( + pending 
snapStatus = iota + running + finished +) + // Task running in node. type Task interface { Desc() string @@ -45,14 +69,8 @@ func responseToTask(resp *pdpb.RegionHeartbeatResponse, r *RaftEngine) Task { case eraftpb.ConfChangeType_AddNode: return &addPeer{ regionID: regionID, - size: region.GetApproximateSize(), - keys: region.GetApproximateKeys(), - speed: 100 * 1000 * 1000, epoch: epoch, peer: changePeer.GetPeer(), - // This two variables are used to simulate sending and receiving snapshot processes. - sendingStat: &snapshotStat{"sending", region.GetApproximateSize(), false}, - receivingStat: &snapshotStat{"receiving", region.GetApproximateSize(), false}, } case eraftpb.ConfChangeType_RemoveNode: return &removePeer{ @@ -68,9 +86,11 @@ func responseToTask(resp *pdpb.RegionHeartbeatResponse, r *RaftEngine) Task { regionID: regionID, size: region.GetApproximateSize(), keys: region.GetApproximateKeys(), - speed: 100 * 1000 * 1000, epoch: epoch, peer: changePeer.GetPeer(), + // This two variables are used to simulate sending and receiving snapshot processes. + sendingStat: newSnapshotState(region.GetApproximateSize(), generate), + receivingStat: newSnapshotState(region.GetApproximateSize(), receive), } } } else if resp.GetTransferLeader() != nil { @@ -94,9 +114,22 @@ func responseToTask(resp *pdpb.RegionHeartbeatResponse, r *RaftEngine) Task { } type snapshotStat struct { - kind string + action snapAction remainSize int64 - finished bool + status snapStatus + start time.Time +} + +func newSnapshotState(size int64, action snapAction) *snapshotStat { + if action == receive { + size /= compressionRatio + } + return &snapshotStat{ + remainSize: size, + action: action, + status: pending, + start: time.Now(), + } } type mergeRegion struct { @@ -209,15 +242,10 @@ func (t *transferLeader) IsFinished() bool { } type addPeer struct { - regionID uint64 - size int64 - keys int64 - speed int64 - epoch *metapb.RegionEpoch - peer *metapb.Peer - finished bool - sendingStat *snapshotStat - receivingStat *snapshotStat + regionID uint64 + epoch *metapb.RegionEpoch + peer *metapb.Peer + finished bool } func (a *addPeer) Desc() string { @@ -234,44 +262,19 @@ func (a *addPeer) Step(r *RaftEngine) { return } - snapshotSize := region.GetApproximateSize() - sendNode := r.conn.Nodes[region.GetLeader().GetStoreId()] - if sendNode == nil { - a.finished = true - return - } - if !processSnapshot(sendNode, a.sendingStat, snapshotSize) { - return - } - r.schedulerStats.snapshotStats.incSendSnapshot(sendNode.Id) - - recvNode := r.conn.Nodes[a.peer.GetStoreId()] - if recvNode == nil { - a.finished = true - return - } - if !processSnapshot(recvNode, a.receivingStat, snapshotSize) { - return - } - r.schedulerStats.snapshotStats.incReceiveSnapshot(recvNode.Id) - - a.size -= a.speed - if a.size < 0 { - var opts []core.RegionCreateOption - if region.GetPeer(a.peer.GetId()) == nil { - opts = append(opts, core.WithAddPeer(a.peer)) - r.schedulerStats.taskStats.incAddPeer(region.GetID()) - } else { - opts = append(opts, core.WithPromoteLearner(a.peer.GetId())) - r.schedulerStats.taskStats.incPromoteLeaner(region.GetID()) - } - opts = append(opts, core.WithIncConfVer()) - newRegion := region.Clone(opts...) 
- r.SetRegion(newRegion) - r.recordRegionChange(newRegion) - recvNode.incUsedSize(uint64(snapshotSize)) - a.finished = true + var opts []core.RegionCreateOption + if region.GetPeer(a.peer.GetId()) == nil { + opts = append(opts, core.WithAddPeer(a.peer)) + r.schedulerStats.taskStats.incAddPeer(region.GetID()) + } else { + opts = append(opts, core.WithPromoteLearner(a.peer.GetId())) + r.schedulerStats.taskStats.incPromoteLeaner(region.GetID()) } + opts = append(opts, core.WithIncConfVer()) + newRegion := region.Clone(opts...) + r.SetRegion(newRegion) + r.recordRegionChange(newRegion) + a.finished = true } func (a *addPeer) RegionID() uint64 { @@ -352,13 +355,14 @@ func (a *removePeer) IsFinished() bool { } type addLearner struct { - regionID uint64 - size int64 - keys int64 - speed int64 - epoch *metapb.RegionEpoch - peer *metapb.Peer - finished bool + regionID uint64 + size int64 + keys int64 + epoch *metapb.RegionEpoch + peer *metapb.Peer + finished bool + sendingStat *snapshotStat + receivingStat *snapshotStat } func (a *addLearner) Desc() string { @@ -375,21 +379,41 @@ func (a *addLearner) Step(r *RaftEngine) { return } - a.size -= a.speed - if a.size < 0 { - if region.GetPeer(a.peer.GetId()) == nil { - newRegion := region.Clone( - core.WithAddPeer(a.peer), - core.WithIncConfVer(), - ) - r.SetRegion(newRegion) - r.recordRegionChange(newRegion) - r.schedulerStats.taskStats.incAddLeaner(region.GetID()) - } + snapshotSize := region.GetApproximateSize() + sendNode := r.conn.Nodes[region.GetLeader().GetStoreId()] + if sendNode == nil { a.finished = true - if analysis.GetTransferCounter().IsValid { - analysis.GetTransferCounter().AddTarget(a.regionID, a.peer.StoreId) - } + return + } + if !processSnapshot(sendNode, a.sendingStat) { + return + } + r.schedulerStats.snapshotStats.incSendSnapshot(sendNode.Id) + + recvNode := r.conn.Nodes[a.peer.GetStoreId()] + if recvNode == nil { + a.finished = true + return + } + if !processSnapshot(recvNode, a.receivingStat) { + return + } + r.schedulerStats.snapshotStats.incReceiveSnapshot(recvNode.Id) + + if region.GetPeer(a.peer.GetId()) == nil { + newRegion := region.Clone( + core.WithAddPeer(a.peer), + core.WithIncConfVer(), + ) + r.SetRegion(newRegion) + r.recordRegionChange(newRegion) + r.schedulerStats.taskStats.incAddLeaner(region.GetID()) + recvNode.incUsedSize(uint64(snapshotSize)) + a.finished = true + } + + if analysis.GetTransferCounter().IsValid { + analysis.GetTransferCounter().AddTarget(a.regionID, a.peer.StoreId) } } @@ -401,23 +425,39 @@ func (a *addLearner) IsFinished() bool { return a.finished } -func processSnapshot(n *Node, stat *snapshotStat, snapshotSize int64) bool { - // If the statement is true, it will start to send or receive the snapshot. - if stat.remainSize == snapshotSize { - if stat.kind == "sending" { +func processSnapshot(n *Node, stat *snapshotStat) bool { + if stat.status == finished { + return true + } + if stat.status == pending { + if stat.action == generate && n.stats.SendingSnapCount > maxSnapGeneratorPoolSize { + return false + } + if stat.action == receive && n.stats.ReceivingSnapCount > maxSnapReceivePoolSize { + return false + } + stat.status = running + // If the statement is true, it will start to send or Receive the snapshot. + if stat.action == generate { n.stats.SendingSnapCount++ } else { n.stats.ReceivingSnapCount++ } } - stat.remainSize -= n.ioRate - // The sending or receiving process has not finished yet. + + // store should Generate/Receive snapshot by chunk size. 
+ // todo: the process of snapshot is single thread, the later snapshot task must wait the first one. + for n.limiter.AllowN(int(chunkSize)) { + stat.remainSize -= chunkSize + } + + // The sending or receiving process has not status yet. if stat.remainSize > 0 { return false } - if !stat.finished { - stat.finished = true - if stat.kind == "sending" { + if stat.status == running { + stat.status = finished + if stat.action == generate { n.stats.SendingSnapCount-- } else { n.stats.ReceivingSnapCount-- From bc2019e0c4a0eea1a9559f03a2f9de88511f793e Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 14 Oct 2022 19:59:52 +0800 Subject: [PATCH 04/67] metrics: add scheduling hot peer load (#5525) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ref tikv/pd#5521, fix tikv/pd#5527 Signed-off-by: lhy1024 Co-authored-by: 混沌DM --- pkg/mock/mockcluster/mockcluster.go | 4 ++++ server/cluster/cluster.go | 5 +++++ server/cluster/coordinator.go | 31 ++++++++-------------------- server/cluster/metrics.go | 8 +++++++ server/schedule/cluster.go | 1 + server/schedulers/hot_region.go | 15 ++++++++++++-- server/schedulers/hot_region_test.go | 6 +++--- server/schedulers/metrics.go | 12 ++++++++++- server/statistics/kind.go | 9 ++++++++ server/statistics/store_load.go | 12 ----------- 10 files changed, 63 insertions(+), 40 deletions(-) diff --git a/pkg/mock/mockcluster/mockcluster.go b/pkg/mock/mockcluster/mockcluster.go index 99a6b7df54fc..8b38fa50ae6f 100644 --- a/pkg/mock/mockcluster/mockcluster.go +++ b/pkg/mock/mockcluster/mockcluster.go @@ -778,6 +778,10 @@ func (mc *Cluster) AddSuspectRegions(ids ...uint64) { } } +// SetHotPendingInfluenceMetrics mock method +func (mc *Cluster) SetHotPendingInfluenceMetrics(storeLabel, rwTy, dim string, load float64) { +} + // GetBasicCluster mock method func (mc *Cluster) GetBasicCluster() *core.BasicCluster { return mc.BasicCluster diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index 75ae2a78ebc9..a29b35fac9b7 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -1797,6 +1797,11 @@ func (c *RaftCluster) deleteStoreLocked(store *core.StoreInfo) error { return nil } +// SetHotPendingInfluenceMetrics sets pending influence in hot scheduler. +func (c *RaftCluster) SetHotPendingInfluenceMetrics(storeLabel, rwTy, dim string, load float64) { + hotPendingSum.WithLabelValues(storeLabel, rwTy, dim).Set(load) +} + func (c *RaftCluster) collectMetrics() { statsMap := statistics.NewStoreStatisticsMap(c.opt, c.storeConfigManager.GetStoreConfig()) stores := c.GetStores() diff --git a/server/cluster/coordinator.go b/server/cluster/coordinator.go index a05eaf7d99e7..581e76557e3c 100644 --- a/server/cluster/coordinator.go +++ b/server/cluster/coordinator.go @@ -539,8 +539,6 @@ func (c *coordinator) collectHotSpotMetrics() { collectHotMetrics(c.cluster, stores, statistics.Write) // Collects hot read region metrics. collectHotMetrics(c.cluster, stores, statistics.Read) - // Collects pending influence. 
- collectPendingInfluence(stores) } func collectHotMetrics(cluster *RaftCluster, stores []*core.StoreInfo, typ statistics.RWType) { @@ -563,8 +561,8 @@ func collectHotMetrics(cluster *RaftCluster, stores []*core.StoreInfo, typ stati storeAddress := s.GetAddress() storeID := s.GetID() storeLabel := strconv.FormatUint(storeID, 10) - stat, ok := status.AsLeader[storeID] - if ok { + stat, hasHotLeader := status.AsLeader[storeID] + if hasHotLeader { hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "total_"+kind+"_bytes_as_leader").Set(stat.TotalBytesRate) hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "total_"+kind+"_keys_as_leader").Set(stat.TotalKeysRate) hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "total_"+kind+"_query_as_leader").Set(stat.TotalQueryRate) @@ -576,8 +574,8 @@ func collectHotMetrics(cluster *RaftCluster, stores []*core.StoreInfo, typ stati hotSpotStatusGauge.DeleteLabelValues(storeAddress, storeLabel, "hot_"+kind+"_region_as_leader") } - stat, ok = status.AsPeer[storeID] - if ok { + stat, hasHotPeer := status.AsPeer[storeID] + if hasHotPeer { hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "total_"+kind+"_bytes_as_peer").Set(stat.TotalBytesRate) hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "total_"+kind+"_keys_as_peer").Set(stat.TotalKeysRate) hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "total_"+kind+"_query_as_peer").Set(stat.TotalQueryRate) @@ -588,29 +586,18 @@ func collectHotMetrics(cluster *RaftCluster, stores []*core.StoreInfo, typ stati hotSpotStatusGauge.DeleteLabelValues(storeAddress, storeLabel, "total_"+kind+"_query_as_peer") hotSpotStatusGauge.DeleteLabelValues(storeAddress, storeLabel, "hot_"+kind+"_region_as_peer") } - } -} -func collectPendingInfluence(stores []*core.StoreInfo) { - pendings := statistics.GetPendingInfluence(stores) - for _, s := range stores { - storeAddress := s.GetAddress() - storeID := s.GetID() - storeLabel := strconv.FormatUint(storeID, 10) - if infl := pendings[storeID]; infl != nil { - hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "read_pending_influence_byte_rate").Set(infl.Loads[statistics.RegionReadBytes]) - hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "read_pending_influence_key_rate").Set(infl.Loads[statistics.RegionReadKeys]) - hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "read_pending_influence_query_rate").Set(infl.Loads[statistics.RegionReadQueryNum]) - hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "write_pending_influence_byte_rate").Set(infl.Loads[statistics.RegionWriteBytes]) - hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "write_pending_influence_key_rate").Set(infl.Loads[statistics.RegionWriteKeys]) - hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "write_pending_influence_query_rate").Set(infl.Loads[statistics.RegionWriteQueryNum]) - hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "pending_influence_count").Set(infl.Count) + if !hasHotLeader && !hasHotPeer { + statistics.ForeachRegionStats(func(rwTy statistics.RWType, dim int, _ statistics.RegionStatKind) { + hotPendingSum.DeleteLabelValues(storeLabel, rwTy.String(), statistics.DimToString(dim)) + }) } } } func (c *coordinator) resetHotSpotMetrics() { hotSpotStatusGauge.Reset() + hotPendingSum.Reset() } func (c *coordinator) shouldRun() bool { diff --git a/server/cluster/metrics.go b/server/cluster/metrics.go index 8afb441d65ff..8ebafcab46f4 100644 --- a/server/cluster/metrics.go +++ 
b/server/cluster/metrics.go @@ -57,6 +57,14 @@ var ( Help: "Status of the hotspot.", }, []string{"address", "store", "type"}) + hotPendingSum = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "pd", + Subsystem: "scheduler", + Name: "hot_pending_sum", + Help: "Pending influence sum of store in hot region scheduler.", + }, []string{"store", "rw", "dim"}) + patrolCheckRegionsGauge = prometheus.NewGauge( prometheus.GaugeOpts{ Namespace: "pd", diff --git a/server/schedule/cluster.go b/server/schedule/cluster.go index 842b88bea98a..567ca9c96ddb 100644 --- a/server/schedule/cluster.go +++ b/server/schedule/cluster.go @@ -35,4 +35,5 @@ type Cluster interface { RemoveScheduler(name string) error AddSuspectRegions(ids ...uint64) + SetHotPendingInfluenceMetrics(storeLabel, rwTy, dim string, load float64) } diff --git a/server/schedulers/hot_region.go b/server/schedulers/hot_region.go index fb56a3bb22b3..c53dff369ab1 100644 --- a/server/schedulers/hot_region.go +++ b/server/schedulers/hot_region.go @@ -182,7 +182,7 @@ func (h *hotScheduler) dispatch(typ statistics.RWType, cluster schedule.Cluster) // each store func (h *hotScheduler) prepareForBalance(typ statistics.RWType, cluster schedule.Cluster) { h.stInfos = statistics.SummaryStoreInfos(cluster.GetStores()) - h.summaryPendingInfluence() + h.summaryPendingInfluence(cluster) storesLoads := cluster.GetStoresLoads() isTraceRegionFlow := cluster.GetOpts().IsTraceRegionFlow() @@ -223,7 +223,7 @@ func (h *hotScheduler) prepareForBalance(typ statistics.RWType, cluster schedule // summaryPendingInfluence calculate the summary of pending Influence for each store // and clean the region from regionInfluence if they have ended operator. // It makes each dim rate or count become `weight` times to the origin value. -func (h *hotScheduler) summaryPendingInfluence() { +func (h *hotScheduler) summaryPendingInfluence(cluster schedule.Cluster) { for id, p := range h.regionPendings { from := h.stInfos[p.from] to := h.stInfos[p.to] @@ -248,6 +248,14 @@ func (h *hotScheduler) summaryPendingInfluence() { to.AddInfluence(&p.origin, weight) } } + for storeID, info := range h.stInfos { + storeLabel := strconv.FormatUint(storeID, 10) + if infl := info.PendingSum; infl != nil { + statistics.ForeachRegionStats(func(rwTy statistics.RWType, dim int, kind statistics.RegionStatKind) { + cluster.SetHotPendingInfluenceMetrics(storeLabel, rwTy.String(), statistics.DimToString(dim), infl.Loads[kind]) + }) + } + } } func (h *hotScheduler) tryAddPendingInfluence(op *operator.Operator, srcStore, dstStore uint64, infl statistics.Influence, maxZombieDur time.Duration) bool { @@ -262,6 +270,9 @@ func (h *hotScheduler) tryAddPendingInfluence(op *operator.Operator, srcStore, d h.regionPendings[regionID] = influence schedulerStatus.WithLabelValues(h.GetName(), "pending_op_infos").Inc() + statistics.ForeachRegionStats(func(rwTy statistics.RWType, dim int, kind statistics.RegionStatKind) { + hotPeerHist.WithLabelValues(h.GetName(), rwTy.String(), statistics.DimToString(dim)).Observe(infl.Loads[kind]) + }) return true } diff --git a/server/schedulers/hot_region_test.go b/server/schedulers/hot_region_test.go index c8a74919f1bc..338be550a2d5 100644 --- a/server/schedulers/hot_region_test.go +++ b/server/schedulers/hot_region_test.go @@ -121,7 +121,7 @@ func TestGCPendingOpInfos(t *testing.T) { } } - hb.summaryPendingInfluence() // Calling this function will GC. + hb.summaryPendingInfluence(tc) // Calling this function will GC. 
for i := range opInfluenceCreators { for j, typ := range typs { @@ -1781,7 +1781,7 @@ func TestInfluenceByRWType(t *testing.T) { op := ops[0] re.NotNil(op) - hb.(*hotScheduler).summaryPendingInfluence() + hb.(*hotScheduler).summaryPendingInfluence(tc) stInfos := hb.(*hotScheduler).stInfos re.True(nearlyAbout(stInfos[1].PendingSum.Loads[statistics.RegionWriteKeys], -0.5*units.MiB)) re.True(nearlyAbout(stInfos[1].PendingSum.Loads[statistics.RegionWriteBytes], -0.5*units.MiB)) @@ -1806,7 +1806,7 @@ func TestInfluenceByRWType(t *testing.T) { op = ops[0] re.NotNil(op) - hb.(*hotScheduler).summaryPendingInfluence() + hb.(*hotScheduler).summaryPendingInfluence(tc) stInfos = hb.(*hotScheduler).stInfos // assert read/write influence is the sum of write peer and write leader re.True(nearlyAbout(stInfos[1].PendingSum.Loads[statistics.RegionWriteKeys], -1.2*units.MiB)) diff --git a/server/schedulers/metrics.go b/server/schedulers/metrics.go index fa7e347844e8..c4914f671dab 100644 --- a/server/schedulers/metrics.go +++ b/server/schedulers/metrics.go @@ -109,9 +109,18 @@ var hotPendingStatus = prometheus.NewGaugeVec( Namespace: "pd", Subsystem: "scheduler", Name: "hot_pending", - Help: "Counter of direction of balance related schedulers.", + Help: "Pending influence status in hot region scheduler.", }, []string{"type", "source", "target"}) +var hotPeerHist = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "pd", + Subsystem: "scheduler", + Name: "hot_peer", + Help: "Bucketed histogram of the scheduling hot peer.", + Buckets: prometheus.ExponentialBuckets(1, 2, 30), + }, []string{"type", "rw", "dim"}) + func init() { prometheus.MustRegister(schedulerCounter) prometheus.MustRegister(schedulerStatus) @@ -125,4 +134,5 @@ func init() { prometheus.MustRegister(opInfluenceStatus) prometheus.MustRegister(tolerantResourceStatus) prometheus.MustRegister(hotPendingStatus) + prometheus.MustRegister(hotPeerHist) } diff --git a/server/statistics/kind.go b/server/statistics/kind.go index f9cd78a76263..612829379552 100644 --- a/server/statistics/kind.go +++ b/server/statistics/kind.go @@ -208,6 +208,15 @@ func (rw RWType) Inverse() RWType { } } +// ForeachRegionStats foreach all region stats of read and write. +func ForeachRegionStats(f func(RWType, int, RegionStatKind)) { + for _, rwTy := range []RWType{Read, Write} { + for dim, kind := range rwTy.RegionStats() { + f(rwTy, dim, kind) + } + } +} + // GetLoadRatesFromPeer gets the load rates of the read or write type from PeerInfo. func (rw RWType) GetLoadRatesFromPeer(peer *core.PeerInfo) []float64 { deltaLoads := peer.GetLoads() diff --git a/server/statistics/store_load.go b/server/statistics/store_load.go index 0525a52c4475..a7cc723c74a4 100644 --- a/server/statistics/store_load.go +++ b/server/statistics/store_load.go @@ -144,18 +144,6 @@ func (s *StoreSummaryInfo) SetEngineAsTiFlash() { s.isTiFlash = true } -// GetPendingInfluence returns the current pending influence. -func GetPendingInfluence(stores []*core.StoreInfo) map[uint64]*Influence { - stInfos := SummaryStoreInfos(stores) - ret := make(map[uint64]*Influence, len(stInfos)) - for id, info := range stInfos { - if info.PendingSum != nil { - ret[id] = info.PendingSum - } - } - return ret -} - // StoreLoad records the current load. 
type StoreLoad struct { Loads []float64 From 19aa1c84dd39028d69b86eff901a5506b247c40b Mon Sep 17 00:00:00 2001 From: Yongbo Jiang Date: Mon, 17 Oct 2022 11:07:52 +0800 Subject: [PATCH 05/67] api,config: enable audit-middleware in default config (#5592) ref tikv/pd#4480 Signed-off-by: Cabinfever_B Co-authored-by: Ti Chi Robot --- metrics/grafana/pd.json | 228 ++++++++++++++++- pkg/audit/audit.go | 2 +- pkg/audit/audit_test.go | 7 +- server/api/router.go | 269 +++++++++++---------- server/api/service_middleware_test.go | 2 +- server/config/service_middleware_config.go | 2 +- server/metrics.go | 2 +- tests/server/api/api_test.go | 79 +++--- 8 files changed, 405 insertions(+), 186 deletions(-) diff --git a/metrics/grafana/pd.json b/metrics/grafana/pd.json index 38b0fc714056..a690a7842ab6 100644 --- a/metrics/grafana/pd.json +++ b/metrics/grafana/pd.json @@ -8176,7 +8176,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Completed commands rate", + "title": "gRPC Completed commands rate", "tooltip": { "msResolution": false, "shared": true, @@ -8274,7 +8274,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "99% Completed commands duration", + "title": "gRPC 99% Completed commands duration", "tooltip": { "msResolution": false, "shared": true, @@ -8326,6 +8326,216 @@ "y": 27 }, "id": 124, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": null, + "description": "The rate of completing each kind of HTTP requests", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 119 + }, + "id": 1461, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 300, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(pd_service_audit_handling_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=\"$instance\"}[1m])) by (service,ip)", + "intervalFactor": 2, + "legendFormat": "{{service}}-{{ip}}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "HTTP Completed commands rate", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 10, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The time consumed of completing each kind of HTTP request in .99", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 119 + }, + "id": 1462, + "legend": { + "alignAsTable": true, + "avg": 
false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 300, + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(pd_service_audit_handling_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=\"$instance\"}[5m])) by (service, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{service}}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "HTTP 99% Completed commands duration", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "HTTP", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 125, "panels": [ { "aliasColors": {}, @@ -9414,9 +9624,9 @@ "h": 1, "w": 24, "x": 0, - "y": 28 + "y": 29 }, - "id": 125, + "id": 126, "panels": [ { "aliasColors": {}, @@ -9870,9 +10080,9 @@ "h": 1, "w": 24, "x": 0, - "y": 29 + "y": 30 }, - "id": 126, + "id": 127, "panels": [ { "aliasColors": {}, @@ -11059,7 +11269,7 @@ "h": 1, "w": 24, "x": 0, - "y": 30 + "y": 31 }, "id": 1420, "panels": [ @@ -11531,9 +11741,9 @@ "h": 1, "w": 24, "x": 0, - "y": 31 + "y": 32 }, - "id": 127, + "id": 128, "panels": [ { "aliasColors": {}, diff --git a/pkg/audit/audit.go b/pkg/audit/audit.go index a1f0f131126d..5063225cf630 100644 --- a/pkg/audit/audit.go +++ b/pkg/audit/audit.go @@ -98,7 +98,7 @@ func (b *PrometheusHistogramBackend) ProcessHTTPRequest(req *http.Request) bool if !ok { return false } - b.histogramVec.WithLabelValues(requestInfo.ServiceLabel, "HTTP", requestInfo.Component).Observe(float64(endTime - requestInfo.StartTimeStamp)) + b.histogramVec.WithLabelValues(requestInfo.ServiceLabel, "HTTP", requestInfo.Component, requestInfo.IP).Observe(float64(endTime - requestInfo.StartTimeStamp)) return true } diff --git a/pkg/audit/audit_test.go b/pkg/audit/audit_test.go index 2b5bd7a82625..6384d5f6b29b 100644 --- a/pkg/audit/audit_test.go +++ b/pkg/audit/audit_test.go @@ -51,7 +51,7 @@ func TestPrometheusHistogramBackend(t *testing.T) { Name: "audit_handling_seconds_test", Help: "PD server service handling audit", Buckets: prometheus.DefBuckets, - }, []string{"service", "method", "component"}) + }, []string{"service", "method", "component", "ip"}) prometheus.MustRegister(serviceAuditHistogramTest) @@ -63,6 +63,7 @@ func TestPrometheusHistogramBackend(t *testing.T) { info := requestutil.GetRequestInfo(req) info.ServiceLabel = "test" info.Component = "user1" + info.IP = "localhost" req = req.WithContext(requestutil.WithRequestInfo(req.Context(), info)) re.False(backend.ProcessHTTPRequest(req)) @@ -84,8 
+85,8 @@ func TestPrometheusHistogramBackend(t *testing.T) { defer resp.Body.Close() content, _ := io.ReadAll(resp.Body) output := string(content) - re.Contains(output, "pd_service_audit_handling_seconds_test_count{component=\"user1\",method=\"HTTP\",service=\"test\"} 2") - re.Contains(output, "pd_service_audit_handling_seconds_test_count{component=\"user2\",method=\"HTTP\",service=\"test\"} 1") + re.Contains(output, "pd_service_audit_handling_seconds_test_count{component=\"user1\",ip=\"localhost\",method=\"HTTP\",service=\"test\"} 2") + re.Contains(output, "pd_service_audit_handling_seconds_test_count{component=\"user2\",ip=\"localhost\",method=\"HTTP\",service=\"test\"} 1") } func TestLocalLogBackendUsingFile(t *testing.T) { diff --git a/server/api/router.go b/server/api/router.go index 972794798315..e98bd86f4b3d 100644 --- a/server/api/router.go +++ b/server/api/router.go @@ -103,8 +103,9 @@ func createRouter(prefix string, svr *server.Server) *mux.Router { } } + // localLog should be used in modifying the configuration or admin operations. localLog := audit.LocalLogLabel - // Please don't use PrometheusHistogram in the hot path. + // prometheus will be used in all API. prometheus := audit.PrometheusHistogram setRateLimitAllowList := func() createRouteOption { @@ -126,105 +127,105 @@ func createRouter(prefix string, svr *server.Server) *mux.Router { escapeRouter := clusterRouter.NewRoute().Subrouter().UseEncodedPath() operatorHandler := newOperatorHandler(handler, rd) - registerFunc(apiRouter, "/operators", operatorHandler.GetOperators, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/operators", operatorHandler.CreateOperator, setMethods(http.MethodPost), setAuditBackend(prometheus)) - registerFunc(apiRouter, "/operators/records", operatorHandler.GetOperatorRecords, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/operators/{region_id}", operatorHandler.GetOperatorsByRegion, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/operators/{region_id}", operatorHandler.DeleteOperatorByRegion, setMethods(http.MethodDelete)) + registerFunc(apiRouter, "/operators", operatorHandler.GetOperators, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/operators", operatorHandler.CreateOperator, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/operators/records", operatorHandler.GetOperatorRecords, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/operators/{region_id}", operatorHandler.GetOperatorsByRegion, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/operators/{region_id}", operatorHandler.DeleteOperatorByRegion, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) checkerHandler := newCheckerHandler(svr, rd) - registerFunc(apiRouter, "/checker/{name}", checkerHandler.PauseOrResumeChecker, setMethods(http.MethodPost)) - registerFunc(apiRouter, "/checker/{name}", checkerHandler.GetCheckerStatus, setMethods(http.MethodGet)) + registerFunc(apiRouter, "/checker/{name}", checkerHandler.PauseOrResumeChecker, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/checker/{name}", checkerHandler.GetCheckerStatus, setMethods(http.MethodGet), setAuditBackend(prometheus)) schedulerHandler := newSchedulerHandler(svr, rd) - registerFunc(apiRouter, "/schedulers", schedulerHandler.GetSchedulers, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/schedulers", 
schedulerHandler.CreateScheduler, setMethods(http.MethodPost)) - registerFunc(apiRouter, "/schedulers/{name}", schedulerHandler.DeleteScheduler, setMethods(http.MethodDelete)) - registerFunc(apiRouter, "/schedulers/{name}", schedulerHandler.PauseOrResumeScheduler, setMethods(http.MethodPost)) + registerFunc(apiRouter, "/schedulers", schedulerHandler.GetSchedulers, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/schedulers", schedulerHandler.CreateScheduler, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/schedulers/{name}", schedulerHandler.DeleteScheduler, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/schedulers/{name}", schedulerHandler.PauseOrResumeScheduler, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) diagnosticHandler := newDiagnosticHandler(svr, rd) - registerFunc(clusterRouter, "/schedulers/diagnostic/{name}", diagnosticHandler.GetDiagnosticResult, setMethods(http.MethodGet)) + registerFunc(clusterRouter, "/schedulers/diagnostic/{name}", diagnosticHandler.GetDiagnosticResult, setMethods(http.MethodGet), setAuditBackend(prometheus)) schedulerConfigHandler := newSchedulerConfigHandler(svr, rd) - registerPrefix(apiRouter, "/scheduler-config", schedulerConfigHandler.GetSchedulerConfig) + registerPrefix(apiRouter, "/scheduler-config", schedulerConfigHandler.GetSchedulerConfig, setAuditBackend(prometheus)) clusterHandler := newClusterHandler(svr, rd) - registerFunc(apiRouter, "/cluster", clusterHandler.GetCluster, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/cluster/status", clusterHandler.GetClusterStatus) + registerFunc(apiRouter, "/cluster", clusterHandler.GetCluster, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/cluster/status", clusterHandler.GetClusterStatus, setAuditBackend(prometheus)) confHandler := newConfHandler(svr, rd) - registerFunc(apiRouter, "/config", confHandler.GetConfig, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/config", confHandler.SetConfig, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(apiRouter, "/config/default", confHandler.GetDefaultConfig, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/config/schedule", confHandler.GetScheduleConfig, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/config/schedule", confHandler.SetScheduleConfig, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(apiRouter, "/config/pd-server", confHandler.GetPDServerConfig, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/config/replicate", confHandler.GetReplicationConfig, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/config/replicate", confHandler.SetReplicationConfig, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(apiRouter, "/config/label-property", confHandler.GetLabelPropertyConfig, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/config/label-property", confHandler.SetLabelPropertyConfig, setMethods(http.MethodPost)) - registerFunc(apiRouter, "/config/cluster-version", confHandler.GetClusterVersion, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/config/cluster-version", confHandler.SetClusterVersion, setMethods(http.MethodPost)) - registerFunc(apiRouter, "/config/replication-mode", confHandler.GetReplicationModeConfig, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/config/replication-mode", confHandler.SetReplicationModeConfig, 
setMethods(http.MethodPost)) + registerFunc(apiRouter, "/config", confHandler.GetConfig, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/config", confHandler.SetConfig, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/config/default", confHandler.GetDefaultConfig, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/config/schedule", confHandler.GetScheduleConfig, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/config/schedule", confHandler.SetScheduleConfig, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/config/pd-server", confHandler.GetPDServerConfig, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/config/replicate", confHandler.GetReplicationConfig, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/config/replicate", confHandler.SetReplicationConfig, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/config/label-property", confHandler.GetLabelPropertyConfig, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/config/label-property", confHandler.SetLabelPropertyConfig, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/config/cluster-version", confHandler.GetClusterVersion, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/config/cluster-version", confHandler.SetClusterVersion, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/config/replication-mode", confHandler.GetReplicationModeConfig, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/config/replication-mode", confHandler.SetReplicationModeConfig, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) rulesHandler := newRulesHandler(svr, rd) - registerFunc(clusterRouter, "/config/rules", rulesHandler.GetAllRules, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/config/rules", rulesHandler.SetAllRules, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(clusterRouter, "/config/rules/batch", rulesHandler.BatchRules, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(clusterRouter, "/config/rules/group/{group}", rulesHandler.GetRuleByGroup, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/config/rules/region/{region}", rulesHandler.GetRulesByRegion, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/config/rules/region/{region}/detail", rulesHandler.CheckRegionPlacementRule, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/config/rules/key/{key}", rulesHandler.GetRulesByKey, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/config/rule/{group}/{id}", rulesHandler.GetRuleByGroupAndID, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/config/rule", rulesHandler.SetRule, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(clusterRouter, "/config/rule/{group}/{id}", rulesHandler.DeleteRuleByGroup, setMethods(http.MethodDelete), setAuditBackend(localLog)) - - registerFunc(clusterRouter, "/config/rule_group/{id}", rulesHandler.GetGroupConfig, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/config/rule_group", rulesHandler.SetGroupConfig, setMethods(http.MethodPost), setAuditBackend(localLog)) - 
registerFunc(clusterRouter, "/config/rule_group/{id}", rulesHandler.DeleteGroupConfig, setMethods(http.MethodDelete), setAuditBackend(localLog)) - registerFunc(clusterRouter, "/config/rule_groups", rulesHandler.GetAllGroupConfigs, setMethods(http.MethodGet)) - - registerFunc(clusterRouter, "/config/placement-rule", rulesHandler.GetPlacementRules, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/config/placement-rule", rulesHandler.SetPlacementRules, setMethods(http.MethodPost), setAuditBackend(localLog)) + registerFunc(clusterRouter, "/config/rules", rulesHandler.GetAllRules, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/config/rules", rulesHandler.SetAllRules, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/config/rules/batch", rulesHandler.BatchRules, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/config/rules/group/{group}", rulesHandler.GetRuleByGroup, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/config/rules/region/{region}", rulesHandler.GetRulesByRegion, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/config/rules/region/{region}/detail", rulesHandler.CheckRegionPlacementRule, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/config/rules/key/{key}", rulesHandler.GetRulesByKey, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/config/rule/{group}/{id}", rulesHandler.GetRuleByGroupAndID, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/config/rule", rulesHandler.SetRule, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/config/rule/{group}/{id}", rulesHandler.DeleteRuleByGroup, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) + + registerFunc(clusterRouter, "/config/rule_group/{id}", rulesHandler.GetGroupConfig, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/config/rule_group", rulesHandler.SetGroupConfig, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/config/rule_group/{id}", rulesHandler.DeleteGroupConfig, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/config/rule_groups", rulesHandler.GetAllGroupConfigs, setMethods(http.MethodGet), setAuditBackend(prometheus)) + + registerFunc(clusterRouter, "/config/placement-rule", rulesHandler.GetPlacementRules, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/config/placement-rule", rulesHandler.SetPlacementRules, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) // {group} can be a regular expression, we should enable path encode to // support special characters. 
- registerFunc(clusterRouter, "/config/placement-rule/{group}", rulesHandler.GetPlacementRuleByGroup, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/config/placement-rule/{group}", rulesHandler.SetPlacementRuleByGroup, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(escapeRouter, "/config/placement-rule/{group}", rulesHandler.DeletePlacementRuleByGroup, setMethods(http.MethodDelete), setAuditBackend(localLog)) + registerFunc(clusterRouter, "/config/placement-rule/{group}", rulesHandler.GetPlacementRuleByGroup, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/config/placement-rule/{group}", rulesHandler.SetPlacementRuleByGroup, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(escapeRouter, "/config/placement-rule/{group}", rulesHandler.DeletePlacementRuleByGroup, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) regionLabelHandler := newRegionLabelHandler(svr, rd) - registerFunc(clusterRouter, "/config/region-label/rules", regionLabelHandler.GetAllRegionLabelRules, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/config/region-label/rules/ids", regionLabelHandler.GetRegionLabelRulesByIDs, setMethods(http.MethodGet)) + registerFunc(clusterRouter, "/config/region-label/rules", regionLabelHandler.GetAllRegionLabelRules, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/config/region-label/rules/ids", regionLabelHandler.GetRegionLabelRulesByIDs, setMethods(http.MethodGet), setAuditBackend(prometheus)) // {id} can be a string with special characters, we should enable path encode to support it. - registerFunc(escapeRouter, "/config/region-label/rule/{id}", regionLabelHandler.GetRegionLabelRuleByID, setMethods(http.MethodGet)) - registerFunc(escapeRouter, "/config/region-label/rule/{id}", regionLabelHandler.DeleteRegionLabelRule, setMethods(http.MethodDelete), setAuditBackend(localLog)) - registerFunc(clusterRouter, "/config/region-label/rule", regionLabelHandler.SetRegionLabelRule, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(clusterRouter, "/config/region-label/rules", regionLabelHandler.PatchRegionLabelRules, setMethods(http.MethodPatch), setAuditBackend(localLog)) - registerFunc(clusterRouter, "/region/id/{id}/label/{key}", regionLabelHandler.GetRegionLabelByKey, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/region/id/{id}/labels", regionLabelHandler.GetRegionLabels, setMethods(http.MethodGet)) + registerFunc(escapeRouter, "/config/region-label/rule/{id}", regionLabelHandler.GetRegionLabelRuleByID, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(escapeRouter, "/config/region-label/rule/{id}", regionLabelHandler.DeleteRegionLabelRule, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/config/region-label/rule", regionLabelHandler.SetRegionLabelRule, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/config/region-label/rules", regionLabelHandler.PatchRegionLabelRules, setMethods(http.MethodPatch), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/region/id/{id}/label/{key}", regionLabelHandler.GetRegionLabelByKey, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/region/id/{id}/labels", regionLabelHandler.GetRegionLabels, setMethods(http.MethodGet), setAuditBackend(prometheus)) storeHandler := 
newStoreHandler(handler, rd) - registerFunc(clusterRouter, "/store/{id}", storeHandler.GetStore, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/store/{id}", storeHandler.DeleteStore, setMethods(http.MethodDelete), setAuditBackend(localLog)) - registerFunc(clusterRouter, "/store/{id}/state", storeHandler.SetStoreState, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(clusterRouter, "/store/{id}/label", storeHandler.SetStoreLabel, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(clusterRouter, "/store/{id}/label", storeHandler.DeleteStoreLabel, setMethods(http.MethodDelete), setAuditBackend(localLog)) - registerFunc(clusterRouter, "/store/{id}/weight", storeHandler.SetStoreWeight, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(clusterRouter, "/store/{id}/limit", storeHandler.SetStoreLimit, setMethods(http.MethodPost), setAuditBackend(localLog)) + registerFunc(clusterRouter, "/store/{id}", storeHandler.GetStore, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/store/{id}", storeHandler.DeleteStore, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/store/{id}/state", storeHandler.SetStoreState, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/store/{id}/label", storeHandler.SetStoreLabel, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/store/{id}/label", storeHandler.DeleteStoreLabel, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/store/{id}/weight", storeHandler.SetStoreWeight, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/store/{id}/limit", storeHandler.SetStoreLimit, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) storesHandler := newStoresHandler(handler, rd) - registerFunc(clusterRouter, "/stores", storesHandler.GetStores, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/stores/remove-tombstone", storesHandler.RemoveTombStone, setMethods(http.MethodDelete), setAuditBackend(localLog)) - registerFunc(clusterRouter, "/stores/limit", storesHandler.GetAllStoresLimit, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/stores/limit", storesHandler.SetAllStoresLimit, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(clusterRouter, "/stores/limit/scene", storesHandler.SetStoreLimitScene, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(clusterRouter, "/stores/limit/scene", storesHandler.GetStoreLimitScene, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/stores/progress", storesHandler.GetStoresProgress, setMethods(http.MethodGet)) + registerFunc(clusterRouter, "/stores", storesHandler.GetStores, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/stores/remove-tombstone", storesHandler.RemoveTombStone, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/stores/limit", storesHandler.GetAllStoresLimit, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/stores/limit", storesHandler.SetAllStoresLimit, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/stores/limit/scene", storesHandler.SetStoreLimitScene, setMethods(http.MethodPost), setAuditBackend(localLog, 
prometheus)) + registerFunc(clusterRouter, "/stores/limit/scene", storesHandler.GetStoreLimitScene, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/stores/progress", storesHandler.GetStoresProgress, setMethods(http.MethodGet), setAuditBackend(prometheus)) labelsHandler := newLabelsHandler(svr, rd) - registerFunc(clusterRouter, "/labels", labelsHandler.GetLabels, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/labels/stores", labelsHandler.GetStoresByLabel, setMethods(http.MethodGet)) + registerFunc(clusterRouter, "/labels", labelsHandler.GetLabels, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/labels/stores", labelsHandler.GetStoresByLabel, setMethods(http.MethodGet), setAuditBackend(prometheus)) hotStatusHandler := newHotStatusHandler(handler, rd) registerFunc(apiRouter, "/hotspot/regions/write", hotStatusHandler.GetHotWriteRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) @@ -243,89 +244,89 @@ func createRouter(prefix string, svr *server.Server) *mux.Router { regionsHandler := newRegionsHandler(svr, rd) registerFunc(clusterRouter, "/regions/key", regionsHandler.ScanRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(clusterRouter, "/regions/count", regionsHandler.GetRegionCount, setMethods(http.MethodGet), setAuditBackend(prometheus)) - registerFunc(clusterRouter, "/regions/store/{id}", regionsHandler.GetStoreRegions, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/regions/writeflow", regionsHandler.GetTopWriteFlowRegions, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/regions/readflow", regionsHandler.GetTopReadFlowRegions, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/regions/confver", regionsHandler.GetTopConfVerRegions, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/regions/version", regionsHandler.GetTopVersionRegions, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/regions/size", regionsHandler.GetTopSizeRegions, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/regions/keys", regionsHandler.GetTopKeysRegions, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/regions/check/miss-peer", regionsHandler.GetMissPeerRegions, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/regions/check/extra-peer", regionsHandler.GetExtraPeerRegions, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/regions/check/pending-peer", regionsHandler.GetPendingPeerRegions, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/regions/check/down-peer", regionsHandler.GetDownPeerRegions, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/regions/check/learner-peer", regionsHandler.GetLearnerPeerRegions, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/regions/check/empty-region", regionsHandler.GetEmptyRegions, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/regions/check/offline-peer", regionsHandler.GetOfflinePeerRegions, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/regions/check/oversized-region", regionsHandler.GetOverSizedRegions, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/regions/check/undersized-region", regionsHandler.GetUndersizedRegions, setMethods(http.MethodGet)) - - registerFunc(clusterRouter, "/regions/check/hist-size", regionsHandler.GetSizeHistogram, setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/regions/check/hist-keys", regionsHandler.GetKeysHistogram, 
setMethods(http.MethodGet)) - registerFunc(clusterRouter, "/regions/sibling/{id}", regionsHandler.GetRegionSiblings, setMethods(http.MethodGet)) + registerFunc(clusterRouter, "/regions/store/{id}", regionsHandler.GetStoreRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/writeflow", regionsHandler.GetTopWriteFlowRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/readflow", regionsHandler.GetTopReadFlowRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/confver", regionsHandler.GetTopConfVerRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/version", regionsHandler.GetTopVersionRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/size", regionsHandler.GetTopSizeRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/keys", regionsHandler.GetTopKeysRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/check/miss-peer", regionsHandler.GetMissPeerRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/check/extra-peer", regionsHandler.GetExtraPeerRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/check/pending-peer", regionsHandler.GetPendingPeerRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/check/down-peer", regionsHandler.GetDownPeerRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/check/learner-peer", regionsHandler.GetLearnerPeerRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/check/empty-region", regionsHandler.GetEmptyRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/check/offline-peer", regionsHandler.GetOfflinePeerRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/check/oversized-region", regionsHandler.GetOverSizedRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/check/undersized-region", regionsHandler.GetUndersizedRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + + registerFunc(clusterRouter, "/regions/check/hist-size", regionsHandler.GetSizeHistogram, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/check/hist-keys", regionsHandler.GetKeysHistogram, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/sibling/{id}", regionsHandler.GetRegionSiblings, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(clusterRouter, "/regions/accelerate-schedule", regionsHandler.AccelerateRegionsScheduleInRange, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) registerFunc(clusterRouter, "/regions/scatter", regionsHandler.ScatterRegions, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) - registerFunc(clusterRouter, "/regions/split", regionsHandler.SplitRegions, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(clusterRouter, "/regions/range-holes", regionsHandler.GetRangeHoles, setMethods(http.MethodGet)) - 
registerFunc(clusterRouter, "/regions/replicated", regionsHandler.CheckRegionsReplicated, setMethods(http.MethodGet), setQueries("startKey", "{startKey}", "endKey", "{endKey}")) + registerFunc(clusterRouter, "/regions/split", regionsHandler.SplitRegions, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/regions/range-holes", regionsHandler.GetRangeHoles, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/replicated", regionsHandler.CheckRegionsReplicated, setMethods(http.MethodGet), setQueries("startKey", "{startKey}", "endKey", "{endKey}"), setAuditBackend(prometheus)) - registerFunc(apiRouter, "/version", newVersionHandler(rd).GetVersion, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/status", newStatusHandler(svr, rd).GetPDStatus, setMethods(http.MethodGet)) + registerFunc(apiRouter, "/version", newVersionHandler(rd).GetVersion, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/status", newStatusHandler(svr, rd).GetPDStatus, setMethods(http.MethodGet), setAuditBackend(prometheus)) memberHandler := newMemberHandler(svr, rd) - registerFunc(apiRouter, "/members", memberHandler.GetMembers, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/members/name/{name}", memberHandler.DeleteMemberByName, setMethods(http.MethodDelete), setAuditBackend(localLog)) - registerFunc(apiRouter, "/members/id/{id}", memberHandler.DeleteMemberByID, setMethods(http.MethodDelete), setAuditBackend(localLog)) - registerFunc(apiRouter, "/members/name/{name}", memberHandler.SetMemberPropertyByName, setMethods(http.MethodPost), setAuditBackend(localLog)) + registerFunc(apiRouter, "/members", memberHandler.GetMembers, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/members/name/{name}", memberHandler.DeleteMemberByName, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/members/id/{id}", memberHandler.DeleteMemberByID, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/members/name/{name}", memberHandler.SetMemberPropertyByName, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) leaderHandler := newLeaderHandler(svr, rd) - registerFunc(apiRouter, "/leader", leaderHandler.GetLeader, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/leader/resign", leaderHandler.ResignLeader, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(apiRouter, "/leader/transfer/{next_leader}", leaderHandler.TransferLeader, setMethods(http.MethodPost), setAuditBackend(localLog)) + registerFunc(apiRouter, "/leader", leaderHandler.GetLeader, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/leader/resign", leaderHandler.ResignLeader, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/leader/transfer/{next_leader}", leaderHandler.TransferLeader, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) statsHandler := newStatsHandler(svr, rd) - registerFunc(clusterRouter, "/stats/region", statsHandler.GetRegionStatus, setMethods(http.MethodGet)) + registerFunc(clusterRouter, "/stats/region", statsHandler.GetRegionStatus, setMethods(http.MethodGet), setAuditBackend(prometheus)) trendHandler := newTrendHandler(svr, rd) registerFunc(apiRouter, "/trend", trendHandler.GetTrend, setMethods(http.MethodGet), setAuditBackend(prometheus)) adminHandler := 
newAdminHandler(svr, rd) - registerFunc(clusterRouter, "/admin/cache/region/{id}", adminHandler.DeleteRegionCache, setMethods(http.MethodDelete), setAuditBackend(localLog)) - registerFunc(clusterRouter, "/admin/cache/regions", adminHandler.DeleteAllRegionCache, setMethods(http.MethodDelete), setAuditBackend(localLog)) + registerFunc(clusterRouter, "/admin/cache/region/{id}", adminHandler.DeleteRegionCache, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/admin/cache/regions", adminHandler.DeleteAllRegionCache, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) // br ebs restore phase 1 will reset ts, but at that time the cluster hasn't bootstrapped, so cannot use clusterRouter - registerFunc(apiRouter, "/admin/reset-ts", adminHandler.ResetTS, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(apiRouter, "/admin/persist-file/{file_name}", adminHandler.SavePersistFile, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(apiRouter, "/admin/persist-file/{file_name}", adminHandler.SavePersistFile, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(apiRouter, "/admin/cluster/markers/snapshot-recovering", adminHandler.IsSnapshotRecovering, setMethods(http.MethodGet), setAuditBackend(localLog)) - registerFunc(apiRouter, "/admin/cluster/markers/snapshot-recovering", adminHandler.MarkSnapshotRecovering, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(apiRouter, "/admin/cluster/markers/snapshot-recovering", adminHandler.UnmarkSnapshotRecovering, setMethods(http.MethodDelete), setAuditBackend(localLog)) - registerFunc(apiRouter, "/admin/base-alloc-id", adminHandler.RecoverAllocID, setMethods(http.MethodPost), setAuditBackend(localLog)) + registerFunc(apiRouter, "/admin/reset-ts", adminHandler.ResetTS, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/admin/persist-file/{file_name}", adminHandler.SavePersistFile, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/admin/persist-file/{file_name}", adminHandler.SavePersistFile, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/admin/cluster/markers/snapshot-recovering", adminHandler.IsSnapshotRecovering, setMethods(http.MethodGet), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/admin/cluster/markers/snapshot-recovering", adminHandler.MarkSnapshotRecovering, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/admin/cluster/markers/snapshot-recovering", adminHandler.UnmarkSnapshotRecovering, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/admin/base-alloc-id", adminHandler.RecoverAllocID, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) serviceMiddlewareHandler := newServiceMiddlewareHandler(svr, rd) - registerFunc(apiRouter, "/service-middleware/config", serviceMiddlewareHandler.GetServiceMiddlewareConfig, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/service-middleware/config", serviceMiddlewareHandler.SetServiceMiddlewareConfig, setMethods(http.MethodPost), setAuditBackend(localLog)) - registerFunc(apiRouter, "/service-middleware/config/rate-limit", serviceMiddlewareHandler.SetRatelimitConfig, setMethods(http.MethodPost), setAuditBackend(localLog), setRateLimitAllowList()) + registerFunc(apiRouter, "/service-middleware/config", 
serviceMiddlewareHandler.GetServiceMiddlewareConfig, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/service-middleware/config", serviceMiddlewareHandler.SetServiceMiddlewareConfig, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(apiRouter, "/service-middleware/config/rate-limit", serviceMiddlewareHandler.SetRatelimitConfig, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus), setRateLimitAllowList()) logHandler := newLogHandler(svr, rd) - registerFunc(apiRouter, "/admin/log", logHandler.SetLogLevel, setMethods(http.MethodPost), setAuditBackend(localLog)) + registerFunc(apiRouter, "/admin/log", logHandler.SetLogLevel, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) replicationModeHandler := newReplicationModeHandler(svr, rd) - registerFunc(clusterRouter, "/replication_mode/status", replicationModeHandler.GetReplicationModeStatus) + registerFunc(clusterRouter, "/replication_mode/status", replicationModeHandler.GetReplicationModeStatus, setAuditBackend(prometheus)) pluginHandler := newPluginHandler(handler, rd) - registerFunc(apiRouter, "/plugin", pluginHandler.LoadPlugin, setMethods(http.MethodPost)) - registerFunc(apiRouter, "/plugin", pluginHandler.UnloadPlugin, setMethods(http.MethodDelete)) + registerFunc(apiRouter, "/plugin", pluginHandler.LoadPlugin, setMethods(http.MethodPost), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/plugin", pluginHandler.UnloadPlugin, setMethods(http.MethodDelete), setAuditBackend(prometheus)) healthHandler := newHealthHandler(svr, rd) - registerFunc(apiRouter, "/health", healthHandler.GetHealthStatus, setMethods(http.MethodGet)) - registerFunc(apiRouter, "/ping", healthHandler.Ping, setMethods(http.MethodGet)) + registerFunc(apiRouter, "/health", healthHandler.GetHealthStatus, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/ping", healthHandler.Ping, setMethods(http.MethodGet), setAuditBackend(prometheus)) // metric query use to query metric data, the protocol is compatible with prometheus. 
- registerFunc(apiRouter, "/metric/query", newQueryMetric(svr).QueryMetric, setMethods(http.MethodGet, http.MethodPost)) - registerFunc(apiRouter, "/metric/query_range", newQueryMetric(svr).QueryMetric, setMethods(http.MethodGet, http.MethodPost)) + registerFunc(apiRouter, "/metric/query", newQueryMetric(svr).QueryMetric, setMethods(http.MethodGet, http.MethodPost), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/metric/query_range", newQueryMetric(svr).QueryMetric, setMethods(http.MethodGet, http.MethodPost), setAuditBackend(prometheus)) // tso API tsoHandler := newTSOHandler(svr, rd) - registerFunc(apiRouter, "/tso/allocator/transfer/{name}", tsoHandler.TransferLocalTSOAllocator, setMethods(http.MethodPost), setAuditBackend(localLog)) + registerFunc(apiRouter, "/tso/allocator/transfer/{name}", tsoHandler.TransferLocalTSOAllocator, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) pprofHandler := newPprofHandler(svr, rd) // profile API @@ -342,19 +343,19 @@ func createRouter(prefix string, svr *server.Server) *mux.Router { // service GC safepoint API serviceGCSafepointHandler := newServiceGCSafepointHandler(svr, rd) - registerFunc(apiRouter, "/gc/safepoint", serviceGCSafepointHandler.GetGCSafePoint, setMethods(http.MethodGet), setAuditBackend(localLog)) - registerFunc(apiRouter, "/gc/safepoint/{service_id}", serviceGCSafepointHandler.DeleteGCSafePoint, setMethods(http.MethodDelete), setAuditBackend(localLog)) + registerFunc(apiRouter, "/gc/safepoint", serviceGCSafepointHandler.GetGCSafePoint, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(apiRouter, "/gc/safepoint/{service_id}", serviceGCSafepointHandler.DeleteGCSafePoint, setMethods(http.MethodDelete), setAuditBackend(localLog, prometheus)) // min resolved ts API minResolvedTSHandler := newMinResolvedTSHandler(svr, rd) - registerFunc(clusterRouter, "/min-resolved-ts", minResolvedTSHandler.GetMinResolvedTS, setMethods(http.MethodGet)) + registerFunc(clusterRouter, "/min-resolved-ts", minResolvedTSHandler.GetMinResolvedTS, setMethods(http.MethodGet), setAuditBackend(prometheus)) // unsafe admin operation API unsafeOperationHandler := newUnsafeOperationHandler(svr, rd) registerFunc(clusterRouter, "/admin/unsafe/remove-failed-stores", - unsafeOperationHandler.RemoveFailedStores, setMethods(http.MethodPost)) + unsafeOperationHandler.RemoveFailedStores, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) registerFunc(clusterRouter, "/admin/unsafe/remove-failed-stores/show", - unsafeOperationHandler.GetFailedStoresRemovalStatus, setMethods(http.MethodGet)) + unsafeOperationHandler.GetFailedStoresRemovalStatus, setMethods(http.MethodGet), setAuditBackend(prometheus)) // API to set or unset failpoints failpoint.Inject("enableFailpointAPI", func() { diff --git a/server/api/service_middleware_test.go b/server/api/service_middleware_test.go index 9b2ec7d87461..0c037c2cc665 100644 --- a/server/api/service_middleware_test.go +++ b/server/api/service_middleware_test.go @@ -59,7 +59,7 @@ func (suite *auditMiddlewareTestSuite) TestConfigAuditSwitch() { sc := &config.ServiceMiddlewareConfig{} re := suite.Require() suite.NoError(tu.ReadGetJSON(re, testDialClient, addr, sc)) - suite.False(sc.EnableAudit) + suite.True(sc.EnableAudit) ms := map[string]interface{}{ "enable-audit": "true", diff --git a/server/config/service_middleware_config.go b/server/config/service_middleware_config.go index 38f51fce3fdf..3d748de1c65b 100644 --- a/server/config/service_middleware_config.go +++ 
b/server/config/service_middleware_config.go @@ -17,7 +17,7 @@ package config import "github.com/tikv/pd/pkg/ratelimit" const ( - defaultEnableAuditMiddleware = false + defaultEnableAuditMiddleware = true defaultEnableRateLimitMiddleware = false ) diff --git a/server/metrics.go b/server/metrics.go index e44c04ce9e0b..4a3efaeebbcd 100644 --- a/server/metrics.go +++ b/server/metrics.go @@ -142,7 +142,7 @@ var ( Name: "audit_handling_seconds", Help: "PD server service handling audit", Buckets: prometheus.DefBuckets, - }, []string{"service", "method", "component"}) + }, []string{"service", "method", "component", "ip"}) ) func init() { diff --git a/tests/server/api/api_test.go b/tests/server/api/api_test.go index e65711a5b2d1..7b36618e62ca 100644 --- a/tests/server/api/api_test.go +++ b/tests/server/api/api_test.go @@ -207,42 +207,12 @@ func BenchmarkDoRequestWithServiceMiddleware(b *testing.B) { resp.Body.Close() b.StartTimer() for i := 0; i < b.N; i++ { - doTestRequest(leader) + doTestRequestWithLogAudit(leader) } cancel() cluster.Destroy() } -func BenchmarkDoRequestWithoutServiceMiddleware(b *testing.B) { - b.StopTimer() - ctx, cancel := context.WithCancel(context.Background()) - cluster, _ := tests.NewTestCluster(ctx, 1) - cluster.RunInitialServers() - cluster.WaitLeader() - leader := cluster.GetServer(cluster.GetLeader()) - input := map[string]interface{}{ - "enable-audit": "false", - } - data, _ := json.Marshal(input) - req, _ := http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config", bytes.NewBuffer(data)) - resp, _ := dialClient.Do(req) - resp.Body.Close() - b.StartTimer() - for i := 0; i < b.N; i++ { - doTestRequest(leader) - } - cancel() - cluster.Destroy() -} - -func doTestRequest(srv *tests.TestServer) { - timeUnix := time.Now().Unix() - 20 - req, _ := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/pd/api/v1/trend?from=%d", srv.GetAddr(), timeUnix), nil) - req.Header.Set("component", "test") - resp, _ := dialClient.Do(req) - resp.Body.Close() -} - func (suite *middlewareTestSuite) TestRateLimitMiddleware() { leader := suite.cluster.GetServer(suite.cluster.GetLeader()) input := map[string]interface{}{ @@ -433,7 +403,7 @@ func (suite *middlewareTestSuite) TestAuditPrometheusBackend() { defer resp.Body.Close() content, _ := io.ReadAll(resp.Body) output := string(content) - suite.Contains(output, "pd_service_audit_handling_seconds_count{component=\"anonymous\",method=\"HTTP\",service=\"GetTrend\"} 1") + suite.Contains(output, "pd_service_audit_handling_seconds_count{component=\"anonymous\",ip=\"127.0.0.1\",method=\"HTTP\",service=\"GetTrend\"} 1") // resign to test persist config oldLeaderName := leader.GetServer().Name() @@ -459,7 +429,7 @@ func (suite *middlewareTestSuite) TestAuditPrometheusBackend() { defer resp.Body.Close() content, _ = io.ReadAll(resp.Body) output = string(content) - suite.Contains(output, "pd_service_audit_handling_seconds_count{component=\"anonymous\",method=\"HTTP\",service=\"GetTrend\"} 2") + suite.Contains(output, "pd_service_audit_handling_seconds_count{component=\"anonymous\",ip=\"127.0.0.1\",method=\"HTTP\",service=\"GetTrend\"} 2") input = map[string]interface{}{ "enable-audit": "false", @@ -521,13 +491,35 @@ func BenchmarkDoRequestWithLocalLogAudit(b *testing.B) { resp.Body.Close() b.StartTimer() for i := 0; i < b.N; i++ { - doTestRequest(leader) + doTestRequestWithLogAudit(leader) } cancel() cluster.Destroy() } -func BenchmarkDoRequestWithoutLocalLogAudit(b *testing.B) { +func BenchmarkDoRequestWithPrometheusAudit(b 
*testing.B) { + b.StopTimer() + ctx, cancel := context.WithCancel(context.Background()) + cluster, _ := tests.NewTestCluster(ctx, 1) + cluster.RunInitialServers() + cluster.WaitLeader() + leader := cluster.GetServer(cluster.GetLeader()) + input := map[string]interface{}{ + "enable-audit": "true", + } + data, _ := json.Marshal(input) + req, _ := http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config", bytes.NewBuffer(data)) + resp, _ := dialClient.Do(req) + resp.Body.Close() + b.StartTimer() + for i := 0; i < b.N; i++ { + doTestRequestWithPrometheus(leader) + } + cancel() + cluster.Destroy() +} + +func BenchmarkDoRequestWithoutServiceMiddleware(b *testing.B) { b.StopTimer() ctx, cancel := context.WithCancel(context.Background()) cluster, _ := tests.NewTestCluster(ctx, 1) @@ -543,12 +535,27 @@ func BenchmarkDoRequestWithoutLocalLogAudit(b *testing.B) { resp.Body.Close() b.StartTimer() for i := 0; i < b.N; i++ { - doTestRequest(leader) + doTestRequestWithLogAudit(leader) } cancel() cluster.Destroy() } +func doTestRequestWithLogAudit(srv *tests.TestServer) { + req, _ := http.NewRequest(http.MethodDelete, fmt.Sprintf("%s/pd/api/v1/admin/cache/regions", srv.GetAddr()), nil) + req.Header.Set("component", "test") + resp, _ := dialClient.Do(req) + resp.Body.Close() +} + +func doTestRequestWithPrometheus(srv *tests.TestServer) { + timeUnix := time.Now().Unix() - 20 + req, _ := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/pd/api/v1/trend?from=%d", srv.GetAddr(), timeUnix), nil) + req.Header.Set("component", "test") + resp, _ := dialClient.Do(req) + resp.Body.Close() +} + type redirectorTestSuite struct { suite.Suite cleanup func() From 44e07e22c3b23590b790263870aa92069217f9a6 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Tue, 18 Oct 2022 11:17:53 +0800 Subject: [PATCH 06/67] scheduler: decreate the repeat filter in scheduler. 
(#5544) close tikv/pd#5539 Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Ti Chi Robot --- metrics/grafana/pd.json | 2 +- server/api/diagnostic_test.go | 9 +-- server/schedule/filter/filters.go | 61 ++++++++----------- server/schedulers/balance_leader.go | 1 + server/schedulers/balance_region.go | 46 +++++++++------ server/schedulers/balance_test.go | 14 ++++- server/schedulers/metrics.go | 2 +- server/schedulers/utils.go | 91 +++++++++++++++++++---------- 8 files changed, 129 insertions(+), 97 deletions(-) diff --git a/metrics/grafana/pd.json b/metrics/grafana/pd.json index a690a7842ab6..39a7a6ab1a92 100644 --- a/metrics/grafana/pd.json +++ b/metrics/grafana/pd.json @@ -3941,7 +3941,7 @@ "format": "time_series", "hide": true, "intervalFactor": 1, - "legendFormat": "tolerant-resource-{{source}}-{{target}}", + "legendFormat": "tolerant-resource", "refId": "E" } ], diff --git a/server/api/diagnostic_test.go b/server/api/diagnostic_test.go index 101ab92b1478..5709edc1f8ad 100644 --- a/server/api/diagnostic_test.go +++ b/server/api/diagnostic_test.go @@ -56,6 +56,7 @@ func (suite *diagnosticTestSuite) SetupSuite() { mustBootstrapCluster(re, suite.svr) mustPutStore(re, suite.svr, 1, metapb.StoreState_Up, metapb.NodeState_Serving, nil) + mustPutStore(re, suite.svr, 2, metapb.StoreState_Up, metapb.NodeState_Serving, nil) } func (suite *diagnosticTestSuite) TearDownSuite() { @@ -99,7 +100,7 @@ func (suite *diagnosticTestSuite) TestSchedulerDiagnosticAPI() { err = tu.CheckPostJSON(testDialClient, suite.schedulerPrifex, body, tu.StatusOK(suite.Require())) suite.NoError(err) - time.Sleep(time.Millisecond * 50) + time.Sleep(time.Millisecond * 100) result = &cluster.DiagnosticResult{} err = tu.ReadGetJSON(re, testDialClient, balanceRegionURL, result) suite.NoError(err) @@ -111,7 +112,7 @@ func (suite *diagnosticTestSuite) TestSchedulerDiagnosticAPI() { suite.NoError(err) err = tu.CheckPostJSON(testDialClient, suite.schedulerPrifex+"/"+schedulers.BalanceRegionName, pauseArgs, tu.StatusOK(re)) suite.NoError(err) - time.Sleep(time.Millisecond * 50) + time.Sleep(time.Millisecond * 100) result = &cluster.DiagnosticResult{} err = tu.ReadGetJSON(re, testDialClient, balanceRegionURL, result) suite.NoError(err) @@ -122,14 +123,14 @@ func (suite *diagnosticTestSuite) TestSchedulerDiagnosticAPI() { suite.NoError(err) err = tu.CheckPostJSON(testDialClient, suite.schedulerPrifex+"/"+schedulers.BalanceRegionName, pauseArgs, tu.StatusOK(re)) suite.NoError(err) - time.Sleep(time.Millisecond * 50) + time.Sleep(time.Millisecond * 100) result = &cluster.DiagnosticResult{} err = tu.ReadGetJSON(re, testDialClient, balanceRegionURL, result) suite.NoError(err) suite.Equal("pending", result.Status) mustPutRegion(re, suite.svr, 1000, 1, []byte("a"), []byte("b"), core.SetApproximateSize(60)) - time.Sleep(time.Millisecond * 50) + time.Sleep(time.Millisecond * 100) result = &cluster.DiagnosticResult{} err = tu.ReadGetJSON(re, testDialClient, balanceRegionURL, result) suite.NoError(err) diff --git a/server/schedule/filter/filters.go b/server/schedule/filter/filters.go index d8b010890932..645fa99d9735 100644 --- a/server/schedule/filter/filters.go +++ b/server/schedule/filter/filters.go @@ -51,6 +51,29 @@ func SelectSourceStores(stores []*core.StoreInfo, filters []Filter, opt *config. }) } +// SelectUnavailableTargetStores selects unavailable stores that can't be selected as target store from the list. 
+func SelectUnavailableTargetStores(stores []*core.StoreInfo, filters []Filter, opt *config.PersistOptions, collector *plan.Collector) []*core.StoreInfo { + return filterStoresBy(stores, func(s *core.StoreInfo) bool { + targetID := strconv.FormatUint(s.GetID(), 10) + return slice.AnyOf(filters, func(i int) bool { + status := filters[i].Target(opt, s) + if !status.IsOK() { + cfilter, ok := filters[i].(comparingFilter) + sourceID := "" + if ok { + sourceID = strconv.FormatUint(cfilter.GetSourceStoreID(), 10) + } + filterCounter.WithLabelValues(filterTarget, filters[i].Scope(), filters[i].Type(), targetID, sourceID).Inc() + if collector != nil { + collector.Collect(plan.SetResourceWithStep(s, 2), plan.SetStatus(status)) + } + return true + } + return false + }) + }) +} + // SelectTargetStores selects stores that be selected as target store from the list. func SelectTargetStores(stores []*core.StoreInfo, filters []Filter, opt *config.PersistOptions, collector *plan.Collector) []*core.StoreInfo { return filterStoresBy(stores, func(s *core.StoreInfo) bool { @@ -829,41 +852,3 @@ func createRegionForRuleFit(startKey, endKey []byte, }, copyLeader, opts...) return cloneRegion } - -// RegionScoreFilter filter target store that it's score must higher than the given score -type RegionScoreFilter struct { - scope string - score float64 -} - -// NewRegionScoreFilter creates a Filter that filters all high score stores. -func NewRegionScoreFilter(scope string, source *core.StoreInfo, opt *config.PersistOptions) Filter { - return &RegionScoreFilter{ - scope: scope, - score: source.RegionScore(opt.GetRegionScoreFormulaVersion(), opt.GetHighSpaceRatio(), opt.GetLowSpaceRatio(), 0), - } -} - -// Scope scopes only for balance region -func (f *RegionScoreFilter) Scope() string { - return f.scope -} - -// Type types region score filter -func (f *RegionScoreFilter) Type() string { - return "region-score-filter" -} - -// Source ignore source -func (f *RegionScoreFilter) Source(opt *config.PersistOptions, _ *core.StoreInfo) *plan.Status { - return statusOK -} - -// Target return true if target's score less than source's score -func (f *RegionScoreFilter) Target(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { - score := store.RegionScore(opt.GetRegionScoreFormulaVersion(), opt.GetHighSpaceRatio(), opt.GetLowSpaceRatio(), 0) - if score < f.score { - return statusOK - } - return statusStoreScoreDisallowed -} diff --git a/server/schedulers/balance_leader.go b/server/schedulers/balance_leader.go index 7490b8b9ba35..4cb542073ee2 100644 --- a/server/schedulers/balance_leader.go +++ b/server/schedulers/balance_leader.go @@ -540,6 +540,7 @@ func (l *balanceLeaderScheduler) transferLeaderIn(solver *solver, collector *pla func (l *balanceLeaderScheduler) createOperator(solver *solver, collector *plan.Collector) *operator.Operator { solver.step++ defer func() { solver.step-- }() + solver.sourceScore, solver.targetScore = solver.sourceStoreScore(l.GetName()), solver.targetStoreScore(l.GetName()) if !solver.shouldBalance(l.GetName()) { schedulerCounter.WithLabelValues(l.GetName(), "skip").Inc() if collector != nil { diff --git a/server/schedulers/balance_region.go b/server/schedulers/balance_region.go index ecaccc9fb09e..fc211ffe3f0b 100644 --- a/server/schedulers/balance_region.go +++ b/server/schedulers/balance_region.go @@ -146,17 +146,18 @@ func (s *balanceRegionScheduler) Schedule(cluster schedule.Cluster, dryRun bool) schedulerCounter.WithLabelValues(s.GetName(), "schedule").Inc() stores := 
cluster.GetStores() opts := cluster.GetOpts() - stores = filter.SelectSourceStores(stores, s.filters, opts, collector) + faultTargets := filter.SelectUnavailableTargetStores(stores, s.filters, opts, collector) + sourceStores := filter.SelectSourceStores(stores, s.filters, opts, collector) opInfluence := s.opController.GetOpInfluence(cluster) s.OpController.GetFastOpInfluence(cluster, opInfluence) kind := core.NewScheduleKind(core.RegionKind, core.BySize) solver := newSolver(basePlan, kind, cluster, opInfluence) - sort.Slice(stores, func(i, j int) bool { - iOp := solver.GetOpInfluence(stores[i].GetID()) - jOp := solver.GetOpInfluence(stores[j].GetID()) - return stores[i].RegionScore(opts.GetRegionScoreFormulaVersion(), opts.GetHighSpaceRatio(), opts.GetLowSpaceRatio(), iOp) > - stores[j].RegionScore(opts.GetRegionScoreFormulaVersion(), opts.GetHighSpaceRatio(), opts.GetLowSpaceRatio(), jOp) + sort.Slice(sourceStores, func(i, j int) bool { + iOp := solver.GetOpInfluence(sourceStores[i].GetID()) + jOp := solver.GetOpInfluence(sourceStores[j].GetID()) + return sourceStores[i].RegionScore(opts.GetRegionScoreFormulaVersion(), opts.GetHighSpaceRatio(), opts.GetLowSpaceRatio(), iOp) > + sourceStores[j].RegionScore(opts.GetRegionScoreFormulaVersion(), opts.GetHighSpaceRatio(), opts.GetLowSpaceRatio(), jOp) }) pendingFilter := filter.NewRegionPendingFilter() @@ -171,8 +172,15 @@ func (s *balanceRegionScheduler) Schedule(cluster schedule.Cluster, dryRun bool) } solver.step++ - for _, solver.source = range stores { + var sourceIndex int + + // sourcesStore is sorted by region score desc, so we pick the first store as source store. + for sourceIndex, solver.source = range sourceStores { retryLimit := s.retryQuota.GetLimit(solver.source) + solver.sourceScore = solver.sourceStoreScore(s.GetName()) + if sourceIndex == len(sourceStores)-1 { + break + } for i := 0; i < retryLimit; i++ { schedulerCounter.WithLabelValues(s.GetName(), "total").Inc() // Priority pick the region that has a pending peer. @@ -218,7 +226,7 @@ func (s *balanceRegionScheduler) Schedule(cluster schedule.Cluster, dryRun bool) continue } solver.step++ - if op := s.transferPeer(solver, collector); op != nil { + if op := s.transferPeer(solver, collector, sourceStores[sourceIndex+1:], faultTargets); op != nil { s.retryQuota.ResetLimit(solver.source) op.Counters = append(op.Counters, schedulerCounter.WithLabelValues(s.GetName(), "new-operator")) return []*operator.Operator{op}, collector.GetPlans() @@ -232,25 +240,27 @@ func (s *balanceRegionScheduler) Schedule(cluster schedule.Cluster, dryRun bool) } // transferPeer selects the best store to create a new peer to replace the old peer. -func (s *balanceRegionScheduler) transferPeer(solver *solver, collector *plan.Collector) *operator.Operator { +func (s *balanceRegionScheduler) transferPeer(solver *solver, collector *plan.Collector, dstStores []*core.StoreInfo, faultStores []*core.StoreInfo) *operator.Operator { + excludeTargets := solver.region.GetStoreIDs() + for _, store := range faultStores { + excludeTargets[store.GetID()] = struct{}{} + } // the order of the filters should be sorted by the cost of the cpu overhead. // the more expensive the filter is, the later it should be placed. 
filters := []filter.Filter{ - filter.NewExcludedFilter(s.GetName(), nil, solver.region.GetStoreIDs()), - filter.NewSpecialUseFilter(s.GetName()), - &filter.StoreStateFilter{ActionScope: s.GetName(), MoveRegion: true}, - filter.NewRegionScoreFilter(s.GetName(), solver.source, solver.GetOpts()), + filter.NewExcludedFilter(s.GetName(), nil, excludeTargets), filter.NewPlacementSafeguard(s.GetName(), solver.GetOpts(), solver.GetBasicCluster(), solver.GetRuleManager(), solver.region, solver.source), } - candidates := filter.NewCandidates(solver.GetStores()). - FilterTarget(solver.GetOpts(), collector, filters...). - Sort(filter.RegionScoreComparer(solver.GetOpts())) - + candidates := filter.NewCandidates(dstStores).FilterTarget(solver.GetOpts(), collector, filters...) if len(candidates.Stores) != 0 { solver.step++ } - for _, solver.target = range candidates.Stores { + + // candidates are sorted by region score desc, so we pick the last store as target store. + for i := range candidates.Stores { + solver.target = candidates.Stores[len(candidates.Stores)-i-1] + solver.targetScore = solver.targetStoreScore(s.GetName()) regionID := solver.region.GetID() sourceID := solver.source.GetID() targetID := solver.target.GetID() diff --git a/server/schedulers/balance_test.go b/server/schedulers/balance_test.go index 0b7a72f93e2e..84499fbf3e17 100644 --- a/server/schedulers/balance_test.go +++ b/server/schedulers/balance_test.go @@ -71,12 +71,14 @@ func TestInfluenceAmp(t *testing.T) { basePlan := NewBalanceSchedulerPlan() solver := newSolver(basePlan, kind, tc, influence) solver.source, solver.target, solver.region = tc.GetStore(1), tc.GetStore(2), tc.GetRegion(1) + solver.sourceScore, solver.targetScore = solver.sourceStoreScore(""), solver.targetStoreScore("") re.True(solver.shouldBalance("")) // It will not schedule if the diff region count is greater than the sum // of TolerantSizeRatio and influenceAmp*2. tc.AddRegionStore(1, int(100+influenceAmp+2)) solver.source = tc.GetStore(1) + solver.sourceScore, solver.targetScore = solver.sourceStoreScore(""), solver.targetStoreScore("") re.False(solver.shouldBalance("")) re.Less(solver.sourceScore-solver.targetScore, float64(1)) } @@ -157,6 +159,7 @@ func TestShouldBalance(t *testing.T) { basePlan := NewBalanceSchedulerPlan() solver := newSolver(basePlan, kind, tc, oc.GetOpInfluence(tc)) solver.source, solver.target, solver.region = tc.GetStore(1), tc.GetStore(2), tc.GetRegion(1) + solver.sourceScore, solver.targetScore = solver.sourceStoreScore(""), solver.targetStoreScore("") re.Equal(testCase.expectedResult, solver.shouldBalance("")) } @@ -170,6 +173,7 @@ func TestShouldBalance(t *testing.T) { basePlan := NewBalanceSchedulerPlan() solver := newSolver(basePlan, kind, tc, oc.GetOpInfluence(tc)) solver.source, solver.target, solver.region = tc.GetStore(1), tc.GetStore(2), tc.GetRegion(1) + solver.sourceScore, solver.targetScore = solver.sourceStoreScore(""), solver.targetStoreScore("") re.Equal(testCase.expectedResult, solver.shouldBalance("")) } } @@ -184,7 +188,7 @@ func TestTolerantRatio(t *testing.T) { tc := mockcluster.NewCluster(ctx, opt) // create a region to control average region size. 
re.NotNil(tc.AddLeaderRegion(1, 1, 2)) - regionSize := int64(96 * units.MiB) + regionSize := int64(96) region := tc.GetRegion(1).Clone(core.SetApproximateSize(regionSize)) tbl := []struct { @@ -222,7 +226,10 @@ func TestTolerantRatio(t *testing.T) { basePlan := NewBalanceSchedulerPlan() solver := newSolver(basePlan, t.kind, tc, operator.OpInfluence{}) solver.region = region - re.Equal(t.expectTolerantResource(t.kind), solver.getTolerantResource()) + + sourceScore := t.expectTolerantResource(t.kind) + targetScore := solver.getTolerantResource() + re.Equal(sourceScore, targetScore) } } @@ -776,7 +783,7 @@ func TestBalanceRegionSchedule1(t *testing.T) { ops, _ = sb.Schedule(tc, false) op = ops[0] testutil.CheckTransferPeerWithLeaderTransfer(re, op, operator.OpKind(0), 4, 2) - + tc.SetStoreUp(1) // test region replicate not match opt.SetMaxReplicas(3) ops, plans := sb.Schedule(tc, true) @@ -784,6 +791,7 @@ func TestBalanceRegionSchedule1(t *testing.T) { re.Empty(ops) re.Equal(int(plans[0].GetStatus().StatusCode), plan.StatusRegionNotReplicated) + tc.SetStoreOffline(1) opt.SetMaxReplicas(1) ops, plans = sb.Schedule(tc, true) re.NotEmpty(ops) diff --git a/server/schedulers/metrics.go b/server/schedulers/metrics.go index c4914f671dab..21dd8bbe414d 100644 --- a/server/schedulers/metrics.go +++ b/server/schedulers/metrics.go @@ -46,7 +46,7 @@ var tolerantResourceStatus = prometheus.NewGaugeVec( Subsystem: "scheduler", Name: "tolerant_resource", Help: "Store status for schedule", - }, []string{"scheduler", "source", "target"}) + }, []string{"scheduler"}) var balanceLeaderCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ diff --git a/server/schedulers/utils.go b/server/schedulers/utils.go index a9eaccd29a01..acd1fbe84f90 100644 --- a/server/schedulers/utils.go +++ b/server/schedulers/utils.go @@ -44,6 +44,7 @@ type solver struct { kind core.ScheduleKind opInfluence operator.OpInfluence tolerantSizeRatio float64 + tolerantSource int64 sourceScore float64 targetScore float64 @@ -79,44 +80,69 @@ func (p *solver) TargetMetricLabel() string { return strconv.FormatUint(p.TargetStoreID(), 10) } -func (p *solver) shouldBalance(scheduleName string) bool { - // The reason we use max(regionSize, averageRegionSize) to check is: - // 1. prevent moving small regions between stores with close scores, leading to unnecessary balance. - // 2. prevent moving huge regions, leading to over balance. 
+func (p *solver) sourceStoreScore(scheduleName string) float64 { sourceID := p.source.GetID() - targetID := p.target.GetID() tolerantResource := p.getTolerantResource() // to avoid schedule too much, if A's core greater than B and C a little // we want that A should be moved out one region not two - sourceInfluence := p.GetOpInfluence(sourceID) - // A->B, B's influence is positive , so B can become source schedule, it will move region from B to C - if sourceInfluence > 0 { - sourceInfluence = -sourceInfluence + influence := p.GetOpInfluence(sourceID) + if influence > 0 { + influence = -influence + } + + opts := p.GetOpts() + if opts.IsDebugMetricsEnabled() { + opInfluenceStatus.WithLabelValues(scheduleName, strconv.FormatUint(sourceID, 10), "source").Set(float64(influence)) + tolerantResourceStatus.WithLabelValues(scheduleName).Set(float64(tolerantResource)) } + var score float64 + switch p.kind.Resource { + case core.LeaderKind: + sourceDelta := influence - tolerantResource + score = p.source.LeaderScore(p.kind.Policy, sourceDelta) + case core.RegionKind: + sourceDelta := influence*influenceAmp - tolerantResource + score = p.source.RegionScore(opts.GetRegionScoreFormulaVersion(), opts.GetHighSpaceRatio(), opts.GetLowSpaceRatio(), sourceDelta) + } + return score +} + +func (p *solver) targetStoreScore(scheduleName string) float64 { + targetID := p.target.GetID() // to avoid schedule too much, if A's score less than B and C in small range, // we want that A can be moved in one region not two - targetInfluence := p.GetOpInfluence(targetID) + tolerantResource := p.getTolerantResource() // to avoid schedule call back // A->B, A's influence is negative, so A will be target, C may move region to A - if targetInfluence < 0 { - targetInfluence = -targetInfluence + influence := p.GetOpInfluence(targetID) + if influence < 0 { + influence = -influence } + opts := p.GetOpts() + if opts.IsDebugMetricsEnabled() { + opInfluenceStatus.WithLabelValues(scheduleName, strconv.FormatUint(targetID, 10), "target").Set(float64(influence)) + } + var score float64 switch p.kind.Resource { case core.LeaderKind: - sourceDelta, targetDelta := sourceInfluence-tolerantResource, targetInfluence+tolerantResource - p.sourceScore = p.source.LeaderScore(p.kind.Policy, sourceDelta) - p.targetScore = p.target.LeaderScore(p.kind.Policy, targetDelta) + targetDelta := influence + tolerantResource + score = p.target.LeaderScore(p.kind.Policy, targetDelta) case core.RegionKind: - sourceDelta, targetDelta := sourceInfluence*influenceAmp-tolerantResource, targetInfluence*influenceAmp+tolerantResource - p.sourceScore = p.source.RegionScore(opts.GetRegionScoreFormulaVersion(), opts.GetHighSpaceRatio(), opts.GetLowSpaceRatio(), sourceDelta) - p.targetScore = p.target.RegionScore(opts.GetRegionScoreFormulaVersion(), opts.GetHighSpaceRatio(), opts.GetLowSpaceRatio(), targetDelta) - } - if opts.IsDebugMetricsEnabled() { - opInfluenceStatus.WithLabelValues(scheduleName, strconv.FormatUint(sourceID, 10), "source").Set(float64(sourceInfluence)) - opInfluenceStatus.WithLabelValues(scheduleName, strconv.FormatUint(targetID, 10), "target").Set(float64(targetInfluence)) - tolerantResourceStatus.WithLabelValues(scheduleName, strconv.FormatUint(sourceID, 10), strconv.FormatUint(targetID, 10)).Set(float64(tolerantResource)) + targetDelta := influence*influenceAmp + tolerantResource + score = p.target.RegionScore(opts.GetRegionScoreFormulaVersion(), opts.GetHighSpaceRatio(), opts.GetLowSpaceRatio(), targetDelta) } + return score +} + +// Both of the 
source store's score and target store's score should be calculated before calling this function. +// It will not calculate the score again. +func (p *solver) shouldBalance(scheduleName string) bool { + // The reason we use max(regionSize, averageRegionSize) to check is: + // 1. prevent moving small regions between stores with close scores, leading to unnecessary balance. + // 2. prevent moving huge regions, leading to over balance. + sourceID := p.source.GetID() + targetID := p.target.GetID() // Make sure after move, source score is still greater than target score. shouldBalance := p.sourceScore > p.targetScore @@ -124,24 +150,25 @@ func (p *solver) shouldBalance(scheduleName string) bool { log.Debug("skip balance "+p.kind.Resource.String(), zap.String("scheduler", scheduleName), zap.Uint64("region-id", p.region.GetID()), zap.Uint64("source-store", sourceID), zap.Uint64("target-store", targetID), zap.Int64("source-size", p.source.GetRegionSize()), zap.Float64("source-score", p.sourceScore), - zap.Int64("source-influence", sourceInfluence), zap.Int64("target-size", p.target.GetRegionSize()), zap.Float64("target-score", p.targetScore), - zap.Int64("target-influence", targetInfluence), zap.Int64("average-region-size", p.GetAverageRegionSize()), - zap.Int64("tolerant-resource", tolerantResource)) + zap.Int64("tolerant-resource", p.getTolerantResource())) } return shouldBalance } func (p *solver) getTolerantResource() int64 { - if p.kind.Resource == core.LeaderKind && p.kind.Policy == core.ByCount { - return int64(p.tolerantSizeRatio) + if p.tolerantSource > 0 { + return p.tolerantSource } - regionSize := p.region.GetApproximateSize() - if regionSize < p.GetAverageRegionSize() { - regionSize = p.GetAverageRegionSize() + + if p.kind.Resource == core.LeaderKind && p.kind.Policy == core.ByCount { + p.tolerantSource = int64(p.tolerantSizeRatio) + } else { + regionSize := p.GetAverageRegionSize() + p.tolerantSource = int64(float64(regionSize) * p.tolerantSizeRatio) } - return int64(float64(regionSize) * p.tolerantSizeRatio) + return p.tolerantSource } func adjustTolerantRatio(cluster schedule.Cluster, kind core.ScheduleKind) float64 { From 4ee3537afc21c8f01fe9c2b63ed34c9ef73091fc Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Tue, 18 Oct 2022 14:13:53 +0800 Subject: [PATCH 07/67] mod: update pingcap/log mod (#5605) close tikv/pd#5604 Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Ti Chi Robot --- client/go.mod | 2 +- client/go.sum | 4 ++-- go.mod | 2 +- go.sum | 3 ++- pkg/errs/errs_test.go | 2 +- tests/client/go.mod | 2 +- tests/client/go.sum | 4 ++-- tools/pd-tso-bench/go.mod | 2 +- tools/pd-tso-bench/go.sum | 4 ++-- 9 files changed, 13 insertions(+), 12 deletions(-) diff --git a/client/go.mod b/client/go.mod index bc3b9f8d2c5d..4401e88b95e8 100644 --- a/client/go.mod +++ b/client/go.mod @@ -7,7 +7,7 @@ require ( github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad - github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee + github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 github.com/prometheus/client_golang v1.11.0 github.com/stretchr/testify v1.7.0 go.uber.org/goleak v1.1.11 diff --git a/client/go.sum b/client/go.sum index 89398c56263f..f56e7a130b37 100644 --- a/client/go.sum +++ b/client/go.sum @@ -106,8 +106,8 @@ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZ 
github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad h1:lGKxsEwdE0pVXzHYD1SQ1vfa3t/bFVU/latrQz8b/w0= github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= -github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee h1:VO2t6IBpfvW34TdtD/G10VvnGqjLic1jzOuHjUb5VqM= -github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= +github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY= +github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= diff --git a/go.mod b/go.mod index b7359e26d70a..7a935e27cf54 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad - github.com/pingcap/log v0.0.0-20210906054005-afc726e70354 + github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d github.com/pingcap/tidb-dashboard v0.0.0-20220728104842-3743e533b594 github.com/prometheus/client_golang v1.1.0 diff --git a/go.sum b/go.sum index 6d3b46f03354..c05ee56d69a5 100644 --- a/go.sum +++ b/go.sum @@ -422,8 +422,9 @@ github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad/go.mod h1:OYtxs078 github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= -github.com/pingcap/log v0.0.0-20210906054005-afc726e70354 h1:SvWCbCPh1YeHd9yQLksvJYAgft6wLTY1aNG81tpyscQ= github.com/pingcap/log v0.0.0-20210906054005-afc726e70354/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= +github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY= +github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d h1:k3/APKZjXOyJrFy8VyYwRlZhMelpD3qBLJNsw3bPl/g= github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d/go.mod h1:7j18ezaWTao2LHOyMlsc2Dg1vW+mDY9dEbPzVyOlaeM= github.com/pingcap/tidb-dashboard v0.0.0-20220728104842-3743e533b594 h1:kL1CW5qsn459kHZ2YoBYb+YOSWjSlshk55YP/XNQNWo= diff --git a/pkg/errs/errs_test.go b/pkg/errs/errs_test.go index 591d9f899ce9..c242dd994f59 100644 --- a/pkg/errs/errs_test.go +++ b/pkg/errs/errs_test.go @@ -63,7 +63,7 @@ func newZapTestLogger(cfg *log.Config, opts ...zap.Option) verifyLogger { // TestingWriter is used to write to memory. // Used in the verify logger. writer := newTestingWriter() - lg, _, _ := log.InitLoggerWithWriteSyncer(cfg, writer, opts...) + lg, _, _ := log.InitLoggerWithWriteSyncer(cfg, writer, writer, opts...) 
return verifyLogger{ Logger: lg, diff --git a/tests/client/go.mod b/tests/client/go.mod index cee2b19e8bdb..b96ffe5e05df 100644 --- a/tests/client/go.mod +++ b/tests/client/go.mod @@ -92,7 +92,7 @@ require ( github.com/pingcap/check v0.0.0-20211026125417-57bd13f7b5f0 // indirect github.com/pingcap/errcode v0.3.0 // indirect github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c // indirect - github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee // indirect + github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 // indirect github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d // indirect github.com/pingcap/tidb-dashboard v0.0.0-20220728104842-3743e533b594 // indirect github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e // indirect diff --git a/tests/client/go.sum b/tests/client/go.sum index 8d388c14cebb..b2ec8ce4ef50 100644 --- a/tests/client/go.sum +++ b/tests/client/go.sum @@ -396,8 +396,8 @@ github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIf github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= github.com/pingcap/log v0.0.0-20210906054005-afc726e70354/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= -github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee h1:VO2t6IBpfvW34TdtD/G10VvnGqjLic1jzOuHjUb5VqM= -github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= +github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY= +github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d h1:k3/APKZjXOyJrFy8VyYwRlZhMelpD3qBLJNsw3bPl/g= github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d/go.mod h1:7j18ezaWTao2LHOyMlsc2Dg1vW+mDY9dEbPzVyOlaeM= github.com/pingcap/tidb-dashboard v0.0.0-20220728104842-3743e533b594 h1:kL1CW5qsn459kHZ2YoBYb+YOSWjSlshk55YP/XNQNWo= diff --git a/tools/pd-tso-bench/go.mod b/tools/pd-tso-bench/go.mod index 452a6d8a8424..5864ea5d9dee 100644 --- a/tools/pd-tso-bench/go.mod +++ b/tools/pd-tso-bench/go.mod @@ -5,7 +5,7 @@ go 1.16 require ( github.com/influxdata/tdigest v0.0.1 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c - github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee + github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 github.com/prometheus/client_golang v1.11.0 github.com/tikv/pd/client v0.0.0-00010101000000-000000000000 go.uber.org/zap v1.20.0 diff --git a/tools/pd-tso-bench/go.sum b/tools/pd-tso-bench/go.sum index 4e22744e5acf..068c3a13b5b4 100644 --- a/tools/pd-tso-bench/go.sum +++ b/tools/pd-tso-bench/go.sum @@ -106,8 +106,8 @@ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad h1:lGKxsEwdE0pVXzHYD1SQ1vfa3t/bFVU/latrQz8b/w0= github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= -github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee h1:VO2t6IBpfvW34TdtD/G10VvnGqjLic1jzOuHjUb5VqM= -github.com/pingcap/log v0.0.0-20211215031037-e024ba4eb0ee/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= 
+github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY= +github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= From 1a485f742fe1d7000782969ee511adf16c6ef7b4 Mon Sep 17 00:00:00 2001 From: Zwb Date: Thu, 20 Oct 2022 17:11:55 +0800 Subject: [PATCH 08/67] Modify placement rule to support witness (#5292) close tikv/pd#5568 Modify placement rule to support witness Signed-off-by: Wenbo Zhang --- go.mod | 2 +- go.sum | 4 +- server/api/trend_test.go | 28 +++- server/core/peer.go | 6 - server/handler.go | 2 +- server/schedule/checker/rule_checker.go | 35 +++- server/schedule/checker/rule_checker_test.go | 87 ++++++++++ server/schedule/operator/builder.go | 72 +++++--- server/schedule/operator/create_operator.go | 24 ++- server/schedule/operator/step.go | 166 +++++++++++++++++-- server/schedule/placement/fit.go | 2 +- server/schedule/placement/rule.go | 1 + tests/client/go.mod | 2 +- tests/client/go.sum | 3 +- 14 files changed, 368 insertions(+), 66 deletions(-) diff --git a/go.mod b/go.mod index 7a935e27cf54..dc013b203dbc 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,7 @@ require ( github.com/pingcap/errcode v0.3.0 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce - github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad + github.com/pingcap/kvproto v0.0.0-20221014081430-26e28e6a281a github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d github.com/pingcap/tidb-dashboard v0.0.0-20220728104842-3743e533b594 diff --git a/go.sum b/go.sum index c05ee56d69a5..91add432cacb 100644 --- a/go.sum +++ b/go.sum @@ -417,8 +417,8 @@ github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce h1:Y1kCxlCtlPTMt github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce/go.mod h1:w4PEZ5y16LeofeeGwdgZB4ddv9bLyDuIX+ljstgKZyk= github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= github.com/pingcap/kvproto v0.0.0-20200411081810-b85805c9476c/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= -github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad h1:lGKxsEwdE0pVXzHYD1SQ1vfa3t/bFVU/latrQz8b/w0= -github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20221014081430-26e28e6a281a h1:McYxPhA8SHqfUtLfQHHN0fQl4dy93IkhlX4Pp2MKIFA= +github.com/pingcap/kvproto v0.0.0-20221014081430-26e28e6a281a/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= diff --git a/server/api/trend_test.go b/server/api/trend_test.go index 9655faf0b7a2..cdf27206e8a2 100644 --- a/server/api/trend_test.go +++ b/server/api/trend_test.go @@ -39,9 +39,9 @@ func TestTrend(t *testing.T) { } // Create 3 regions, all peers on store1 and store2, and the 
leaders are all on store1. - region4 := newRegionInfo(4, "", "a", 2, 2, []uint64{1, 2}, nil, 1) - region5 := newRegionInfo(5, "a", "b", 2, 2, []uint64{1, 2}, nil, 1) - region6 := newRegionInfo(6, "b", "", 2, 2, []uint64{1, 2}, nil, 1) + region4 := newRegionInfo(4, "", "a", 2, 2, []uint64{1, 2}, nil, nil, 1) + region5 := newRegionInfo(5, "a", "b", 2, 2, []uint64{1, 2}, nil, []uint64{2}, 1) + region6 := newRegionInfo(6, "b", "", 2, 2, []uint64{1, 2}, nil, nil, 1) mustRegionHeartbeat(re, svr, region4) mustRegionHeartbeat(re, svr, region5) mustRegionHeartbeat(re, svr, region6) @@ -57,6 +57,8 @@ func TestTrend(t *testing.T) { op, err := svr.GetHandler().GetOperator(5) re.NoError(err) re.NotNil(op) + re.True(op.Step(0).(operator.AddLearner).IsWitness) + newPeerID := op.Step(0).(operator.AddLearner).PeerID region5 = region5.Clone(core.WithAddPeer(&metapb.Peer{Id: newPeerID, StoreId: 3, Role: metapb.PeerRole_Learner}), core.WithIncConfVer()) mustRegionHeartbeat(re, svr, region5) @@ -97,20 +99,34 @@ func TestTrend(t *testing.T) { } } -func newRegionInfo(id uint64, startKey, endKey string, confVer, ver uint64, voters []uint64, learners []uint64, leaderStore uint64) *core.RegionInfo { +func newRegionInfo(id uint64, startKey, endKey string, confVer, ver uint64, voters []uint64, learners []uint64, witnesses []uint64, leaderStore uint64) *core.RegionInfo { var ( peers = make([]*metapb.Peer, 0, len(voters)+len(learners)) leader *metapb.Peer ) for _, id := range voters { - p := &metapb.Peer{Id: 10 + id, StoreId: id} + witness := false + for _, wid := range witnesses { + if id == wid { + witness = true + break + } + } + p := &metapb.Peer{Id: 10 + id, StoreId: id, IsWitness: witness} if id == leaderStore { leader = p } peers = append(peers, p) } for _, id := range learners { - p := &metapb.Peer{Id: 10 + id, StoreId: id, Role: metapb.PeerRole_Learner} + witness := false + for _, wid := range witnesses { + if id == wid { + witness = true + break + } + } + p := &metapb.Peer{Id: 10 + id, StoreId: id, Role: metapb.PeerRole_Learner, IsWitness: witness} peers = append(peers, p) } return core.NewRegionInfo( diff --git a/server/core/peer.go b/server/core/peer.go index 9d324b29374f..81a154b45726 100644 --- a/server/core/peer.go +++ b/server/core/peer.go @@ -36,9 +36,6 @@ func IsVoter(peer *metapb.Peer) bool { // IsVoterOrIncomingVoter judges whether peer role will become Voter. // The peer is not nil and the role is equal to IncomingVoter or Voter. func IsVoterOrIncomingVoter(peer *metapb.Peer) bool { - if peer == nil { - return false - } switch peer.GetRole() { case metapb.PeerRole_IncomingVoter, metapb.PeerRole_Voter: return true @@ -49,9 +46,6 @@ func IsVoterOrIncomingVoter(peer *metapb.Peer) bool { // IsLearnerOrDemotingVoter judges whether peer role will become Learner. // The peer is not nil and the role is equal to DemotingVoter or Learner. 
func IsLearnerOrDemotingVoter(peer *metapb.Peer) bool { - if peer == nil { - return false - } switch peer.GetRole() { case metapb.PeerRole_DemotingVoter, metapb.PeerRole_Learner: return true diff --git a/server/handler.go b/server/handler.go index 914fe7c6cb72..bb168004807e 100644 --- a/server/handler.go +++ b/server/handler.go @@ -629,7 +629,7 @@ func (h *Handler) AddTransferPeerOperator(regionID uint64, fromStoreID, toStoreI return err } - newPeer := &metapb.Peer{StoreId: toStoreID, Role: oldPeer.GetRole()} + newPeer := &metapb.Peer{StoreId: toStoreID, Role: oldPeer.GetRole(), IsWitness: oldPeer.GetIsWitness()} op, err := operator.CreateMovePeerOperator("admin-move-peer", c, region, operator.OpAdmin, fromStoreID, newPeer) if err != nil { log.Debug("fail to create move peer operator", errs.ZapError(err)) diff --git a/server/schedule/checker/rule_checker.go b/server/schedule/checker/rule_checker.go index 93de73ef34bb..8596af39412e 100644 --- a/server/schedule/checker/rule_checker.go +++ b/server/schedule/checker/rule_checker.go @@ -33,11 +33,12 @@ import ( ) var ( - errNoStoreToAdd = errors.New("no store to add peer") - errNoStoreToReplace = errors.New("no store to replace peer") - errPeerCannotBeLeader = errors.New("peer cannot be leader") - errNoNewLeader = errors.New("no new leader") - errRegionNoLeader = errors.New("region no leader") + errNoStoreToAdd = errors.New("no store to add peer") + errNoStoreToReplace = errors.New("no store to replace peer") + errPeerCannotBeLeader = errors.New("peer cannot be leader") + errPeerCannotBeWitness = errors.New("peer cannot be witness") + errNoNewLeader = errors.New("no new leader") + errRegionNoLeader = errors.New("region no leader") ) const maxPendingListLen = 100000 @@ -181,7 +182,7 @@ func (c *RuleChecker) addRulePeer(region *core.RegionInfo, rf *placement.RuleFit c.handleFilterState(region, filterByTempState) return nil, errNoStoreToAdd } - peer := &metapb.Peer{StoreId: store, Role: rf.Rule.Role.MetaPeerRole()} + peer := &metapb.Peer{StoreId: store, Role: rf.Rule.Role.MetaPeerRole(), IsWitness: rf.Rule.IsWitness} op, err := operator.CreateAddPeerOperator("add-rule-peer", c.cluster, region, peer, operator.OpReplica) if err != nil { return nil, err @@ -199,7 +200,7 @@ func (c *RuleChecker) replaceUnexpectRulePeer(region *core.RegionInfo, rf *place c.handleFilterState(region, filterByTempState) return nil, errNoStoreToReplace } - newPeer := &metapb.Peer{StoreId: store, Role: rf.Rule.Role.MetaPeerRole()} + newPeer := &metapb.Peer{StoreId: store, Role: rf.Rule.Role.MetaPeerRole(), IsWitness: rf.Rule.IsWitness} // pick the smallest leader store to avoid the Offline store be snapshot generator bottleneck. var newLeader *metapb.Peer if region.GetLeader().GetId() == peer.GetId() { @@ -266,6 +267,24 @@ func (c *RuleChecker) fixLooseMatchPeer(region *core.RegionInfo, fit *placement. 
checkerCounter.WithLabelValues("rule_checker", "demote-voter-role").Inc() return operator.CreateDemoteVoterOperator("fix-demote-voter", c.cluster, region, peer) } + if region.GetLeader().GetId() == peer.GetId() && rf.Rule.IsWitness { + return nil, errPeerCannotBeWitness + } + if !core.IsWitness(peer) && rf.Rule.IsWitness { + lv := "set-voter-witness" + if core.IsLearner(peer) { + lv = "set-learner-witness" + } + checkerCounter.WithLabelValues("rule_checker", lv).Inc() + return operator.CreateWitnessPeerOperator("fix-witness-peer", c.cluster, region, peer) + } else if core.IsWitness(peer) && !rf.Rule.IsWitness { + lv := "set-voter-non-witness" + if core.IsLearner(peer) { + lv = "set-learner-non-witness" + } + checkerCounter.WithLabelValues("rule_checker", lv).Inc() + return operator.CreateNonWitnessPeerOperator("fix-non-witness-peer", c.cluster, region, peer) + } return nil, nil } @@ -308,7 +327,7 @@ func (c *RuleChecker) fixBetterLocation(region *core.RegionInfo, rf *placement.R return nil, nil } checkerCounter.WithLabelValues("rule_checker", "move-to-better-location").Inc() - newPeer := &metapb.Peer{StoreId: newStore, Role: rf.Rule.Role.MetaPeerRole()} + newPeer := &metapb.Peer{StoreId: newStore, Role: rf.Rule.Role.MetaPeerRole(), IsWitness: rf.Rule.IsWitness} return operator.CreateMovePeerOperator("move-to-better-location", c.cluster, region, operator.OpReplica, oldStore, newPeer) } diff --git a/server/schedule/checker/rule_checker_test.go b/server/schedule/checker/rule_checker_test.go index 61c95c4e5554..93691a549ec2 100644 --- a/server/schedule/checker/rule_checker_test.go +++ b/server/schedule/checker/rule_checker_test.go @@ -302,6 +302,93 @@ func (suite *ruleCheckerTestSuite) TestFixLeaderRoleWithUnhealthyRegion() { suite.Nil(op) } +func (suite *ruleCheckerTestSuite) TestFixRuleWitness() { + suite.cluster.AddLabelsStore(1, 1, map[string]string{"A": "leader"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"B": "follower"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"C": "voter"}) + suite.cluster.AddLeaderRegion(1, 1, 2) + + suite.ruleManager.SetRule(&placement.Rule{ + GroupID: "pd", + ID: "r1", + Index: 100, + Override: true, + Role: placement.Voter, + Count: 1, + IsWitness: true, + LabelConstraints: []placement.LabelConstraint{ + {Key: "C", Op: "in", Values: []string{"voter"}}, + }, + }) + op := suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Equal("add-rule-peer", op.Desc()) + suite.Equal(uint64(3), op.Step(0).(operator.AddLearner).ToStore) + suite.True(op.Step(0).(operator.AddLearner).IsWitness) +} + +func (suite *ruleCheckerTestSuite) TestFixRuleWitness2() { + suite.cluster.AddLabelsStore(1, 1, map[string]string{"A": "leader"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"B": "voter"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"C": "voter"}) + suite.cluster.AddLeaderRegion(1, 1, 2, 3) + + suite.ruleManager.SetRule(&placement.Rule{ + GroupID: "pd", + ID: "r1", + Index: 100, + Override: true, + Role: placement.Voter, + Count: 1, + IsWitness: true, + LabelConstraints: []placement.LabelConstraint{ + {Key: "C", Op: "in", Values: []string{"voter"}}, + }, + }) + op := suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Equal("fix-witness-peer", op.Desc()) + suite.Equal(uint64(3), op.Step(0).(operator.BecomeWitness).StoreID) +} + +func (suite *ruleCheckerTestSuite) TestFixRuleWitness3() { + suite.cluster.AddLabelsStore(1, 1, map[string]string{"A": "leader"}) + suite.cluster.AddLabelsStore(2, 1, 
map[string]string{"B": "voter"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"C": "voter"}) + suite.cluster.AddLeaderRegion(1, 1, 2, 3) + + r := suite.cluster.GetRegion(1) + // set peer3 to witness + r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(3)})) + + op := suite.rc.Check(r) + suite.NotNil(op) + suite.Equal("fix-non-witness-peer", op.Desc()) + suite.Equal(uint64(3), op.Step(0).(operator.BecomeNonWitness).StoreID) +} + +func (suite *ruleCheckerTestSuite) TestFixRuleWitness4() { + suite.cluster.AddLabelsStore(1, 1, map[string]string{"A": "leader"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"B": "voter"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"C": "voter"}) + suite.cluster.AddLeaderRegion(1, 1, 2, 3) + + suite.ruleManager.SetRule(&placement.Rule{ + GroupID: "pd", + ID: "r1", + Index: 100, + Override: true, + Role: placement.Voter, + Count: 2, + IsWitness: true, + LabelConstraints: []placement.LabelConstraint{ + {Key: "A", Op: "In", Values: []string{"leader"}}, + }, + }) + op := suite.rc.Check(suite.cluster.GetRegion(1)) + suite.Nil(op) +} + func (suite *ruleCheckerTestSuite) TestBetterReplacement() { suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) suite.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host1"}) diff --git a/server/schedule/operator/builder.go b/server/schedule/operator/builder.go index 8fd2a8a2386d..8900a63aa2f8 100644 --- a/server/schedule/operator/builder.go +++ b/server/schedule/operator/builder.go @@ -218,9 +218,10 @@ func (b *Builder) PromoteLearner(storeID uint64) *Builder { b.err = errors.Errorf("cannot promote peer %d: unhealthy", storeID) } else { b.targetPeers.Set(&metapb.Peer{ - Id: peer.GetId(), - StoreId: peer.GetStoreId(), - Role: metapb.PeerRole_Voter, + Id: peer.GetId(), + StoreId: peer.GetStoreId(), + Role: metapb.PeerRole_Voter, + IsWitness: peer.GetIsWitness(), }) } return b @@ -237,9 +238,30 @@ func (b *Builder) DemoteVoter(storeID uint64) *Builder { b.err = errors.Errorf("cannot demote voter %d: is already learner", storeID) } else { b.targetPeers.Set(&metapb.Peer{ - Id: peer.GetId(), - StoreId: peer.GetStoreId(), - Role: metapb.PeerRole_Learner, + Id: peer.GetId(), + StoreId: peer.GetStoreId(), + Role: metapb.PeerRole_Learner, + IsWitness: peer.GetIsWitness(), + }) + } + return b +} + +// BecomeNonWitness records a remove witness attr operation in Builder. +func (b *Builder) BecomeNonWitness(storeID uint64) *Builder { + if b.err != nil { + return b + } + if peer, ok := b.targetPeers[storeID]; !ok { + b.err = errors.Errorf("cannot set non-witness attr to peer %d: not found", storeID) + } else if !core.IsWitness(peer) { + b.err = errors.Errorf("cannot set non-witness attr to peer %d: is already non-witness", storeID) + } else { + b.targetPeers.Set(&metapb.Peer{ + Id: peer.GetId(), + StoreId: peer.GetStoreId(), + Role: metapb.PeerRole_Learner, + IsWitness: false, }) } return b @@ -404,9 +426,10 @@ func (b *Builder) prepareBuild() (string, error) { // modify it to the peer id of the origin. 
if o.GetId() != n.GetId() { n = &metapb.Peer{ - Id: o.GetId(), - StoreId: o.GetStoreId(), - Role: n.GetRole(), + Id: o.GetId(), + StoreId: o.GetStoreId(), + Role: n.GetRole(), + IsWitness: n.GetIsWitness(), } } @@ -436,9 +459,10 @@ func (b *Builder) prepareBuild() (string, error) { return "", err } n = &metapb.Peer{ - Id: id, - StoreId: n.GetStoreId(), - Role: n.GetRole(), + Id: id, + StoreId: n.GetStoreId(), + Role: n.GetRole(), + IsWitness: n.GetIsWitness(), } } // It is a pair with `b.toRemove.Set(o)` when `o != nil`. @@ -505,9 +529,10 @@ func (b *Builder) buildStepsWithJointConsensus(kind OpKind) (OpKind, error) { peer := b.toAdd[add] if !core.IsLearner(peer) { b.execAddPeer(&metapb.Peer{ - Id: peer.GetId(), - StoreId: peer.GetStoreId(), - Role: metapb.PeerRole_Learner, + Id: peer.GetId(), + StoreId: peer.GetStoreId(), + Role: metapb.PeerRole_Learner, + IsWitness: peer.GetIsWitness(), }) b.toPromote.Set(peer) } else { @@ -526,9 +551,10 @@ func (b *Builder) buildStepsWithJointConsensus(kind OpKind) (OpKind, error) { peer := b.toRemove[remove] if !core.IsLearner(peer) { b.toDemote.Set(&metapb.Peer{ - Id: peer.GetId(), - StoreId: peer.GetStoreId(), - Role: metapb.PeerRole_Learner, + Id: peer.GetId(), + StoreId: peer.GetStoreId(), + Role: metapb.PeerRole_Learner, + IsWitness: peer.GetIsWitness(), }) } } @@ -675,19 +701,19 @@ func (b *Builder) execTransferLeader(targetStoreID uint64, targetStoreIDs []uint } func (b *Builder) execPromoteLearner(peer *metapb.Peer) { - b.steps = append(b.steps, PromoteLearner{ToStore: peer.GetStoreId(), PeerID: peer.GetId()}) + b.steps = append(b.steps, PromoteLearner{ToStore: peer.GetStoreId(), PeerID: peer.GetId(), IsWitness: peer.GetIsWitness()}) b.currentPeers.Set(peer) delete(b.toPromote, peer.GetStoreId()) } func (b *Builder) execAddPeer(peer *metapb.Peer) { if b.lightWeight { - b.steps = append(b.steps, AddLearner{ToStore: peer.GetStoreId(), PeerID: peer.GetId(), IsLightWeight: b.lightWeight}) + b.steps = append(b.steps, AddLearner{ToStore: peer.GetStoreId(), PeerID: peer.GetId(), IsLightWeight: b.lightWeight, IsWitness: peer.GetIsWitness()}) } else { - b.steps = append(b.steps, AddLearner{ToStore: peer.GetStoreId(), PeerID: peer.GetId()}) + b.steps = append(b.steps, AddLearner{ToStore: peer.GetStoreId(), PeerID: peer.GetId(), IsWitness: peer.GetIsWitness()}) } if !core.IsLearner(peer) { - b.steps = append(b.steps, PromoteLearner{ToStore: peer.GetStoreId(), PeerID: peer.GetId()}) + b.steps = append(b.steps, PromoteLearner{ToStore: peer.GetStoreId(), PeerID: peer.GetId(), IsWitness: peer.GetIsWitness()}) } b.currentPeers.Set(peer) b.peerAddStep[peer.GetStoreId()] = len(b.steps) @@ -733,7 +759,7 @@ func (b *Builder) execChangePeerV2(needEnter bool, needTransferLeader bool) { for _, d := range b.toDemote.IDs() { peer := b.toDemote[d] - step.DemoteVoters = append(step.DemoteVoters, DemoteVoter{ToStore: peer.GetStoreId(), PeerID: peer.GetId()}) + step.DemoteVoters = append(step.DemoteVoters, DemoteVoter{ToStore: peer.GetStoreId(), PeerID: peer.GetId(), IsWitness: peer.GetIsWitness()}) b.currentPeers.Set(peer) } b.toDemote = newPeersMap() diff --git a/server/schedule/operator/create_operator.go b/server/schedule/operator/create_operator.go index 27a1b2647756..ef88cb397a15 100644 --- a/server/schedule/operator/create_operator.go +++ b/server/schedule/operator/create_operator.go @@ -76,12 +76,20 @@ func CreateForceTransferLeaderOperator(desc string, ci ClusterInformer, region * // CreateMoveRegionOperator creates an operator that moves a region to specified 
stores. func CreateMoveRegionOperator(desc string, ci ClusterInformer, region *core.RegionInfo, kind OpKind, roles map[uint64]placement.PeerRoleType) (*Operator, error) { // construct the peers from roles + oldPeers := region.GetPeers() peers := make(map[uint64]*metapb.Peer) + i := 0 for storeID, role := range roles { + isWitness := false + if i < len(oldPeers) { + isWitness = oldPeers[i].GetIsWitness() + } peers[storeID] = &metapb.Peer{ - StoreId: storeID, - Role: role.MetaPeerRole(), + StoreId: storeID, + Role: role.MetaPeerRole(), + IsWitness: isWitness, } + i += 1 } builder := NewBuilder(desc, ci, region).SetPeers(peers).SetExpectedRoles(roles) return builder.Build(kind) @@ -282,3 +290,15 @@ func CreateLeaveJointStateOperator(desc string, ci ClusterInformer, origin *core b.execChangePeerV2(false, true) return NewOperator(b.desc, brief, b.regionID, b.regionEpoch, kind, origin.GetApproximateSize(), b.steps...), nil } + +// CreateWitnessPeerOperator creates an operator that set a follower or learner peer with witness +func CreateWitnessPeerOperator(desc string, ci ClusterInformer, region *core.RegionInfo, peer *metapb.Peer) (*Operator, error) { + brief := fmt.Sprintf("create witness: region %v peer %v on store %v", region.GetID(), peer.Id, peer.StoreId) + return NewOperator(desc, brief, region.GetID(), region.GetRegionEpoch(), OpRegion, region.GetApproximateSize(), BecomeWitness{StoreID: peer.StoreId, PeerID: peer.Id}), nil +} + +// CreateNonWitnessPeerOperator creates an operator that set a peer with non-witness +func CreateNonWitnessPeerOperator(desc string, ci ClusterInformer, region *core.RegionInfo, peer *metapb.Peer) (*Operator, error) { + brief := fmt.Sprintf("promote to non-witness: region %v peer %v on store %v", region.GetID(), peer.Id, peer.StoreId) + return NewOperator(desc, brief, region.GetID(), region.GetRegionEpoch(), OpRegion, region.GetApproximateSize(), BecomeNonWitness{StoreID: peer.StoreId, PeerID: peer.Id}), nil +} diff --git a/server/schedule/operator/step.go b/server/schedule/operator/step.go index 8acbf15c5b79..8374f8362ff8 100644 --- a/server/schedule/operator/step.go +++ b/server/schedule/operator/step.go @@ -134,6 +134,7 @@ func (tl TransferLeader) GetCmd(region *core.RegionInfo, useConfChangeV2 bool) * type AddPeer struct { ToStore, PeerID uint64 IsLightWeight bool + IsWitness bool } // ConfVerChanged returns the delta value for version increased by this step. @@ -153,6 +154,9 @@ func (ap AddPeer) IsFinish(region *core.RegionInfo) bool { log.Warn("obtain unexpected peer", zap.String("expect", ap.String()), zap.Uint64("obtain-voter", peer.GetId())) return false } + if peer.GetIsWitness() != ap.IsWitness { + return false + } return region.GetPendingVoter(peer.GetId()) == nil } return false @@ -195,13 +199,132 @@ func (ap AddPeer) GetCmd(region *core.RegionInfo, useConfChangeV2 bool) *pdpb.Re // The newly added peer is pending. return nil } - return createResponse(addNode(ap.PeerID, ap.ToStore), useConfChangeV2) + return createResponse(addNode(ap.PeerID, ap.ToStore, ap.IsWitness), useConfChangeV2) +} + +// BecomeWitness is an OpStep that makes a peer become a witness. +type BecomeWitness struct { + StoreID, PeerID uint64 +} + +// ConfVerChanged returns the delta value for version increased by this step. 
+func (bw BecomeWitness) ConfVerChanged(region *core.RegionInfo) uint64 { + peer := region.GetStorePeer(bw.StoreID) + return typeutil.BoolToUint64(peer.GetId() == bw.PeerID) +} + +func (bw BecomeWitness) String() string { + return fmt.Sprintf("change peer %v on store %v to witness", bw.PeerID, bw.StoreID) +} + +// IsFinish checks if current step is finished. +func (bw BecomeWitness) IsFinish(region *core.RegionInfo) bool { + if peer := region.GetStorePeer(bw.StoreID); peer != nil { + if peer.GetId() != bw.PeerID { + log.Warn("obtain unexpected peer", zap.String("expect", bw.String()), zap.Uint64("obtain-learner", peer.GetId())) + return false + } + return peer.IsWitness + } + return false +} + +// CheckInProgress checks if the step is in the progress of advancing. +func (bw BecomeWitness) CheckInProgress(ci ClusterInformer, region *core.RegionInfo) error { + if err := validateStore(ci, bw.StoreID); err != nil { + return err + } + peer := region.GetStorePeer(bw.StoreID) + if peer == nil || peer.GetId() != bw.PeerID { + return errors.New("peer does not exist") + } + return nil +} + +// Influence calculates the store difference that current step makes. +func (bw BecomeWitness) Influence(opInfluence OpInfluence, region *core.RegionInfo) { + to := opInfluence.GetStoreInfluence(bw.StoreID) + + regionSize := region.GetApproximateSize() + to.RegionSize -= regionSize + to.AdjustStepCost(storelimit.RemovePeer, regionSize) +} + +// Timeout returns true if the step is timeout. +func (bw BecomeWitness) Timeout(start time.Time, regionSize int64) bool { + return time.Since(start) > fastStepWaitDuration(regionSize) +} + +// GetCmd returns the schedule command for heartbeat response. +func (bw BecomeWitness) GetCmd(region *core.RegionInfo, useConfChangeV2 bool) *pdpb.RegionHeartbeatResponse { + if core.IsLearner(region.GetStorePeer(bw.StoreID)) { + return createResponse(addLearnerNode(bw.PeerID, bw.StoreID, true), useConfChangeV2) + } + return createResponse(addNode(bw.PeerID, bw.StoreID, true), useConfChangeV2) +} + +// BecomeNonWitness is an OpStep that makes a peer become a non-witness. +type BecomeNonWitness struct { + StoreID, PeerID uint64 +} + +// ConfVerChanged returns the delta value for version increased by this step. +func (bn BecomeNonWitness) ConfVerChanged(region *core.RegionInfo) uint64 { + peer := region.GetStorePeer(bn.StoreID) + return typeutil.BoolToUint64(peer.GetId() == bn.PeerID) +} + +func (bn BecomeNonWitness) String() string { + return fmt.Sprintf("change peer %v on store %v to non-witness", bn.PeerID, bn.StoreID) +} + +// IsFinish checks if current step is finished. +func (bn BecomeNonWitness) IsFinish(region *core.RegionInfo) bool { + if peer := region.GetStorePeer(bn.StoreID); peer != nil { + if peer.GetId() != bn.PeerID { + log.Warn("obtain unexpected peer", zap.String("expect", bn.String()), zap.Uint64("obtain-non-witness", peer.GetId())) + return false + } + return region.GetPendingPeer(peer.GetId()) == nil && !peer.IsWitness + } + return false +} + +// CheckInProgress checks if the step is in the progress of advancing. +func (bn BecomeNonWitness) CheckInProgress(ci ClusterInformer, region *core.RegionInfo) error { + if err := validateStore(ci, bn.StoreID); err != nil { + return err + } + peer := region.GetStorePeer(bn.StoreID) + if peer == nil || peer.GetId() != bn.PeerID { + return errors.New("peer does not exist") + } + return nil +} + +// Influence calculates the store difference that current step makes. 
+func (bn BecomeNonWitness) Influence(opInfluence OpInfluence, region *core.RegionInfo) { + to := opInfluence.GetStoreInfluence(bn.StoreID) + + regionSize := region.GetApproximateSize() + to.AdjustStepCost(storelimit.AddPeer, regionSize) +} + +// Timeout returns true if the step is timeout +func (bn BecomeNonWitness) Timeout(start time.Time, regionSize int64) bool { + return time.Since(start) > slowStepWaitDuration(regionSize) +} + +// GetCmd returns the schedule command for heartbeat response. +func (bn BecomeNonWitness) GetCmd(region *core.RegionInfo, useConfChangeV2 bool) *pdpb.RegionHeartbeatResponse { + return createResponse(addLearnerNode(bn.PeerID, bn.StoreID, false), useConfChangeV2) } // AddLearner is an OpStep that adds a region learner peer. type AddLearner struct { ToStore, PeerID uint64 IsLightWeight bool + IsWitness bool } // ConfVerChanged returns the delta value for version increased by this step. @@ -221,6 +344,9 @@ func (al AddLearner) IsFinish(region *core.RegionInfo) bool { log.Warn("obtain unexpected peer", zap.String("expect", al.String()), zap.Uint64("obtain-learner", peer.GetId())) return false } + if peer.GetIsWitness() != al.IsWitness { + return false + } return region.GetPendingLearner(peer.GetId()) == nil } return false @@ -268,12 +394,13 @@ func (al AddLearner) GetCmd(region *core.RegionInfo, useConfChangeV2 bool) *pdpb // The newly added peer is pending. return nil } - return createResponse(addLearnerNode(al.PeerID, al.ToStore), useConfChangeV2) + return createResponse(addLearnerNode(al.PeerID, al.ToStore, al.IsWitness), useConfChangeV2) } // PromoteLearner is an OpStep that promotes a region learner peer to normal voter. type PromoteLearner struct { ToStore, PeerID uint64 + IsWitness bool } // ConfVerChanged returns the delta value for version increased by this step. @@ -316,7 +443,7 @@ func (pl PromoteLearner) Timeout(start time.Time, regionSize int64) bool { // GetCmd returns the schedule command for heartbeat response. func (pl PromoteLearner) GetCmd(_ *core.RegionInfo, useConfChangeV2 bool) *pdpb.RegionHeartbeatResponse { - return createResponse(addNode(pl.PeerID, pl.ToStore), useConfChangeV2) + return createResponse(addNode(pl.PeerID, pl.ToStore, pl.IsWitness), useConfChangeV2) } // RemovePeer is an OpStep that removes a region peer. @@ -504,10 +631,15 @@ func (sr SplitRegion) GetCmd(region *core.RegionInfo, useConfChangeV2 bool) *pdp // Note: It is not an OpStep, only a sub step in ChangePeerV2Enter and ChangePeerV2Leave. type DemoteVoter struct { ToStore, PeerID uint64 + IsWitness bool } func (dv DemoteVoter) String() string { - return fmt.Sprintf("demote voter peer %v on store %v to learner", dv.PeerID, dv.ToStore) + info := "non-witness" + if dv.IsWitness { + info = "witness" + } + return fmt.Sprintf("demote voter peer %v on store %v to %v learner", dv.PeerID, dv.ToStore, info) } // ConfVerChanged returns the delta value for version increased by this step. 
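Like the pre-existing steps, the new BecomeWitness and BecomeNonWitness steps only describe a desired end state: the operator framework re-evaluates each step against the latest region info on every heartbeat until IsFinish returns true or the step times out. The following is a minimal, self-contained sketch of that polling pattern, using simplified stand-in types rather than PD's real OpStep and RegionInfo definitions:

package main

import (
	"fmt"
	"time"
)

// region is a simplified stand-in for core.RegionInfo: it only tracks
// whether the peer on each store is currently a witness.
type region struct {
	witnessByStore map[uint64]bool
}

// step is a simplified stand-in for an OpStep such as BecomeNonWitness.
type step interface {
	fmt.Stringer
	IsFinish(r *region) bool
	Timeout(start time.Time) bool
}

// becomeNonWitness is finished once the peer on storeID is no longer a witness.
type becomeNonWitness struct{ storeID uint64 }

func (s becomeNonWitness) String() string {
	return fmt.Sprintf("change peer on store %d to non-witness", s.storeID)
}

func (s becomeNonWitness) IsFinish(r *region) bool {
	return !r.witnessByStore[s.storeID]
}

// Timeout uses a fixed wait here; the real steps scale it with region size.
func (s becomeNonWitness) Timeout(start time.Time) bool {
	return time.Since(start) > 2*time.Second
}

// drive re-checks the step against successive region snapshots, the way an
// operator is advanced by region heartbeats, until it finishes or times out.
func drive(st step, snapshots <-chan *region) error {
	start := time.Now()
	for r := range snapshots {
		if st.IsFinish(r) {
			return nil
		}
		if st.Timeout(start) {
			return fmt.Errorf("step %q timed out", st)
		}
	}
	return fmt.Errorf("step %q never finished", st)
}

func main() {
	snaps := make(chan *region, 2)
	snaps <- &region{witnessByStore: map[uint64]bool{3: true}}  // still a witness
	snaps <- &region{witnessByStore: map[uint64]bool{3: false}} // conversion applied
	close(snaps)
	fmt.Println(drive(becomeNonWitness{storeID: 3}, snaps)) // <nil>
}

The real steps additionally implement ConfVerChanged, CheckInProgress, and Influence so the scheduler can track configuration changes and account for store load; the sketch leaves those out.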
@@ -521,8 +653,12 @@ func (dv DemoteVoter) IsFinish(region *core.RegionInfo) bool { if peer := region.GetStoreLearner(dv.ToStore); peer != nil { if peer.GetId() != dv.PeerID { log.Warn("obtain unexpected peer", zap.String("expect", dv.String()), zap.Uint64("obtain-learner", peer.GetId())) + return false + } + if peer.IsWitness != dv.IsWitness { + return false } - return peer.GetId() == dv.PeerID + return region.GetPendingLearner(peer.GetId()) == nil } return false } @@ -534,7 +670,7 @@ func (dv DemoteVoter) Timeout(start time.Time, regionSize int64) bool { // GetCmd returns the schedule command for heartbeat response. func (dv DemoteVoter) GetCmd(_ *core.RegionInfo, useConfChangeV2 bool) *pdpb.RegionHeartbeatResponse { - return createResponse(addLearnerNode(dv.PeerID, dv.ToStore), useConfChangeV2) + return createResponse(addLearnerNode(dv.PeerID, dv.ToStore, dv.IsWitness), useConfChangeV2) } // ChangePeerV2Enter is an OpStep that uses joint consensus to request all PromoteLearner and DemoteVoter. @@ -841,24 +977,26 @@ func fastStepWaitDuration(regionSize int64) time.Duration { return wait } -func addNode(id, storeID uint64) *pdpb.ChangePeer { +func addNode(id, storeID uint64, isWitness bool) *pdpb.ChangePeer { return &pdpb.ChangePeer{ ChangeType: eraftpb.ConfChangeType_AddNode, Peer: &metapb.Peer{ - Id: id, - StoreId: storeID, - Role: metapb.PeerRole_Voter, + Id: id, + StoreId: storeID, + Role: metapb.PeerRole_Voter, + IsWitness: isWitness, }, } } -func addLearnerNode(id, storeID uint64) *pdpb.ChangePeer { +func addLearnerNode(id, storeID uint64, isWitness bool) *pdpb.ChangePeer { return &pdpb.ChangePeer{ ChangeType: eraftpb.ConfChangeType_AddLearnerNode, Peer: &metapb.Peer{ - Id: id, - StoreId: storeID, - Role: metapb.PeerRole_Learner, + Id: id, + StoreId: storeID, + Role: metapb.PeerRole_Learner, + IsWitness: isWitness, }, } } diff --git a/server/schedule/placement/fit.go b/server/schedule/placement/fit.go index 54f6c53bd90d..33e0af2dbc95 100644 --- a/server/schedule/placement/fit.go +++ b/server/schedule/placement/fit.go @@ -385,7 +385,7 @@ func newRuleFit(rule *Rule, peers []*fitPeer) *RuleFit { rf := &RuleFit{Rule: rule, IsolationScore: isolationScore(peers, rule.LocationLabels)} for _, p := range peers { rf.Peers = append(rf.Peers, p.Peer) - if !p.matchRoleStrict(rule.Role) { + if !p.matchRoleStrict(rule.Role) || p.IsWitness != rule.IsWitness { rf.PeersWithDifferentRole = append(rf.PeersWithDifferentRole, p.Peer) } } diff --git a/server/schedule/placement/rule.go b/server/schedule/placement/rule.go index 31493b908bd1..5b8b488c6af1 100644 --- a/server/schedule/placement/rule.go +++ b/server/schedule/placement/rule.go @@ -63,6 +63,7 @@ type Rule struct { EndKey []byte `json:"-"` // range end key EndKeyHex string `json:"end_key"` // hex format end key, for marshal/unmarshal Role PeerRoleType `json:"role"` // expected role of the peers + IsWitness bool `json:"is_witness"` // when it is true, it means the role is also a witness Count int `json:"count"` // expected count of the peers LabelConstraints []LabelConstraint `json:"label_constraints,omitempty"` // used to select stores to place peers LocationLabels []string `json:"location_labels,omitempty"` // used to make peers isolated physically diff --git a/tests/client/go.mod b/tests/client/go.mod index b96ffe5e05df..4cdc8f323e30 100644 --- a/tests/client/go.mod +++ b/tests/client/go.mod @@ -5,7 +5,7 @@ go 1.18 require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 - 
github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad + github.com/pingcap/kvproto v0.0.0-20221014081430-26e28e6a281a github.com/stretchr/testify v1.7.0 github.com/tikv/pd v0.0.0-00010101000000-000000000000 github.com/tikv/pd/client v0.0.0-00010101000000-000000000000 diff --git a/tests/client/go.sum b/tests/client/go.sum index b2ec8ce4ef50..4591bd2ef17f 100644 --- a/tests/client/go.sum +++ b/tests/client/go.sum @@ -390,8 +390,9 @@ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= github.com/pingcap/kvproto v0.0.0-20200411081810-b85805c9476c/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= -github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad h1:lGKxsEwdE0pVXzHYD1SQ1vfa3t/bFVU/latrQz8b/w0= github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20221014081430-26e28e6a281a h1:McYxPhA8SHqfUtLfQHHN0fQl4dy93IkhlX4Pp2MKIFA= +github.com/pingcap/kvproto v0.0.0-20221014081430-26e28e6a281a/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= From 8ed16608a411b2e2b5ab483a290887ecadc23506 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 21 Oct 2022 15:43:54 +0800 Subject: [PATCH 09/67] statistics: fix `ToHotPeersStat` result when `HotPeers` is empty (#5597) close tikv/pd#5598 Signed-off-by: lhy1024 Co-authored-by: ShuNing --- server/statistics/store_load.go | 10 ++-- tests/pdctl/hot/hot_test.go | 82 +++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 5 deletions(-) diff --git a/server/statistics/store_load.go b/server/statistics/store_load.go index a7cc723c74a4..12c9e92910a9 100644 --- a/server/statistics/store_load.go +++ b/server/statistics/store_load.go @@ -29,11 +29,13 @@ type StoreLoadDetail struct { // ToHotPeersStat abstracts load information to HotPeersStat. 
func (li *StoreLoadDetail) ToHotPeersStat() *HotPeersStat { + storeByteRate, storeKeyRate, storeQueryRate := li.LoadPred.Current.Loads[ByteDim], + li.LoadPred.Current.Loads[KeyDim], li.LoadPred.Current.Loads[QueryDim] if len(li.HotPeers) == 0 { return &HotPeersStat{ - StoreByteRate: 0.0, - StoreKeyRate: 0.0, - StoreQueryRate: 0.0, + StoreByteRate: storeByteRate, + StoreKeyRate: storeKeyRate, + StoreQueryRate: storeQueryRate, TotalBytesRate: 0.0, TotalKeysRate: 0.0, TotalQueryRate: 0.0, @@ -51,8 +53,6 @@ func (li *StoreLoadDetail) ToHotPeersStat() *HotPeersStat { queryRate += peer.Loads[QueryDim] } } - storeByteRate, storeKeyRate, storeQueryRate := li.LoadPred.Current.Loads[ByteDim], - li.LoadPred.Current.Loads[KeyDim], li.LoadPred.Current.Loads[QueryDim] return &HotPeersStat{ TotalBytesRate: byteRate, diff --git a/tests/pdctl/hot/hot_test.go b/tests/pdctl/hot/hot_test.go index 1a948df738af..ae81d49c95b8 100644 --- a/tests/pdctl/hot/hot_test.go +++ b/tests/pdctl/hot/hot_test.go @@ -358,3 +358,85 @@ func TestHistoryHotRegions(t *testing.T) { re.NoError(err) re.Error(json.Unmarshal(output, &hotRegions)) } + +func TestHotWithoutHotPeer(t *testing.T) { + re := require.New(t) + statistics.Denoising = false + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + cluster, err := tests.NewTestCluster(ctx, 1, func(cfg *config.Config, serverName string) { cfg.Schedule.HotRegionCacheHitsThreshold = 0 }) + re.NoError(err) + err = cluster.RunInitialServers() + re.NoError(err) + cluster.WaitLeader() + pdAddr := cluster.GetConfig().GetClientURL() + cmd := pdctlCmd.GetRootCmd() + + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + } + + leaderServer := cluster.GetServer(cluster.GetLeader()) + re.NoError(leaderServer.BootstrapCluster()) + for _, store := range stores { + pdctl.MustPutStore(re, leaderServer.GetServer(), store) + } + timestamp := uint64(time.Now().UnixNano()) + load := 1024.0 + for _, store := range stores { + for i := 0; i < 5; i++ { + err := leaderServer.GetServer().GetRaftCluster().HandleStoreHeartbeat(&pdpb.StoreStats{ + StoreId: store.Id, + BytesRead: uint64(load * statistics.StoreHeartBeatReportInterval), + KeysRead: uint64(load * statistics.StoreHeartBeatReportInterval), + BytesWritten: uint64(load * statistics.StoreHeartBeatReportInterval), + KeysWritten: uint64(load * statistics.StoreHeartBeatReportInterval), + Capacity: 1000 * units.MiB, + Available: 1000 * units.MiB, + Interval: &pdpb.TimeInterval{ + StartTimestamp: timestamp + uint64(i*statistics.StoreHeartBeatReportInterval), + EndTimestamp: timestamp + uint64((i+1)*statistics.StoreHeartBeatReportInterval)}, + }) + re.NoError(err) + } + } + defer cluster.Destroy() + + // wait hot scheduler starts + time.Sleep(5000 * time.Millisecond) + { + args := []string{"-u", pdAddr, "hot", "read"} + output, err := pdctl.ExecuteCommand(cmd, args...) + hotRegion := statistics.StoreHotPeersInfos{} + re.NoError(err) + re.NoError(json.Unmarshal(output, &hotRegion)) + re.Equal(hotRegion.AsPeer[1].Count, 0) + re.Equal(0.0, hotRegion.AsPeer[1].TotalBytesRate) + re.Equal(load, hotRegion.AsPeer[1].StoreByteRate) + re.Equal(hotRegion.AsLeader[1].Count, 0) + re.Equal(0.0, hotRegion.AsLeader[1].TotalBytesRate) + re.Equal(load, hotRegion.AsLeader[1].StoreByteRate) + } + { + args := []string{"-u", pdAddr, "hot", "write"} + output, err := pdctl.ExecuteCommand(cmd, args...) 
+ hotRegion := statistics.StoreHotPeersInfos{} + re.NoError(err) + re.NoError(json.Unmarshal(output, &hotRegion)) + re.Equal(hotRegion.AsPeer[1].Count, 0) + re.Equal(0.0, hotRegion.AsPeer[1].TotalBytesRate) + re.Equal(load, hotRegion.AsPeer[1].StoreByteRate) + re.Equal(hotRegion.AsLeader[1].Count, 0) + re.Equal(0.0, hotRegion.AsLeader[1].TotalBytesRate) + re.Equal(0.0, hotRegion.AsLeader[1].StoreByteRate) // write leader sum + } +} From ec8ff1a0f426c340216ebe618367b319e5083aae Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Sat, 22 Oct 2022 19:57:54 +0800 Subject: [PATCH 10/67] scheduler: update hot region scheduler default config for v2 (#5590) ref tikv/pd#4949 Signed-off-by: lhy1024 --- server/schedulers/hot_region_config.go | 6 +-- server/schedulers/hot_region_test.go | 51 ++++++++++++++++--------- tests/pdctl/scheduler/scheduler_test.go | 2 +- 3 files changed, 36 insertions(+), 23 deletions(-) diff --git a/server/schedulers/hot_region_config.go b/server/schedulers/hot_region_config.go index cb0173d3d1ec..423b5369d0db 100644 --- a/server/schedulers/hot_region_config.go +++ b/server/schedulers/hot_region_config.go @@ -44,7 +44,7 @@ const ( var defaultPrioritiesConfig = prioritiesConfig{ read: []string{statistics.QueryPriority, statistics.BytePriority}, - writeLeader: []string{statistics.KeyPriority, statistics.BytePriority}, + writeLeader: []string{statistics.QueryPriority, statistics.BytePriority}, writePeer: []string{statistics.BytePriority, statistics.KeyPriority}, } @@ -73,7 +73,7 @@ func initHotRegionScheduleConfig() *hotRegionSchedulerConfig { DstToleranceRatio: 1.05, // Tolerate 5% difference StrictPickingStore: true, EnableForTiFlash: true, - RankFormulaVersion: "", // Use default value when it is "". Depends on getRankFormulaVersionLocked. + RankFormulaVersion: "v2", ForbidRWType: "none", } cfg.applyPrioritiesConfig(defaultPrioritiesConfig) @@ -305,7 +305,7 @@ func (conf *hotRegionSchedulerConfig) getRankFormulaVersionLocked() string { switch conf.RankFormulaVersion { case "v2": return "v2" - default: + default: // Use "v1" when it is "" return "v1" } } diff --git a/server/schedulers/hot_region_test.go b/server/schedulers/hot_region_test.go index 338be550a2d5..a4f11ac67d53 100644 --- a/server/schedulers/hot_region_test.go +++ b/server/schedulers/hot_region_test.go @@ -595,6 +595,8 @@ func TestHotWriteRegionScheduleWithKeyRate(t *testing.T) { re.NoError(err) hb.(*hotScheduler).conf.SetDstToleranceRatio(1) hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) + hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{statistics.KeyPriority, statistics.BytePriority} + hb.(*hotScheduler).conf.RankFormulaVersion = "v1" tc := mockcluster.NewCluster(ctx, opt) tc.SetHotRegionCacheHitsThreshold(0) @@ -732,6 +734,7 @@ func TestHotWriteRegionScheduleWithLeader(t *testing.T) { statistics.Denoising = false opt := config.NewTestOptions() hb, err := schedule.CreateScheduler(statistics.Write.String(), schedule.NewOperatorController(ctx, nil, nil), storage.NewStorageWithMemoryBackend(), nil) + hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{statistics.KeyPriority, statistics.BytePriority} re.NoError(err) tc := mockcluster.NewCluster(ctx, opt) @@ -794,6 +797,8 @@ func TestHotWriteRegionScheduleWithPendingInfluence(t *testing.T) { opt := config.NewTestOptions() hb, err := schedule.CreateScheduler(statistics.Write.String(), schedule.NewOperatorController(ctx, nil, nil), storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) + hb.(*hotScheduler).conf.WriteLeaderPriorities = 
[]string{statistics.KeyPriority, statistics.BytePriority} + hb.(*hotScheduler).conf.RankFormulaVersion = "v1" old := pendingAmpFactor pendingAmpFactor = 0.0 defer func() { @@ -885,6 +890,8 @@ func TestHotWriteRegionScheduleWithRuleEnabled(t *testing.T) { tc.SetEnablePlacementRules(true) hb, err := schedule.CreateScheduler(statistics.Write.String(), schedule.NewOperatorController(ctx, nil, nil), storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) + hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{statistics.KeyPriority, statistics.BytePriority} + tc.SetHotRegionCacheHitsThreshold(0) key, err := hex.DecodeString("") re.NoError(err) @@ -1088,6 +1095,7 @@ func TestHotReadRegionScheduleWithQuery(t *testing.T) { re.NoError(err) hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) hb.(*hotScheduler).conf.SetDstToleranceRatio(1) + hb.(*hotScheduler).conf.RankFormulaVersion = "v1" tc := mockcluster.NewCluster(ctx, opt) tc.SetHotRegionCacheHitsThreshold(0) @@ -1120,6 +1128,7 @@ func TestHotReadRegionScheduleWithKeyRate(t *testing.T) { opt := config.NewTestOptions() hb, err := schedule.CreateScheduler(statistics.Read.String(), schedule.NewOperatorController(ctx, nil, nil), storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) + hb.(*hotScheduler).conf.RankFormulaVersion = "v1" hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) hb.(*hotScheduler).conf.SetDstToleranceRatio(1) hb.(*hotScheduler).conf.ReadPriorities = []string{statistics.BytePriority, statistics.KeyPriority} @@ -1177,6 +1186,7 @@ func TestHotReadRegionScheduleWithPendingInfluence(t *testing.T) { hb, err := schedule.CreateScheduler(statistics.Read.String(), schedule.NewOperatorController(ctx, nil, nil), storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) // For test + hb.(*hotScheduler).conf.RankFormulaVersion = "v1" hb.(*hotScheduler).conf.GreatDecRatio = 0.99 hb.(*hotScheduler).conf.MinorDecRatio = 1 hb.(*hotScheduler).conf.DstToleranceRatio = 1 @@ -1936,6 +1946,8 @@ func TestHotScheduleWithPriority(t *testing.T) { testutil.CheckTransferLeader(re, ops[0], operator.OpHotRegion, 1, 3) hb, err = schedule.CreateScheduler(statistics.Write.String(), schedule.NewOperatorController(ctx, nil, nil), storage.NewStorageWithMemoryBackend(), nil) + hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{statistics.KeyPriority, statistics.BytePriority} + hb.(*hotScheduler).conf.RankFormulaVersion = "v1" re.NoError(err) // assert loose store picking @@ -1980,6 +1992,7 @@ func TestHotScheduleWithStddev(t *testing.T) { re.NoError(err) hb.(*hotScheduler).conf.SetDstToleranceRatio(1.0) hb.(*hotScheduler).conf.SetSrcToleranceRatio(1.0) + hb.(*hotScheduler).conf.RankFormulaVersion = "v1" tc := mockcluster.NewCluster(ctx, opt) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.SetHotRegionCacheHitsThreshold(0) @@ -2084,7 +2097,7 @@ func TestCompatibility(t *testing.T) { // default checkPriority(re, hb.(*hotScheduler), tc, [3][2]int{ {statistics.QueryDim, statistics.ByteDim}, - {statistics.KeyDim, statistics.ByteDim}, + {statistics.QueryDim, statistics.ByteDim}, {statistics.ByteDim, statistics.KeyDim}, }) // config error value @@ -2093,7 +2106,7 @@ func TestCompatibility(t *testing.T) { hb.(*hotScheduler).conf.WritePeerPriorities = []string{statistics.QueryPriority, statistics.BytePriority, statistics.KeyPriority} checkPriority(re, hb.(*hotScheduler), tc, [3][2]int{ {statistics.QueryDim, statistics.ByteDim}, - {statistics.KeyDim, statistics.ByteDim}, + {statistics.QueryDim, statistics.ByteDim}, 
{statistics.ByteDim, statistics.KeyDim}, }) // low version @@ -2135,7 +2148,7 @@ func TestCompatibility(t *testing.T) { re.False(hb.(*hotScheduler).conf.lastQuerySupported) // it will updated after scheduling checkPriority(re, hb.(*hotScheduler), tc, [3][2]int{ {statistics.QueryDim, statistics.ByteDim}, - {statistics.KeyDim, statistics.ByteDim}, + {statistics.QueryDim, statistics.ByteDim}, {statistics.ByteDim, statistics.KeyDim}, }) re.True(hb.(*hotScheduler).conf.lastQuerySupported) @@ -2148,12 +2161,12 @@ func TestCompatibilityConfig(t *testing.T) { opt := config.NewTestOptions() tc := mockcluster.NewCluster(ctx, opt) - // From new or 3.x cluster + // From new or 3.x cluster, it will use new config hb, err := schedule.CreateScheduler(HotRegionType, schedule.NewOperatorController(ctx, tc, nil), storage.NewStorageWithMemoryBackend(), schedule.ConfigSliceDecoder("hot-region", nil)) re.NoError(err) checkPriority(re, hb.(*hotScheduler), tc, [3][2]int{ {statistics.QueryDim, statistics.ByteDim}, - {statistics.KeyDim, statistics.ByteDim}, + {statistics.QueryDim, statistics.ByteDim}, {statistics.ByteDim, statistics.KeyDim}, }) @@ -2163,7 +2176,7 @@ func TestCompatibilityConfig(t *testing.T) { re.NoError(err) checkPriority(re, hb.(*hotScheduler), tc, [3][2]int{ {statistics.QueryDim, statistics.ByteDim}, - {statistics.KeyDim, statistics.ByteDim}, + {statistics.QueryDim, statistics.ByteDim}, {statistics.ByteDim, statistics.KeyDim}, }) @@ -2211,6 +2224,18 @@ func TestCompatibilityConfig(t *testing.T) { }) } +func checkPriority(re *require.Assertions, hb *hotScheduler, tc *mockcluster.Cluster, dims [3][2]int) { + readSolver := newBalanceSolver(hb, tc, statistics.Read, transferLeader) + writeLeaderSolver := newBalanceSolver(hb, tc, statistics.Write, transferLeader) + writePeerSolver := newBalanceSolver(hb, tc, statistics.Write, movePeer) + re.Equal(dims[0][0], readSolver.firstPriority) + re.Equal(dims[0][1], readSolver.secondPriority) + re.Equal(dims[1][0], writeLeaderSolver.firstPriority) + re.Equal(dims[1][1], writeLeaderSolver.secondPriority) + re.Equal(dims[2][0], writePeerSolver.firstPriority) + re.Equal(dims[2][1], writePeerSolver.secondPriority) +} + func TestConfigValidation(t *testing.T) { re := require.New(t) @@ -2246,7 +2271,7 @@ func TestConfigValidation(t *testing.T) { // rank-formula-version // default hc = initHotRegionScheduleConfig() - re.Equal("v1", hc.GetRankFormulaVersion()) + re.Equal("v2", hc.GetRankFormulaVersion()) // v1 hc.RankFormulaVersion = "v1" err = hc.valid() @@ -2285,18 +2310,6 @@ func TestConfigValidation(t *testing.T) { re.Error(err) } -func checkPriority(re *require.Assertions, hb *hotScheduler, tc *mockcluster.Cluster, dims [3][2]int) { - readSolver := newBalanceSolver(hb, tc, statistics.Read, transferLeader) - writeLeaderSolver := newBalanceSolver(hb, tc, statistics.Write, transferLeader) - writePeerSolver := newBalanceSolver(hb, tc, statistics.Write, movePeer) - re.Equal(dims[0][0], readSolver.firstPriority) - re.Equal(dims[0][1], readSolver.secondPriority) - re.Equal(dims[1][0], writeLeaderSolver.firstPriority) - re.Equal(dims[1][1], writeLeaderSolver.secondPriority) - re.Equal(dims[2][0], writePeerSolver.firstPriority) - re.Equal(dims[2][1], writePeerSolver.secondPriority) -} - type maxZombieDurTestCase struct { typ resourceType isTiFlash bool diff --git a/tests/pdctl/scheduler/scheduler_test.go b/tests/pdctl/scheduler/scheduler_test.go index 94ab538c02f3..81f98a6e8da4 100644 --- a/tests/pdctl/scheduler/scheduler_test.go +++ 
b/tests/pdctl/scheduler/scheduler_test.go @@ -318,7 +318,7 @@ func TestScheduler(t *testing.T) { "write-peer-priorities": []interface{}{"byte", "key"}, "strict-picking-store": "true", "enable-for-tiflash": "true", - "rank-formula-version": "v1", + "rank-formula-version": "v2", } var conf map[string]interface{} mustExec([]string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "list"}, &conf) From e53caec3f309efef60b8fcd6c4a8b55a2ee17bfd Mon Sep 17 00:00:00 2001 From: matchge <74505524+matchge-ca@users.noreply.github.com> Date: Tue, 25 Oct 2022 00:03:55 -0400 Subject: [PATCH 11/67] Add jq for region check command (#5624) close tikv/pd#5585 - parse response string with jq query Signed-off-by: Hua Lu Co-authored-by: Hua Lu --- tools/pd-ctl/pdctl/command/region_command.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/pd-ctl/pdctl/command/region_command.go b/tools/pd-ctl/pdctl/command/region_command.go index 2489514fd3de..9e07abff2fc7 100644 --- a/tools/pd-ctl/pdctl/command/region_command.go +++ b/tools/pd-ctl/pdctl/command/region_command.go @@ -51,7 +51,7 @@ var ( // NewRegionCommand returns a region subcommand of rootCmd func NewRegionCommand() *cobra.Command { r := &cobra.Command{ - Use: `region [-jq=""]`, + Use: `region [--jq=""]`, Short: "show the region status", Run: showRegionCommandFunc, } @@ -354,10 +354,12 @@ func showRegionsByKeysCommandFunc(cmd *cobra.Command, args []string) { // NewRegionWithCheckCommand returns a region with check subcommand of regionCmd func NewRegionWithCheckCommand() *cobra.Command { r := &cobra.Command{ - Use: "check [miss-peer|extra-peer|down-peer|learner-peer|pending-peer|offline-peer|empty-region|oversized-region|undersized-region|hist-size|hist-keys]", + Use: `check [miss-peer|extra-peer|down-peer|learner-peer|pending-peer|offline-peer|empty-region|oversized-region|undersized-region|hist-size|hist-keys] [--jq=""]`, Short: "show the region with check specific status", Run: showRegionWithCheckCommandFunc, } + + r.Flags().String("jq", "", "jq query") return r } @@ -394,6 +396,11 @@ func showRegionWithCheckCommandFunc(cmd *cobra.Command, args []string) { cmd.Printf("Failed to get region: %s\n", err) return } + if flag := cmd.Flag("jq"); flag != nil && flag.Value.String() != "" { + printWithJQFilter(r, flag.Value.String()) + return + } + cmd.Println(r) } From f4fa6f409e3dc879ae441014b89dc49739ed1670 Mon Sep 17 00:00:00 2001 From: matchge <74505524+matchge-ca@users.noreply.github.com> Date: Tue, 25 Oct 2022 00:17:56 -0400 Subject: [PATCH 12/67] Redefine the total label query for balance leader and region schedulers (#5556) close tikv/pd#5393 * Remove the total label metric counters for leader and region * Add new query in PD's grafana balance leader scheduler and balance region scheduler panels for the sum of all labels and name it as total Signed-off-by: Hua Lu Co-authored-by: Hua Lu Co-authored-by: Ti Chi Robot --- metrics/grafana/pd.json | 18 ++++++++++++++++++ server/schedulers/balance_leader.go | 1 - server/schedulers/balance_region.go | 1 - 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/metrics/grafana/pd.json b/metrics/grafana/pd.json index 39a7a6ab1a92..978102eaba88 100644 --- a/metrics/grafana/pd.json +++ b/metrics/grafana/pd.json @@ -6880,6 +6880,15 @@ "metric": "pd_scheduler_event_count", "refId": "A", "step": 4 + }, + { + "expr": "sum(rate(pd_scheduler_event_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=\"$instance\", 
type=\"balance-leader-scheduler\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "total", + "metric": "pd_scheduler_event_count", + "refId": "B", + "step": 4 } ], "thresholds": [], @@ -6977,6 +6986,15 @@ "metric": "pd_scheduler_event_count", "refId": "A", "step": 4 + }, + { + "expr": "sum(rate(pd_scheduler_event_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=\"$instance\", type=\"balance-region-scheduler\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "total", + "metric": "pd_scheduler_event_count", + "refId": "B", + "step": 4 } ], "thresholds": [], diff --git a/server/schedulers/balance_leader.go b/server/schedulers/balance_leader.go index 4cb542073ee2..c1632d48abe2 100644 --- a/server/schedulers/balance_leader.go +++ b/server/schedulers/balance_leader.go @@ -413,7 +413,6 @@ func createTransferLeaderOperator(cs *candidateStores, dir string, l *balanceLea } var op *operator.Operator for i := 0; i < retryLimit; i++ { - schedulerCounter.WithLabelValues(l.GetName(), "total").Inc() if op = creator(ssolver, collector); op != nil { if _, ok := usedRegions[op.RegionID()]; !ok { break diff --git a/server/schedulers/balance_region.go b/server/schedulers/balance_region.go index fc211ffe3f0b..8f291f09e84c 100644 --- a/server/schedulers/balance_region.go +++ b/server/schedulers/balance_region.go @@ -182,7 +182,6 @@ func (s *balanceRegionScheduler) Schedule(cluster schedule.Cluster, dryRun bool) break } for i := 0; i < retryLimit; i++ { - schedulerCounter.WithLabelValues(s.GetName(), "total").Inc() // Priority pick the region that has a pending peer. // Pending region may means the disk is overload, remove the pending region firstly. solver.region = filter.SelectOneRegion(cluster.RandPendingRegions(solver.SourceStoreID(), s.conf.Ranges), collector, From 2b519327bff0791237c35ecb1d6c4f35cca52537 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Tue, 25 Oct 2022 13:17:56 +0800 Subject: [PATCH 13/67] *: use independent lock (#5587) ref tikv/pd#5586 Signed-off-by: Ryan Leung Co-authored-by: Ti Chi Robot --- server/cluster/cluster.go | 30 +-- server/cluster/cluster_test.go | 48 ++-- server/core/basic_cluster.go | 452 ++++++++++++++++++--------------- server/core/region.go | 18 +- server/core/region_tree.go | 2 +- 5 files changed, 294 insertions(+), 256 deletions(-) diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index a29b35fac9b7..984c37812c1c 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -806,17 +806,15 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { }) var overlaps []*core.RegionInfo - c.Lock() if saveCache { // To prevent a concurrent heartbeat of another region from overriding the up-to-date region info by a stale one, // check its validation again here. // // However it can't solve the race condition of concurrent heartbeats from the same region. 
- if _, err := c.core.PreCheckPutRegion(region); err != nil { - c.Unlock() + if overlaps, err = c.core.AtomicCheckAndPutRegion(region); err != nil { return err } - overlaps = c.core.PutRegion(region) + for _, item := range overlaps { if c.regionStats != nil { c.regionStats.ClearDefunctRegion(item.GetID()) @@ -835,21 +833,19 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { } } for key := range storeMap { - c.updateStoreStatusLocked(key) + c.core.UpdateStoreStatus(key) } - regionEventCounter.WithLabelValues("update_cache").Inc() - } - if !c.IsPrepared() && isNew { - c.coordinator.prepareChecker.collect(region) + regionEventCounter.WithLabelValues("update_cache").Inc() } if c.regionStats != nil { c.regionStats.Observe(region, c.getRegionStoresLocked(region)) } - changedRegions := c.changedRegions - c.Unlock() + if !c.IsPrepared() && isNew { + c.coordinator.prepareChecker.collect(region) + } if c.storage != nil { // If there are concurrent heartbeats from the same region, the last write will win even if @@ -877,7 +873,7 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { if saveKV || needSync { select { - case changedRegions <- region: + case c.changedRegions <- region: default: } } @@ -885,16 +881,6 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { return nil } -func (c *RaftCluster) updateStoreStatusLocked(id uint64) { - leaderCount := c.core.GetStoreLeaderCount(id) - regionCount := c.core.GetStoreRegionCount(id) - witnessCount := c.core.GetStoreWitnessCount(id) - pendingPeerCount := c.core.GetStorePendingPeerCount(id) - leaderRegionSize := c.core.GetStoreLeaderRegionSize(id) - regionSize := c.core.GetStoreRegionSize(id) - c.core.UpdateStoreStatus(id, leaderCount, regionCount, pendingPeerCount, leaderRegionSize, regionSize, witnessCount) -} - func (c *RaftCluster) putMetaLocked(meta *metapb.Cluster) error { if c.storage != nil { if err := c.storage.SaveMeta(meta); err != nil { diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index 50fa24e1c899..dabbe5e2f913 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -61,7 +61,7 @@ func TestStoreHeartbeat(t *testing.T) { for _, region := range regions { re.NoError(cluster.putRegion(region)) } - re.Equal(int(n), cluster.core.Regions.GetRegionCount()) + re.Equal(int(n), cluster.core.Regions.RegionsInfo.GetRegionCount()) for i, store := range stores { storeStats := &pdpb.StoreStats{ @@ -700,25 +700,25 @@ func TestRegionHeartbeat(t *testing.T) { for i, region := range regions { // region does not exist. re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) checkRegionsKV(re, cluster.storage, regions[:i+1]) // region is the same, not updated. re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) checkRegionsKV(re, cluster.storage, regions[:i+1]) origin := region // region is updated. region = origin.Clone(core.WithIncVersion()) regions[i] = region re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) checkRegionsKV(re, cluster.storage, regions[:i+1]) // region is stale (Version). 
stale := origin.Clone(core.WithIncConfVer()) re.Error(cluster.processRegionHeartbeat(stale)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) checkRegionsKV(re, cluster.storage, regions[:i+1]) // region is updated @@ -728,13 +728,13 @@ func TestRegionHeartbeat(t *testing.T) { ) regions[i] = region re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) checkRegionsKV(re, cluster.storage, regions[:i+1]) // region is stale (ConfVer). stale = origin.Clone(core.WithIncConfVer()) re.Error(cluster.processRegionHeartbeat(stale)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) checkRegionsKV(re, cluster.storage, regions[:i+1]) // Add a down peer. @@ -746,38 +746,38 @@ func TestRegionHeartbeat(t *testing.T) { })) regions[i] = region re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) // Add a pending peer. region = region.Clone(core.WithPendingPeers([]*metapb.Peer{region.GetPeers()[rand.Intn(len(region.GetPeers()))]})) regions[i] = region re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) // Clear down peers. region = region.Clone(core.WithDownPeers(nil)) regions[i] = region re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) // Clear pending peers. region = region.Clone(core.WithPendingPeers(nil)) regions[i] = region re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) // Remove peers. origin = region region = origin.Clone(core.SetPeers(region.GetPeers()[:1])) regions[i] = region re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) checkRegionsKV(re, cluster.storage, regions[:i+1]) // Add peers. region = origin regions[i] = region re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) checkRegionsKV(re, cluster.storage, regions[:i+1]) // Change one peer to witness @@ -787,37 +787,37 @@ func TestRegionHeartbeat(t *testing.T) { ) regions[i] = region re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) // Change leader. region = region.Clone(core.WithLeader(region.GetPeers()[1])) regions[i] = region re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) // Change ApproximateSize. region = region.Clone(core.SetApproximateSize(144)) regions[i] = region re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) // Change ApproximateKeys. 
region = region.Clone(core.SetApproximateKeys(144000)) regions[i] = region re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) // Change bytes written. region = region.Clone(core.SetWrittenBytes(24000)) regions[i] = region re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) // Change bytes read. region = region.Clone(core.SetReadBytes(1080000)) regions[i] = region re.NoError(cluster.processRegionHeartbeat(region)) - checkRegions(re, cluster.core.Regions, regions[:i+1]) + checkRegions(re, cluster.core.Regions.RegionsInfo, regions[:i+1]) } regionCounts := make(map[uint64]int) @@ -848,10 +848,10 @@ func TestRegionHeartbeat(t *testing.T) { } for _, store := range cluster.core.Stores.GetStores() { - re.Equal(cluster.core.Regions.GetStoreLeaderCount(store.GetID()), store.GetLeaderCount()) - re.Equal(cluster.core.Regions.GetStoreRegionCount(store.GetID()), store.GetRegionCount()) - re.Equal(cluster.core.Regions.GetStoreLeaderRegionSize(store.GetID()), store.GetLeaderSize()) - re.Equal(cluster.core.Regions.GetStoreRegionSize(store.GetID()), store.GetRegionSize()) + re.Equal(cluster.core.Regions.RegionsInfo.GetStoreLeaderCount(store.GetID()), store.GetLeaderCount()) + re.Equal(cluster.core.Regions.RegionsInfo.GetStoreRegionCount(store.GetID()), store.GetRegionCount()) + re.Equal(cluster.core.Regions.RegionsInfo.GetStoreLeaderRegionSize(store.GetID()), store.GetLeaderSize()) + re.Equal(cluster.core.Regions.RegionsInfo.GetStoreRegionSize(store.GetID()), store.GetRegionSize()) } // Test with storage. @@ -1648,7 +1648,7 @@ func Test(t *testing.T) { _, opts, err := newTestScheduleConfig() re.NoError(err) tc := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opts, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) - cache := tc.core.Regions + cache := tc.core.Regions.RegionsInfo for i := uint64(0); i < n; i++ { region := regions[i] diff --git a/server/core/basic_cluster.go b/server/core/basic_cluster.go index 5f4ad579fa4d..997434b649f6 100644 --- a/server/core/basic_cluster.go +++ b/server/core/basic_cluster.go @@ -27,72 +27,59 @@ import ( // BasicCluster provides basic data member and interface for a tikv cluster. type BasicCluster struct { - syncutil.RWMutex - Stores *StoresInfo - Regions *RegionsInfo + Stores struct { + mu syncutil.RWMutex + *StoresInfo + } + + Regions struct { + mu syncutil.RWMutex + *RegionsInfo + } } // NewBasicCluster creates a BasicCluster. func NewBasicCluster() *BasicCluster { return &BasicCluster{ - Stores: NewStoresInfo(), - Regions: NewRegionsInfo(), + Stores: struct { + mu syncutil.RWMutex + *StoresInfo + }{StoresInfo: NewStoresInfo()}, + + Regions: struct { + mu syncutil.RWMutex + *RegionsInfo + }{RegionsInfo: NewRegionsInfo()}, } } +/* Stores read operations */ + // GetStores returns all Stores in the cluster. func (bc *BasicCluster) GetStores() []*StoreInfo { - bc.RLock() - defer bc.RUnlock() + bc.Stores.mu.RLock() + defer bc.Stores.mu.RUnlock() return bc.Stores.GetStores() } // GetMetaStores gets a complete set of metapb.Store. func (bc *BasicCluster) GetMetaStores() []*metapb.Store { - bc.RLock() - defer bc.RUnlock() + bc.Stores.mu.RLock() + defer bc.Stores.mu.RUnlock() return bc.Stores.GetMetaStores() } // GetStore searches for a store by ID. 
func (bc *BasicCluster) GetStore(storeID uint64) *StoreInfo { - bc.RLock() - defer bc.RUnlock() + bc.Stores.mu.RLock() + defer bc.Stores.mu.RUnlock() return bc.Stores.GetStore(storeID) } -// GetRegion searches for a region by ID. -func (bc *BasicCluster) GetRegion(regionID uint64) *RegionInfo { - bc.RLock() - defer bc.RUnlock() - return bc.Regions.GetRegion(regionID) -} - -// GetRegions gets all RegionInfo from regionMap. -func (bc *BasicCluster) GetRegions() []*RegionInfo { - bc.RLock() - defer bc.RUnlock() - return bc.Regions.GetRegions() -} - -// GetMetaRegions gets a set of metapb.Region from regionMap. -func (bc *BasicCluster) GetMetaRegions() []*metapb.Region { - bc.RLock() - defer bc.RUnlock() - return bc.Regions.GetMetaRegions() -} - -// GetStoreRegions gets all RegionInfo with a given storeID. -func (bc *BasicCluster) GetStoreRegions(storeID uint64) []*RegionInfo { - bc.RLock() - defer bc.RUnlock() - return bc.Regions.GetStoreRegions(storeID) -} - // GetRegionStores returns all Stores that contains the region's peer. func (bc *BasicCluster) GetRegionStores(region *RegionInfo) []*StoreInfo { - bc.RLock() - defer bc.RUnlock() + bc.Stores.mu.RLock() + defer bc.Stores.mu.RUnlock() var Stores []*StoreInfo for id := range region.GetStoreIDs() { if store := bc.Stores.GetStore(id); store != nil { @@ -104,8 +91,8 @@ func (bc *BasicCluster) GetRegionStores(region *RegionInfo) []*StoreInfo { // GetFollowerStores returns all Stores that contains the region's follower peer. func (bc *BasicCluster) GetFollowerStores(region *RegionInfo) []*StoreInfo { - bc.RLock() - defer bc.RUnlock() + bc.Stores.mu.RLock() + defer bc.Stores.mu.RUnlock() var Stores []*StoreInfo for id := range region.GetFollowers() { if store := bc.Stores.GetStore(id); store != nil { @@ -115,196 +102,300 @@ func (bc *BasicCluster) GetFollowerStores(region *RegionInfo) []*StoreInfo { return Stores } -// GetLeaderStoreByRegionID returns the leader store of the given region. -func (bc *BasicCluster) GetLeaderStoreByRegionID(regionID uint64) *StoreInfo { - bc.RLock() - defer bc.RUnlock() - region := bc.Regions.GetRegion(regionID) - if region == nil || region.GetLeader() == nil { - return nil - } - return bc.Stores.GetStore(region.GetLeader().GetStoreId()) -} - // GetLeaderStore returns all Stores that contains the region's leader peer. func (bc *BasicCluster) GetLeaderStore(region *RegionInfo) *StoreInfo { - bc.RLock() - defer bc.RUnlock() + bc.Stores.mu.RLock() + defer bc.Stores.mu.RUnlock() return bc.Stores.GetStore(region.GetLeader().GetStoreId()) } -// GetAdjacentRegions returns region's info that is adjacent with specific region. -func (bc *BasicCluster) GetAdjacentRegions(region *RegionInfo) (*RegionInfo, *RegionInfo) { - bc.RLock() - defer bc.RUnlock() - return bc.Regions.GetAdjacentRegions(region) +// GetStoreCount returns the total count of storeInfo. +func (bc *BasicCluster) GetStoreCount() int { + bc.Stores.mu.RLock() + defer bc.Stores.mu.RUnlock() + return bc.Stores.GetStoreCount() } -// GetRangeHoles returns all range holes, i.e the key ranges without any region info. -func (bc *BasicCluster) GetRangeHoles() [][]string { - bc.RLock() - defer bc.RUnlock() - return bc.Regions.GetRangeHoles() -} +/* Stores Write operations */ // PauseLeaderTransfer prevents the store from been selected as source or // target store of TransferLeader. 
func (bc *BasicCluster) PauseLeaderTransfer(storeID uint64) error { - bc.Lock() - defer bc.Unlock() + bc.Stores.mu.Lock() + defer bc.Stores.mu.Unlock() return bc.Stores.PauseLeaderTransfer(storeID) } // ResumeLeaderTransfer cleans a store's pause state. The store can be selected // as source or target of TransferLeader again. func (bc *BasicCluster) ResumeLeaderTransfer(storeID uint64) { - bc.Lock() - defer bc.Unlock() + bc.Stores.mu.Lock() + defer bc.Stores.mu.Unlock() bc.Stores.ResumeLeaderTransfer(storeID) } // SlowStoreEvicted marks a store as a slow store and prevents transferring // leader to the store func (bc *BasicCluster) SlowStoreEvicted(storeID uint64) error { - bc.Lock() - defer bc.Unlock() + bc.Stores.mu.Lock() + defer bc.Stores.mu.Unlock() return bc.Stores.SlowStoreEvicted(storeID) } // SlowStoreRecovered cleans the evicted state of a store. func (bc *BasicCluster) SlowStoreRecovered(storeID uint64) { - bc.Lock() - defer bc.Unlock() + bc.Stores.mu.Lock() + defer bc.Stores.mu.Unlock() bc.Stores.SlowStoreRecovered(storeID) } // ResetStoreLimit resets the limit for a specific store. func (bc *BasicCluster) ResetStoreLimit(storeID uint64, limitType storelimit.Type, ratePerSec ...float64) { - bc.Lock() - defer bc.Unlock() + bc.Stores.mu.Lock() + defer bc.Stores.mu.Unlock() bc.Stores.ResetStoreLimit(storeID, limitType, ratePerSec...) } // UpdateStoreStatus updates the information of the store. -func (bc *BasicCluster) UpdateStoreStatus(storeID uint64, leaderCount int, regionCount int, pendingPeerCount int, leaderSize int64, regionSize int64, witnessCount int) { - bc.Lock() - defer bc.Unlock() - bc.Stores.UpdateStoreStatus(storeID, leaderCount, regionCount, pendingPeerCount, leaderSize, regionSize, witnessCount) +func (bc *BasicCluster) UpdateStoreStatus(storeID uint64) { + bc.Regions.mu.RLock() + leaderCount := bc.Regions.GetStoreLeaderCount(storeID) + regionCount := bc.Regions.GetStoreRegionCount(storeID) + witnessCount := bc.Regions.GetStoreWitnessCount(storeID) + pendingPeerCount := bc.Regions.GetStorePendingPeerCount(storeID) + leaderRegionSize := bc.Regions.GetStoreLeaderRegionSize(storeID) + regionSize := bc.Regions.GetStoreRegionSize(storeID) + bc.Regions.mu.RUnlock() + bc.Stores.mu.Lock() + defer bc.Stores.mu.Unlock() + bc.Stores.UpdateStoreStatus(storeID, leaderCount, regionCount, pendingPeerCount, leaderRegionSize, regionSize, witnessCount) +} + +// PutStore put a store. +func (bc *BasicCluster) PutStore(store *StoreInfo) { + bc.Stores.mu.Lock() + defer bc.Stores.mu.Unlock() + bc.Stores.SetStore(store) +} + +// ResetStores resets the store cache. +func (bc *BasicCluster) ResetStores() { + bc.Stores.mu.Lock() + defer bc.Stores.mu.Unlock() + bc.Stores.StoresInfo = NewStoresInfo() +} + +// DeleteStore deletes a store. +func (bc *BasicCluster) DeleteStore(store *StoreInfo) { + bc.Stores.mu.Lock() + defer bc.Stores.mu.Unlock() + bc.Stores.DeleteStore(store) +} + +/* Regions read operations */ + +// GetRegion searches for a region by ID. +func (bc *BasicCluster) GetRegion(regionID uint64) *RegionInfo { + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() + return bc.Regions.GetRegion(regionID) +} + +// GetRegions gets all RegionInfo from regionMap. +func (bc *BasicCluster) GetRegions() []*RegionInfo { + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() + return bc.Regions.GetRegions() +} + +// GetMetaRegions gets a set of metapb.Region from regionMap. 
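A property of the two-mutex layout that is easy to miss: none of the accessors in these hunks holds Stores.mu and Regions.mu at the same time. UpdateStoreStatus reads the per-store counters under the regions lock, releases it, and only then takes the stores lock, so the two locks cannot form an ordering cycle. A minimal standalone sketch of that discipline (illustrative toy code, not PD's):

    package main

    import (
        "fmt"
        "sync"
    )

    // toy stand-ins for RegionsInfo / StoresInfo guarded by separate mutexes
    type basicCluster struct {
        regionsMu sync.RWMutex
        regionCnt map[uint64]int // storeID -> region count

        storesMu sync.RWMutex
        cached   map[uint64]int // storeID -> cached region count
    }

    func (b *basicCluster) updateStoreStatus(storeID uint64) {
        b.regionsMu.RLock()
        cnt := b.regionCnt[storeID]
        b.regionsMu.RUnlock() // released before the stores lock is taken

        b.storesMu.Lock()
        defer b.storesMu.Unlock()
        b.cached[storeID] = cnt
    }

    func main() {
        c := &basicCluster{regionCnt: map[uint64]int{1: 3}, cached: map[uint64]int{}}
        c.updateStoreStatus(1)
        fmt.Println(c.cached[1]) // 3
    }
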
+func (bc *BasicCluster) GetMetaRegions() []*metapb.Region { + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() + return bc.Regions.GetMetaRegions() +} + +// GetStoreRegions gets all RegionInfo with a given storeID. +func (bc *BasicCluster) GetStoreRegions(storeID uint64) []*RegionInfo { + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() + return bc.Regions.GetStoreRegions(storeID) +} + +// GetLeaderStoreByRegionID returns the leader store of the given region. +func (bc *BasicCluster) GetLeaderStoreByRegionID(regionID uint64) *StoreInfo { + bc.Regions.mu.RLock() + region := bc.Regions.GetRegion(regionID) + if region == nil || region.GetLeader() == nil { + bc.Regions.mu.RUnlock() + return nil + } + bc.Regions.mu.RUnlock() + + bc.Stores.mu.RLock() + defer bc.Stores.mu.RUnlock() + return bc.Stores.GetStore(region.GetLeader().GetStoreId()) +} + +// GetAdjacentRegions returns region's info that is adjacent with specific region. +func (bc *BasicCluster) GetAdjacentRegions(region *RegionInfo) (*RegionInfo, *RegionInfo) { + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() + return bc.Regions.GetAdjacentRegions(region) +} + +// GetRangeHoles returns all range holes, i.e the key ranges without any region info. +func (bc *BasicCluster) GetRangeHoles() [][]string { + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() + return bc.Regions.GetRangeHoles() } const randomRegionMaxRetry = 10 // RandFollowerRegions returns a random region that has a follower on the store. func (bc *BasicCluster) RandFollowerRegions(storeID uint64, ranges []KeyRange) []*RegionInfo { - bc.RLock() - defer bc.RUnlock() + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() return bc.Regions.RandFollowerRegions(storeID, ranges, randomRegionMaxRetry) } // RandLeaderRegions returns a random region that has leader on the store. func (bc *BasicCluster) RandLeaderRegions(storeID uint64, ranges []KeyRange) []*RegionInfo { - bc.RLock() - defer bc.RUnlock() + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() return bc.Regions.RandLeaderRegions(storeID, ranges, randomRegionMaxRetry) } // RandPendingRegions returns a random region that has a pending peer on the store. func (bc *BasicCluster) RandPendingRegions(storeID uint64, ranges []KeyRange) []*RegionInfo { - bc.RLock() - defer bc.RUnlock() + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() return bc.Regions.RandPendingRegions(storeID, ranges, randomRegionMaxRetry) } // RandLearnerRegions returns a random region that has a learner peer on the store. func (bc *BasicCluster) RandLearnerRegions(storeID uint64, ranges []KeyRange) []*RegionInfo { - bc.RLock() - defer bc.RUnlock() + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() return bc.Regions.RandLearnerRegions(storeID, ranges, randomRegionMaxRetry) } // GetRegionCount gets the total count of RegionInfo of regionMap. func (bc *BasicCluster) GetRegionCount() int { - bc.RLock() - defer bc.RUnlock() + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() return bc.Regions.GetRegionCount() } -// GetStoreCount returns the total count of storeInfo. -func (bc *BasicCluster) GetStoreCount() int { - bc.RLock() - defer bc.RUnlock() - return bc.Stores.GetStoreCount() -} - // GetStoreRegionCount gets the total count of a store's leader and follower RegionInfo by storeID. 
func (bc *BasicCluster) GetStoreRegionCount(storeID uint64) int { - bc.RLock() - defer bc.RUnlock() + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() return bc.Regions.GetStoreLeaderCount(storeID) + bc.Regions.GetStoreFollowerCount(storeID) + bc.Regions.GetStoreLearnerCount(storeID) } // GetStoreLeaderCount get the total count of a store's leader RegionInfo. func (bc *BasicCluster) GetStoreLeaderCount(storeID uint64) int { - bc.RLock() - defer bc.RUnlock() + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() return bc.Regions.GetStoreLeaderCount(storeID) } // GetStoreFollowerCount get the total count of a store's follower RegionInfo. func (bc *BasicCluster) GetStoreFollowerCount(storeID uint64) int { - bc.RLock() - defer bc.RUnlock() + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() return bc.Regions.GetStoreFollowerCount(storeID) } // GetStorePendingPeerCount gets the total count of a store's region that includes pending peer. func (bc *BasicCluster) GetStorePendingPeerCount(storeID uint64) int { - bc.RLock() - defer bc.RUnlock() + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() return bc.Regions.GetStorePendingPeerCount(storeID) } // GetStoreWitnessCount gets the total count of a store's witness RegionInfo. func (bc *BasicCluster) GetStoreWitnessCount(storeID uint64) int { - bc.RLock() - defer bc.RUnlock() + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() return bc.Regions.GetStoreWitnessCount(storeID) } // GetStoreLeaderRegionSize get total size of store's leader regions. func (bc *BasicCluster) GetStoreLeaderRegionSize(storeID uint64) int64 { - bc.RLock() - defer bc.RUnlock() + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() return bc.Regions.GetStoreLeaderRegionSize(storeID) } // GetStoreRegionSize get total size of store's regions. func (bc *BasicCluster) GetStoreRegionSize(storeID uint64) int64 { - bc.RLock() - defer bc.RUnlock() + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() return bc.Regions.GetStoreRegionSize(storeID) } // GetAverageRegionSize returns the average region approximate size. func (bc *BasicCluster) GetAverageRegionSize() int64 { - bc.RLock() - defer bc.RUnlock() + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() return bc.Regions.GetAverageRegionSize() } +// GetRegionByKey searches RegionInfo from regionTree. +func (bc *BasicCluster) GetRegionByKey(regionKey []byte) *RegionInfo { + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() + return bc.Regions.GetRegionByKey(regionKey) +} + +// GetPrevRegionByKey searches previous RegionInfo from regionTree. +func (bc *BasicCluster) GetPrevRegionByKey(regionKey []byte) *RegionInfo { + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() + return bc.Regions.GetPrevRegionByKey(regionKey) +} + +// ScanRange scans regions intersecting [start key, end key), returns at most +// `limit` regions. limit <= 0 means no limit. +func (bc *BasicCluster) ScanRange(startKey, endKey []byte, limit int) []*RegionInfo { + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() + return bc.Regions.ScanRange(startKey, endKey, limit) +} + +// GetOverlaps returns the regions which are overlapped with the specified region range. +func (bc *BasicCluster) GetOverlaps(region *RegionInfo) []*RegionInfo { + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() + return bc.Regions.GetOverlaps(region) +} + +// GetRegionSizeByRange scans regions intersecting [start key, end key), returns the total region size of this range. 
+func (bc *BasicCluster) GetRegionSizeByRange(startKey, endKey []byte) int64 { + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() + return bc.Regions.GetRegionSizeByRange(startKey, endKey) +} + func (bc *BasicCluster) getWriteRate( f func(storeID uint64) (bytesRate, keysRate float64), ) (storeIDs []uint64, bytesRates, keysRates []float64) { - bc.RLock() - defer bc.RUnlock() + bc.Stores.mu.RLock() count := len(bc.Stores.stores) storeIDs = make([]uint64, 0, count) + for _, store := range bc.Stores.stores { + storeIDs = append(storeIDs, store.GetID()) + } + bc.Stores.mu.RUnlock() bytesRates = make([]float64, 0, count) keysRates = make([]float64, 0, count) - for _, store := range bc.Stores.stores { - id := store.GetID() + for _, id := range storeIDs { + bc.Regions.mu.RLock() bytesRate, keysRate := f(id) - storeIDs = append(storeIDs, id) + bc.Regions.mu.RUnlock() bytesRates = append(bytesRates, bytesRate) keysRates = append(keysRates, keysRate) } @@ -321,30 +412,7 @@ func (bc *BasicCluster) GetStoresWriteRate() (storeIDs []uint64, bytesRates, key return bc.getWriteRate(bc.Regions.GetStoreWriteRate) } -// PutStore put a store. -func (bc *BasicCluster) PutStore(store *StoreInfo) { - bc.Lock() - defer bc.Unlock() - bc.Stores.SetStore(store) -} - -// ResetStores resets the store cache. -func (bc *BasicCluster) ResetStores() { - bc.Lock() - defer bc.Unlock() - bc.Stores = NewStoresInfo() -} - -// DeleteStore deletes a store. -func (bc *BasicCluster) DeleteStore(store *StoreInfo) { - bc.Lock() - defer bc.Unlock() - bc.Stores.DeleteStore(store) -} - -func (bc *BasicCluster) getRelevantRegions(region *RegionInfo) (origin *RegionInfo, overlaps []*RegionInfo) { - bc.RLock() - defer bc.RUnlock() +func (bc *BasicCluster) getRelevantRegionsLocked(region *RegionInfo) (origin *RegionInfo, overlaps []*RegionInfo) { origin = bc.Regions.GetRegion(region.GetID()) if origin == nil || !bytes.Equal(origin.GetStartKey(), region.GetStartKey()) || !bytes.Equal(origin.GetEndKey(), region.GetEndKey()) { overlaps = bc.Regions.GetOverlaps(region) @@ -352,22 +420,20 @@ func (bc *BasicCluster) getRelevantRegions(region *RegionInfo) (origin *RegionIn return } -func isRegionRecreated(region *RegionInfo) bool { - // Regions recreated by online unsafe recover have both ver and conf ver equal to 1. To - // prevent stale bootstrap region (first region in a cluster which covers the entire key - // range) from reporting stale info, we exclude regions that covers the entire key range - // here. Technically, it is possible for unsafe recover to recreate such region, but that - // means the entire key range is unavailable, and we don't expect unsafe recover to perform - // better than recreating the cluster. - return region.GetRegionEpoch().GetVersion() == 1 && region.GetRegionEpoch().GetConfVer() == 1 && (len(region.GetStartKey()) != 0 || len(region.GetEndKey()) != 0) -} +/* Regions write operations */ // PreCheckPutRegion checks if the region is valid to put. func (bc *BasicCluster) PreCheckPutRegion(region *RegionInfo) (*RegionInfo, error) { - origin, overlaps := bc.getRelevantRegions(region) + bc.Regions.mu.RLock() + origin, overlaps := bc.getRelevantRegionsLocked(region) + bc.Regions.mu.RUnlock() + return bc.check(region, origin, overlaps) +} + +func (bc *BasicCluster) check(region, origin *RegionInfo, overlaps []*RegionInfo) (*RegionInfo, error) { for _, item := range overlaps { // PD ignores stale regions' heartbeats, unless it is recreated recently by unsafe recover operation. 
- if region.GetRegionEpoch().GetVersion() < item.GetRegionEpoch().GetVersion() && !isRegionRecreated(region) { + if region.GetRegionEpoch().GetVersion() < item.GetRegionEpoch().GetVersion() && !region.isRegionRecreated() { return nil, errRegionIsStale(region.GetMeta(), item.GetMeta()) } } @@ -380,27 +446,13 @@ func (bc *BasicCluster) PreCheckPutRegion(region *RegionInfo) (*RegionInfo, erro // TiKV reports term after v3.0 isTermBehind := region.GetTerm() > 0 && region.GetTerm() < origin.GetTerm() // Region meta is stale, return an error. - if (isTermBehind || r.GetVersion() < o.GetVersion() || r.GetConfVer() < o.GetConfVer()) && !isRegionRecreated(region) { + if (isTermBehind || r.GetVersion() < o.GetVersion() || r.GetConfVer() < o.GetConfVer()) && !region.isRegionRecreated() { return origin, errRegionIsStale(region.GetMeta(), origin.GetMeta()) } return origin, nil } -// PutRegion put a region. -func (bc *BasicCluster) PutRegion(region *RegionInfo) []*RegionInfo { - bc.Lock() - defer bc.Unlock() - return bc.Regions.SetRegion(region) -} - -// GetRegionSizeByRange scans regions intersecting [start key, end key), returns the total region size of this range. -func (bc *BasicCluster) GetRegionSizeByRange(startKey, endKey []byte) int64 { - bc.RLock() - defer bc.RUnlock() - return bc.Regions.GetRegionSizeByRange(startKey, endKey) -} - // CheckAndPutRegion checks if the region is valid to put, if valid then put. func (bc *BasicCluster) CheckAndPutRegion(region *RegionInfo) []*RegionInfo { origin, err := bc.PreCheckPutRegion(region) @@ -412,10 +464,29 @@ func (bc *BasicCluster) CheckAndPutRegion(region *RegionInfo) []*RegionInfo { return bc.PutRegion(region) } +// AtomicCheckAndPutRegion checks if the region is valid to put, if valid then put. +func (bc *BasicCluster) AtomicCheckAndPutRegion(region *RegionInfo) ([]*RegionInfo, error) { + bc.Regions.mu.Lock() + defer bc.Regions.mu.Unlock() + origin, overlaps := bc.getRelevantRegionsLocked(region) + _, err := bc.check(region, origin, overlaps) + if err != nil { + return nil, err + } + return bc.Regions.SetRegion(region), nil +} + +// PutRegion put a region. +func (bc *BasicCluster) PutRegion(region *RegionInfo) []*RegionInfo { + bc.Regions.mu.Lock() + defer bc.Regions.mu.Unlock() + return bc.Regions.SetRegion(region) +} + // RemoveRegionIfExist removes RegionInfo from regionTree and regionMap if exists. func (bc *BasicCluster) RemoveRegionIfExist(id uint64) { - bc.Lock() - defer bc.Unlock() + bc.Regions.mu.Lock() + defer bc.Regions.mu.Unlock() if r := bc.Regions.GetRegion(id); r != nil { bc.Regions.RemoveRegion(r) } @@ -423,47 +494,18 @@ func (bc *BasicCluster) RemoveRegionIfExist(id uint64) { // ResetRegionCache drops all region cache. func (bc *BasicCluster) ResetRegionCache() { - bc.Lock() - defer bc.Unlock() - bc.Regions = NewRegionsInfo() + bc.Regions.mu.Lock() + defer bc.Regions.mu.Unlock() + bc.Regions.RegionsInfo = NewRegionsInfo() } // RemoveRegion removes RegionInfo from regionTree and regionMap. func (bc *BasicCluster) RemoveRegion(region *RegionInfo) { - bc.Lock() - defer bc.Unlock() + bc.Regions.mu.Lock() + defer bc.Regions.mu.Unlock() bc.Regions.RemoveRegion(region) } -// GetRegionByKey searches RegionInfo from regionTree. -func (bc *BasicCluster) GetRegionByKey(regionKey []byte) *RegionInfo { - bc.RLock() - defer bc.RUnlock() - return bc.Regions.GetRegionByKey(regionKey) -} - -// GetPrevRegionByKey searches previous RegionInfo from regionTree. 
-func (bc *BasicCluster) GetPrevRegionByKey(regionKey []byte) *RegionInfo { - bc.RLock() - defer bc.RUnlock() - return bc.Regions.GetPrevRegionByKey(regionKey) -} - -// ScanRange scans regions intersecting [start key, end key), returns at most -// `limit` regions. limit <= 0 means no limit. -func (bc *BasicCluster) ScanRange(startKey, endKey []byte, limit int) []*RegionInfo { - bc.RLock() - defer bc.RUnlock() - return bc.Regions.ScanRange(startKey, endKey, limit) -} - -// GetOverlaps returns the regions which are overlapped with the specified region range. -func (bc *BasicCluster) GetOverlaps(region *RegionInfo) []*RegionInfo { - bc.RLock() - defer bc.RUnlock() - return bc.Regions.GetOverlaps(region) -} - // RegionSetInformer provides access to a shared informer of regions. type RegionSetInformer interface { GetRegionCount() int diff --git a/server/core/region.go b/server/core/region.go index 5ddee39865fa..d041ede2cb9d 100644 --- a/server/core/region.go +++ b/server/core/region.go @@ -573,6 +573,20 @@ func (r *RegionInfo) IsFromHeartbeat() bool { return r.fromHeartbeat } +func (r *RegionInfo) isInvolved(startKey, endKey []byte) bool { + return bytes.Compare(r.GetStartKey(), startKey) >= 0 && (len(endKey) == 0 || (len(r.GetEndKey()) > 0 && bytes.Compare(r.GetEndKey(), endKey) <= 0)) +} + +func (r *RegionInfo) isRegionRecreated() bool { + // Regions recreated by online unsafe recover have both ver and conf ver equal to 1. To + // prevent stale bootstrap region (first region in a cluster which covers the entire key + // range) from reporting stale info, we exclude regions that covers the entire key range + // here. Technically, it is possible for unsafe recover to recreate such region, but that + // means the entire key range is unavailable, and we don't expect unsafe recover to perform + // better than recreating the cluster. + return r.GetRegionEpoch().GetVersion() == 1 && r.GetRegionEpoch().GetConfVer() == 1 && (len(r.GetStartKey()) != 0 || len(r.GetEndKey()) != 0) +} + // RegionGuideFunc is a function that determines which follow-up operations need to be performed based on the origin // and new region information. type RegionGuideFunc func(region, origin *RegionInfo) (isNew, saveKV, saveCache, needSync bool) @@ -1289,10 +1303,6 @@ func DiffRegionKeyInfo(origin *RegionInfo, other *RegionInfo) string { return strings.Join(ret, ", ") } -func isInvolved(region *RegionInfo, startKey, endKey []byte) bool { - return bytes.Compare(region.GetStartKey(), startKey) >= 0 && (len(endKey) == 0 || (len(region.GetEndKey()) > 0 && bytes.Compare(region.GetEndKey(), endKey) <= 0)) -} - // String converts slice of bytes to string without copy. 
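Moving isInvolved and isRegionRecreated onto RegionInfo lets regionTree.RandomRegion (the last hunk of this patch) call them as methods instead of free functions. For a concrete feel of the recreated-region rule, a heartbeat carrying an epoch of (Version: 1, ConfVer: 1) together with a bounded key range is not rejected as stale; a sketch with assumed values:

    // hedged illustration of the rule documented in isRegionRecreated
    meta := &metapb.Region{
        Id:          100,
        StartKey:    []byte("a"),
        EndKey:      []byte("b"),
        RegionEpoch: &metapb.RegionEpoch{Version: 1, ConfVer: 1},
    }
    region := core.NewRegionInfo(meta, nil)
    // region.isRegionRecreated() == true: ver == confver == 1 and the key range is bounded,
    // whereas a region spanning the whole key space ("" .. "") would be treated as a stale
    // bootstrap region and rejected.
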
func String(b []byte) (s string) { if len(b) == 0 { diff --git a/server/core/region_tree.go b/server/core/region_tree.go index aa32b8a10430..a5d717a93e98 100644 --- a/server/core/region_tree.go +++ b/server/core/region_tree.go @@ -273,7 +273,7 @@ func (t *regionTree) RandomRegion(ranges []KeyRange) *RegionInfo { } index := rand.Intn(endIndex-startIndex) + startIndex region := t.tree.GetAt(index).(*regionItem).region - if isInvolved(region, startKey, endKey) { + if region.isInvolved(startKey, endKey) { return region } } From a4d9f294247b03c3a713c83ac7034dc33ba96c38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Wed, 26 Oct 2022 05:47:57 +0200 Subject: [PATCH 14/67] metrics: Add check for unhealthy stores (#5612) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit close tikv/pd#5611 Signed-off-by: Daniël van Eeden Co-authored-by: Ti Chi Robot --- metrics/alertmanager/pd.rules.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/metrics/alertmanager/pd.rules.yml b/metrics/alertmanager/pd.rules.yml index bebc82521ea7..cf8cea9c3b4f 100644 --- a/metrics/alertmanager/pd.rules.yml +++ b/metrics/alertmanager/pd.rules.yml @@ -49,6 +49,21 @@ groups: value: '{{ $value }}' summary: PD_cluster_lost_connect_tikv_nums + - alert: PD_cluster_unhealthy_tikv_nums + expr: (sum ( pd_cluster_status{type="store_unhealth_count"} ) by (instance) + > 0) and (sum(etcd_server_is_leader) by (instance) > 0) + for: 1m + labels: + env: ENV_LABELS_ENV + expr: (sum ( pd_cluster_status{type="store_unhealth_count"} ) by (instance) + > 0) and (sum(etcd_server_is_leader) by (instance) > 0) + level: warning + annotations: + description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values:{{ + $value }}' + summary: PD_cluster_unhealth_tikv_nums + value: '{{ $value }}' + - alert: PD_cluster_low_space expr: (sum(pd_cluster_status{type="store_low_space_count"}) by (instance) > 0) and (sum(etcd_server_is_leader) by (instance) > 0) for: 1m From 914e19c1ae5a750b8c9cd24d3a22eb3900d6eb32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B7=B7=E6=B2=8CDM?= Date: Wed, 26 Oct 2022 15:29:57 +0800 Subject: [PATCH 15/67] operator: fix some error checks (#5629) close tikv/pd#5623 Signed-off-by: HunDunDM Co-authored-by: Ti Chi Robot --- server/schedule/operator/step.go | 43 +++++------ server/schedule/operator/step_test.go | 107 ++++++++++++++++++++++++++ 2 files changed, 126 insertions(+), 24 deletions(-) diff --git a/server/schedule/operator/step.go b/server/schedule/operator/step.go index 8374f8362ff8..3000cd6a6506 100644 --- a/server/schedule/operator/step.go +++ b/server/schedule/operator/step.go @@ -404,22 +404,24 @@ type PromoteLearner struct { } // ConfVerChanged returns the delta value for version increased by this step. +// It is also used by ChangePeerV2Leave. Since there are currently four roles, +// we need to confirm whether it is a Voter, not a DemotingVoter, etc. func (pl PromoteLearner) ConfVerChanged(region *core.RegionInfo) uint64 { peer := region.GetStoreVoter(pl.ToStore) - return typeutil.BoolToUint64(peer.GetId() == pl.PeerID) + return typeutil.BoolToUint64(peer.GetId() == pl.PeerID && peer.GetRole() == metapb.PeerRole_Voter) } func (pl PromoteLearner) String() string { return fmt.Sprintf("promote learner peer %v on store %v to voter", pl.PeerID, pl.ToStore) } -// IsFinish checks if current step is finished. +// IsFinish checks if current step is finished. It is also used by ChangePeerV2Leave. 
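The stricter role checks in this patch follow from the four peer roles that exist once joint consensus is in play: GetStoreVoter can hand back a peer that is still an IncomingVoter or a DemotingVoter, so matching the peer ID alone is not enough. The transitions driven by the V2 steps, and the role test the steps now apply, in a hedged sketch (isPlainVoter is an illustrative helper, not a PD API):

    // promote: Learner -> IncomingVoter (ChangePeerV2Enter) -> Voter   (ChangePeerV2Leave)
    // demote:  Voter   -> DemotingVoter (ChangePeerV2Enter) -> Learner (ChangePeerV2Leave)
    func isPlainVoter(peer *metapb.Peer) bool {
        // excludes IncomingVoter and DemotingVoter, which are still in the joint state
        return peer.GetRole() == metapb.PeerRole_Voter
    }
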
func (pl PromoteLearner) IsFinish(region *core.RegionInfo) bool { if peer := region.GetStoreVoter(pl.ToStore); peer != nil { if peer.GetId() != pl.PeerID { log.Warn("obtain unexpected peer", zap.String("expect", pl.String()), zap.Uint64("obtain-voter", peer.GetId())) } - return peer.GetId() == pl.PeerID + return peer.GetId() == pl.PeerID && peer.GetRole() == metapb.PeerRole_Voter } return false } @@ -643,9 +645,10 @@ func (dv DemoteVoter) String() string { } // ConfVerChanged returns the delta value for version increased by this step. -func (dv DemoteVoter) ConfVerChanged(region *core.RegionInfo) bool { - peer := region.GetStoreLearner(dv.ToStore) - return peer.GetId() == dv.PeerID +func (dv DemoteVoter) ConfVerChanged(region *core.RegionInfo) uint64 { + peer := region.GetStorePeer(dv.ToStore) + // the demoting peer may be removed later. + return typeutil.BoolToUint64(peer == nil || (peer.GetId() == dv.PeerID && peer.GetRole() == metapb.PeerRole_Learner)) } // IsFinish checks if current step is finished. @@ -700,7 +703,8 @@ func (cpe ChangePeerV2Enter) ConfVerChanged(region *core.RegionInfo) uint64 { } } for _, dv := range cpe.DemoteVoters { - peer := region.GetStoreVoter(dv.ToStore) + peer := region.GetStorePeer(dv.ToStore) + // the demoting peer may be removed later. if peer != nil && (peer.GetId() != dv.PeerID || !core.IsLearnerOrDemotingVoter(peer)) { return 0 } @@ -715,16 +719,16 @@ func (cpe ChangePeerV2Enter) IsFinish(region *core.RegionInfo) bool { if peer != nil && peer.GetId() != pl.PeerID { log.Warn("obtain unexpected peer", zap.String("expect", pl.String()), zap.Uint64("obtain-voter", peer.GetId())) } - if peer.GetId() != pl.PeerID || peer.GetRole() != metapb.PeerRole_IncomingVoter { + if peer.GetId() != pl.PeerID || !core.IsVoterOrIncomingVoter(peer) { return false } } for _, dv := range cpe.DemoteVoters { - peer := region.GetStoreVoter(dv.ToStore) + peer := region.GetStorePeer(dv.ToStore) if peer != nil && peer.GetId() != dv.PeerID { log.Warn("obtain unexpected peer", zap.String("expect", dv.String()), zap.Uint64("obtain-learner", peer.GetId())) } - if peer.GetId() != dv.PeerID || peer.GetRole() != metapb.PeerRole_DemotingVoter { + if peer.GetId() != dv.PeerID || !core.IsLearnerOrDemotingVoter(peer) { return false } } @@ -740,12 +744,10 @@ func (cpe ChangePeerV2Enter) CheckInProgress(_ ClusterInformer, region *core.Reg return errors.New("peer does not exist") } switch peer.GetRole() { - case metapb.PeerRole_Learner: + case metapb.PeerRole_Learner, metapb.PeerRole_Voter: notInJointState = true case metapb.PeerRole_IncomingVoter: inJointState = true - case metapb.PeerRole_Voter: - return errors.New("peer already is a voter") case metapb.PeerRole_DemotingVoter: return errors.New("cannot promote a demoting voter") default: @@ -758,12 +760,10 @@ func (cpe ChangePeerV2Enter) CheckInProgress(_ ClusterInformer, region *core.Reg return errors.New("peer does not exist") } switch peer.GetRole() { - case metapb.PeerRole_Voter: + case metapb.PeerRole_Voter, metapb.PeerRole_Learner: notInJointState = true case metapb.PeerRole_DemotingVoter: inJointState = true - case metapb.PeerRole_Learner: - return errors.New("peer already is a learner") case metapb.PeerRole_IncomingVoter: return errors.New("cannot demote a incoming voter") default: @@ -833,13 +833,12 @@ func (cpl ChangePeerV2Leave) String() string { // ConfVerChanged returns the delta value for version increased by this step. 
func (cpl ChangePeerV2Leave) ConfVerChanged(region *core.RegionInfo) uint64 { for _, pl := range cpl.PromoteLearners { - peer := region.GetStoreVoter(pl.ToStore) - if peer.GetId() != pl.PeerID || peer.GetRole() != metapb.PeerRole_Voter { + if pl.ConfVerChanged(region) == 0 { return 0 } } for _, dv := range cpl.DemoteVoters { - if region.GetStorePeer(dv.PeerID) != nil && !dv.ConfVerChanged(region) { + if dv.ConfVerChanged(region) == 0 { return 0 } } @@ -849,11 +848,7 @@ func (cpl ChangePeerV2Leave) ConfVerChanged(region *core.RegionInfo) uint64 { // IsFinish checks if current step is finished. func (cpl ChangePeerV2Leave) IsFinish(region *core.RegionInfo) bool { for _, pl := range cpl.PromoteLearners { - peer := region.GetStoreVoter(pl.ToStore) - if peer != nil && peer.GetId() != pl.PeerID { - log.Warn("obtain unexpected peer", zap.String("expect", pl.String()), zap.Uint64("obtain-voter", peer.GetId())) - } - if peer.GetId() != pl.PeerID || peer.GetRole() != metapb.PeerRole_Voter { + if !pl.IsFinish(region) { return false } } diff --git a/server/schedule/operator/step_test.go b/server/schedule/operator/step_test.go index 2b5141b8bd3a..983723815a14 100644 --- a/server/schedule/operator/step_test.go +++ b/server/schedule/operator/step_test.go @@ -293,6 +293,113 @@ func (suite *operatorStepTestSuite) TestChangePeerV2Enter() { suite.check(cpe, desc, testCases) } +func (suite *operatorStepTestSuite) TestChangePeerV2EnterWithSingleChange() { + cpe := ChangePeerV2Enter{ + PromoteLearners: []PromoteLearner{{PeerID: 3, ToStore: 3}}, + } + testCases := []testCase{ + { // before step + []*metapb.Peer{ + {Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter}, + {Id: 3, StoreId: 3, Role: metapb.PeerRole_Learner}, + }, + 0, + false, + suite.NoError, + }, + { // after step + []*metapb.Peer{ + {Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter}, + {Id: 3, StoreId: 3, Role: metapb.PeerRole_IncomingVoter}, + }, + 1, + true, + suite.NoError, + }, + { // after step (direct) + []*metapb.Peer{ + {Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter}, + {Id: 3, StoreId: 3, Role: metapb.PeerRole_Voter}, + }, + 1, + true, + suite.NoError, + }, + { // error role + []*metapb.Peer{ + {Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter}, + {Id: 3, StoreId: 3, Role: metapb.PeerRole_DemotingVoter}, + }, + 0, + false, + suite.Error, + }, + } + desc := "use joint consensus, promote learner peer 3 on store 3 to voter" + suite.check(cpe, desc, testCases) + + cpe = ChangePeerV2Enter{ + DemoteVoters: []DemoteVoter{{PeerID: 3, ToStore: 3}}, + } + testCases = []testCase{ + { // before step + []*metapb.Peer{ + {Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter}, + {Id: 3, StoreId: 3, Role: metapb.PeerRole_Voter}, + }, + 0, + false, + suite.NoError, + }, + { // after step + []*metapb.Peer{ + {Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter}, + {Id: 3, StoreId: 3, Role: metapb.PeerRole_DemotingVoter}, + }, + 1, + true, + suite.NoError, + }, + { // after step (direct) + []*metapb.Peer{ + {Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter}, + {Id: 3, StoreId: 3, Role: metapb.PeerRole_Learner}, + }, + 1, + true, + suite.NoError, + }, + { // demote and remove peer + []*metapb.Peer{ + {Id: 1, StoreId: 1, 
Role: metapb.PeerRole_Voter}, + {Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter}, + }, + 1, // correct calculation is required + false, + suite.Error, + }, + { // error role + []*metapb.Peer{ + {Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter}, + {Id: 3, StoreId: 3, Role: metapb.PeerRole_IncomingVoter}, + }, + 0, + false, + suite.Error, + }, + } + desc = "use joint consensus, demote voter peer 3 on store 3 to learner" + suite.check(cpe, desc, testCases) +} + func (suite *operatorStepTestSuite) TestChangePeerV2Leave() { cpl := ChangePeerV2Leave{ PromoteLearners: []PromoteLearner{{PeerID: 3, ToStore: 3}, {PeerID: 4, ToStore: 4}}, From 8e2bd59fc68b0cf8e93e590cc60933976b4af694 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Wed, 26 Oct 2022 20:25:57 +0800 Subject: [PATCH 16/67] schedule: batch to report the metrics of target filter (#5561) close tikv/pd#5538 Signed-off-by: bufferflies <1045931706@qq.com> --- plugin/scheduler_example/evict_leader.go | 2 +- server/schedule/checker/replica_strategy.go | 6 +- server/schedule/filter/candidates.go | 8 +- server/schedule/filter/candidates_test.go | 10 +- server/schedule/filter/counter.go | 213 ++++++++++++++++++++ server/schedule/filter/counter_test.go | 50 +++++ server/schedule/filter/filters.go | 177 ++++++++-------- server/schedule/filter/filters_test.go | 22 +- server/schedulers/balance_leader.go | 23 ++- server/schedulers/balance_region.go | 24 ++- server/schedulers/evict_leader.go | 2 +- server/schedulers/label.go | 2 +- server/schedulers/random_merge.go | 2 +- server/schedulers/shuffle_leader.go | 2 +- server/schedulers/shuffle_region.go | 4 +- server/schedulers/utils.go | 2 +- 16 files changed, 417 insertions(+), 132 deletions(-) create mode 100644 server/schedule/filter/counter.go create mode 100644 server/schedule/filter/counter_test.go diff --git a/plugin/scheduler_example/evict_leader.go b/plugin/scheduler_example/evict_leader.go index ef5a50a68031..77a2e11ea4db 100644 --- a/plugin/scheduler_example/evict_leader.go +++ b/plugin/scheduler_example/evict_leader.go @@ -225,7 +225,7 @@ func (s *evictLeaderScheduler) Schedule(cluster schedule.Cluster, dryRun bool) ( continue } target := filter.NewCandidates(cluster.GetFollowerStores(region)). - FilterTarget(cluster.GetOpts(), nil, &filter.StoreStateFilter{ActionScope: EvictLeaderName, TransferLeader: true}). + FilterTarget(cluster.GetOpts(), nil, nil, &filter.StoreStateFilter{ActionScope: EvictLeaderName, TransferLeader: true}). RandomPick() if target == nil { continue diff --git a/server/schedule/checker/replica_strategy.go b/server/schedule/checker/replica_strategy.go index f609ccb01d18..92f35595f73f 100644 --- a/server/schedule/checker/replica_strategy.go +++ b/server/schedule/checker/replica_strategy.go @@ -71,12 +71,12 @@ func (s *ReplicaStrategy) SelectStoreToAdd(coLocationStores []*core.StoreInfo, e isolationComparer := filter.IsolationComparer(s.locationLabels, coLocationStores) strictStateFilter := &filter.StoreStateFilter{ActionScope: s.checkerName, MoveRegion: true} targetCandidate := filter.NewCandidates(s.cluster.GetStores()). - FilterTarget(s.cluster.GetOpts(), nil, filters...). + FilterTarget(s.cluster.GetOpts(), nil, nil, filters...). KeepTheTopStores(isolationComparer, false) // greater isolation score is better if targetCandidate.Len() == 0 { return 0, false } - target := targetCandidate.FilterTarget(s.cluster.GetOpts(), nil, strictStateFilter). 
+ target := targetCandidate.FilterTarget(s.cluster.GetOpts(), nil, nil, strictStateFilter). PickTheTopStore(filter.RegionScoreComparer(s.cluster.GetOpts()), true) // less region score is better if target == nil { return 0, true // filter by temporary states @@ -123,7 +123,7 @@ func (s *ReplicaStrategy) swapStoreToFirst(stores []*core.StoreInfo, id uint64) func (s *ReplicaStrategy) SelectStoreToRemove(coLocationStores []*core.StoreInfo) uint64 { isolationComparer := filter.IsolationComparer(s.locationLabels, coLocationStores) source := filter.NewCandidates(coLocationStores). - FilterSource(s.cluster.GetOpts(), nil, &filter.StoreStateFilter{ActionScope: replicaCheckerName, MoveRegion: true}). + FilterSource(s.cluster.GetOpts(), nil, nil, &filter.StoreStateFilter{ActionScope: replicaCheckerName, MoveRegion: true}). KeepTheTopStores(isolationComparer, true). PickTheTopStore(filter.RegionScoreComparer(s.cluster.GetOpts()), false) if source == nil { diff --git a/server/schedule/filter/candidates.go b/server/schedule/filter/candidates.go index bfdaccf207fa..3eb38d8cad2c 100644 --- a/server/schedule/filter/candidates.go +++ b/server/schedule/filter/candidates.go @@ -35,14 +35,14 @@ func NewCandidates(stores []*core.StoreInfo) *StoreCandidates { } // FilterSource keeps stores that can pass all source filters. -func (c *StoreCandidates) FilterSource(opt *config.PersistOptions, collector *plan.Collector, filters ...Filter) *StoreCandidates { - c.Stores = SelectSourceStores(c.Stores, filters, opt, collector) +func (c *StoreCandidates) FilterSource(opt *config.PersistOptions, collector *plan.Collector, counter *Counter, filters ...Filter) *StoreCandidates { + c.Stores = SelectSourceStores(c.Stores, filters, opt, collector, counter) return c } // FilterTarget keeps stores that can pass all target filters. 
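Both candidate helpers gain a *Counter parameter. Call sites that do not batch their filter metrics, such as the plugin and the replica strategy above, pass nil and keep the old per-call counting; a scheduler that does batch them threads its own counter through, roughly as below (s and s.filterCounter are assumptions for illustration, not taken from this hunk):

    target := filter.NewCandidates(cluster.GetFollowerStores(region)).
        FilterTarget(cluster.GetOpts(), nil /* collector */, s.filterCounter,
            &filter.StoreStateFilter{ActionScope: s.GetName(), TransferLeader: true}).
        RandomPick()
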
-func (c *StoreCandidates) FilterTarget(opt *config.PersistOptions, collector *plan.Collector, filters ...Filter) *StoreCandidates { - c.Stores = SelectTargetStores(c.Stores, filters, opt, collector) +func (c *StoreCandidates) FilterTarget(opt *config.PersistOptions, collector *plan.Collector, counter *Counter, filters ...Filter) *StoreCandidates { + c.Stores = SelectTargetStores(c.Stores, filters, opt, collector, counter) return c } diff --git a/server/schedule/filter/candidates_test.go b/server/schedule/filter/candidates_test.go index a49513977e01..78ed69c0f798 100644 --- a/server/schedule/filter/candidates_test.go +++ b/server/schedule/filter/candidates_test.go @@ -48,8 +48,8 @@ func idComparer2(a, b *core.StoreInfo) int { type idFilter func(uint64) bool -func (f idFilter) Scope() string { return "idFilter" } -func (f idFilter) Type() string { return "idFilter" } +func (f idFilter) Scope() string { return "idFilter" } +func (f idFilter) Type() filterType { return filterType(0) } func (f idFilter) Source(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { if f(store.GetID()) { return statusOK @@ -68,11 +68,11 @@ func (f idFilter) Target(opt *config.PersistOptions, store *core.StoreInfo) *pla func TestCandidates(t *testing.T) { re := require.New(t) cs := newTestCandidates(1, 2, 3, 4, 5) - cs.FilterSource(nil, nil, idFilter(func(id uint64) bool { return id > 2 })) + cs.FilterSource(nil, nil, nil, idFilter(func(id uint64) bool { return id > 2 })) check(re, cs, 3, 4, 5) - cs.FilterTarget(nil, nil, idFilter(func(id uint64) bool { return id%2 == 1 })) + cs.FilterTarget(nil, nil, nil, idFilter(func(id uint64) bool { return id%2 == 1 })) check(re, cs, 3, 5) - cs.FilterTarget(nil, nil, idFilter(func(id uint64) bool { return id > 100 })) + cs.FilterTarget(nil, nil, nil, idFilter(func(id uint64) bool { return id > 100 })) check(re, cs) store := cs.PickFirst() re.Nil(store) diff --git a/server/schedule/filter/counter.go b/server/schedule/filter/counter.go new file mode 100644 index 000000000000..9cdeb6aad976 --- /dev/null +++ b/server/schedule/filter/counter.go @@ -0,0 +1,213 @@ +// Copyright 2022 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package filter + +import ( + "strconv" +) + +type action int + +const ( + source action = iota + target + + actionLen +) + +var actions = [actionLen]string{ + "filter-source", + "filter-target", +} + +// String implements fmt.Stringer interface. +func (a action) String() string { + if a < actionLen { + return actions[a] + } + return "unknown" +} + +type scope int + +const ( + // BalanceLeader is the filter type for balance leader. + BalanceLeader scope = iota + // BalanceRegion is the filter type for balance region. + BalanceRegion + // BalanceHotRegion is the filter type for hot region. + BalanceHotRegion + // Label is the filter type for replica. + Label + + // EvictLeader is the filter type for evict leader. + EvictLeader + // RegionScatter is the filter type for scatter region. 
+ RegionScatter + // ReplicaChecker is the filter type for replica. + ReplicaChecker + // RuleChecker is the filter type for rule. + RuleChecker + + // GrantHotLeader is the filter type for grant hot leader. + GrantHotLeader + // ShuffleHotRegion is the filter type for shuffle hot region. + ShuffleHotRegion + // ShuffleRegion is the filter type for shuffle region. + ShuffleRegion + // RandomMerge is the filter type for random merge. + RandomMerge + scopeLen +) + +var scopes = [scopeLen]string{ + "balance-leader-scheduler", + "balance-region-scheduler", + "balance-hot-region-scheduler", + "label-scheduler", + + "evict-leader-scheduler", + "region-scatter", + "replica-checker", + "rule-checker", + + "grant-hot-leader-scheduler", + "shuffle-region-scheduler", + "shuffle-region-scheduler", + "random-merge-scheduler", +} + +// String implements fmt.Stringer interface. +func (s scope) String() string { + if s >= scopeLen { + return "unknown" + } + return scopes[s] +} + +type filterType int + +const ( + excluded filterType = iota + storageThreshold + distinctScore + labelConstraint + ruleFit + ruleLeader + engine + specialUse + isolation + + storeStateOK + storeStateTombstone + storeStateDown + storeStateOffline + storeStatePauseLeader + storeStateSlow + storeStateDisconnected + storeStateBusy + storeStateExceedRemoveLimit + storeStateExceedAddLimit + storeStateTooManySnapshot + storeStateTooManyPendingPeer + storeStateRejectLeader + + filtersLen +) + +var filters = [filtersLen]string{ + "exclude-filter", + "storage-threshold-filter", + "distinct-filter", + "label-constraint-filter", + "rule-fit-filter", + "rule-fit-leader-filter", + "engine-filter", + "special-use-filter", + "isolation-filter", + + "store-state-ok-filter", + "store-state-tombstone-filter", + "store-state-down-filter", + "store-state-offline-filter", + "store-state-pause-leader-filter", + "store-state-slow-filter", + "store-state-disconnect-filter", + "store-state-busy-filter", + "store-state-exceed-remove-limit-filter", + "store-state-exceed-add-limit-filter", + "store-state-too-many-snapshots-filter", + "store-state-too-many-pending-peers-filter", + "store-state-reject-leader-filter", +} + +// String implements fmt.Stringer interface. +func (f filterType) String() string { + if f < filtersLen { + return filters[f] + } + + return "unknown" +} + +// Counter records the filter counter. +type Counter struct { + scope string + // record filter counter for each store. + // [action][type][sourceID][targetID]count + // [source-filter][rule-fit-filter]<1->2><10> + counter [][]map[uint64]map[uint64]int +} + +// NewCounter creates a Counter. +func NewCounter(scope string) *Counter { + counter := make([][]map[uint64]map[uint64]int, actionLen) + for i := range counter { + counter[i] = make([]map[uint64]map[uint64]int, filtersLen) + for k := range counter[i] { + counter[i][k] = make(map[uint64]map[uint64]int) + } + } + return &Counter{counter: counter, scope: scope} +} + +// Add adds the filter counter. +func (c *Counter) inc(action action, filterType filterType, sourceID uint64, targetID uint64) { + if _, ok := c.counter[action][filterType][sourceID]; !ok { + c.counter[action][filterType][sourceID] = make(map[uint64]int) + } + c.counter[action][filterType][sourceID][targetID]++ +} + +// Flush flushes the counter to the metrics. 
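action, scope, and filterType all use the same iota-plus-name-table idiom, with a trailing *Len sentinel that both sizes the array and lets the test below assert that the enum and the table stay in sync. The idiom in miniature (illustrative only, not PD code):

    package main

    import "fmt"

    type fruit int

    const (
        apple fruit = iota
        banana
        fruitLen // sentinel: also the length of the name table
    )

    var fruitNames = [fruitLen]string{"apple", "banana"}

    func (f fruit) String() string {
        if f < fruitLen {
            return fruitNames[f]
        }
        return "unknown"
    }

    func main() {
        fmt.Println(banana)          // banana
        fmt.Println(fruit(fruitLen)) // unknown
    }
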
+func (c *Counter) Flush() { + for i, actions := range c.counter { + actionName := action(i).String() + for j, counters := range actions { + filterName := filterType(j).String() + for sourceID, count := range counters { + sourceIDStr := strconv.FormatUint(sourceID, 10) + for targetID, value := range count { + targetIDStr := strconv.FormatUint(sourceID, 10) + if value > 0 { + filterCounter.WithLabelValues(actionName, c.scope, filterName, sourceIDStr, targetIDStr). + Add(float64(value)) + counters[sourceID][targetID] = 0 + } + } + } + } + } +} diff --git a/server/schedule/filter/counter_test.go b/server/schedule/filter/counter_test.go new file mode 100644 index 000000000000..65531e26de7f --- /dev/null +++ b/server/schedule/filter/counter_test.go @@ -0,0 +1,50 @@ +// Copyright 2022 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package filter + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestString(t *testing.T) { + re := require.New(t) + testcases := []struct { + filterType int + expected string + }{ + {int(storeStateTombstone), "store-state-tombstone-filter"}, + {int(filtersLen - 1), "store-state-reject-leader-filter"}, + {int(filtersLen), "unknown"}, + } + + for _, data := range testcases { + re.Equal(data.expected, filterType(data.filterType).String()) + } + re.Equal(int(filtersLen), len(filters)) +} + +func TestCounter(t *testing.T) { + re := require.New(t) + counter := NewCounter(BalanceLeader.String()) + counter.inc(source, storeStateTombstone, 1, 2) + counter.inc(target, storeStateTombstone, 1, 2) + re.Equal(counter.counter[source][storeStateTombstone][1][2], 1) + re.Equal(counter.counter[target][storeStateTombstone][1][2], 1) + counter.Flush() + re.Equal(counter.counter[source][storeStateTombstone][1][2], 0) + re.Equal(counter.counter[target][storeStateTombstone][1][2], 0) +} diff --git a/server/schedule/filter/filters.go b/server/schedule/filter/filters.go index 645fa99d9735..df364117d050 100644 --- a/server/schedule/filter/filters.go +++ b/server/schedule/filter/filters.go @@ -15,7 +15,6 @@ package filter import ( - "fmt" "strconv" "github.com/pingcap/kvproto/pkg/metapb" @@ -28,19 +27,19 @@ import ( "github.com/tikv/pd/server/schedule/plan" ) -const ( - filterSource = "filter-source" - filterTarget = "filter-target" -) - // SelectSourceStores selects stores that be selected as source store from the list. 
-func SelectSourceStores(stores []*core.StoreInfo, filters []Filter, opt *config.PersistOptions, collector *plan.Collector) []*core.StoreInfo { +func SelectSourceStores(stores []*core.StoreInfo, filters []Filter, opt *config.PersistOptions, collector *plan.Collector, + counter *Counter) []*core.StoreInfo { return filterStoresBy(stores, func(s *core.StoreInfo) bool { - sourceID := strconv.FormatUint(s.GetID(), 10) return slice.AllOf(filters, func(i int) bool { status := filters[i].Source(opt, s) if !status.IsOK() { - filterCounter.WithLabelValues(filterSource, filters[i].Scope(), filters[i].Type(), sourceID, "").Inc() + if counter != nil { + counter.inc(source, filters[i].Type(), s.GetID(), 0) + } else { + sourceID := strconv.FormatUint(s.GetID(), 10) + filterCounter.WithLabelValues(source.String(), filters[i].Scope(), filters[i].Type().String(), sourceID, "").Inc() + } if collector != nil { collector.Collect(plan.SetResource(s), plan.SetStatus(status)) } @@ -52,18 +51,25 @@ func SelectSourceStores(stores []*core.StoreInfo, filters []Filter, opt *config. } // SelectUnavailableTargetStores selects unavailable stores that can't be selected as target store from the list. -func SelectUnavailableTargetStores(stores []*core.StoreInfo, filters []Filter, opt *config.PersistOptions, collector *plan.Collector) []*core.StoreInfo { +func SelectUnavailableTargetStores(stores []*core.StoreInfo, filters []Filter, opt *config.PersistOptions, + collector *plan.Collector, counter *Counter) []*core.StoreInfo { return filterStoresBy(stores, func(s *core.StoreInfo) bool { targetID := strconv.FormatUint(s.GetID(), 10) return slice.AnyOf(filters, func(i int) bool { status := filters[i].Target(opt, s) if !status.IsOK() { cfilter, ok := filters[i].(comparingFilter) - sourceID := "" + sourceID := uint64(0) if ok { - sourceID = strconv.FormatUint(cfilter.GetSourceStoreID(), 10) + sourceID = cfilter.GetSourceStoreID() } - filterCounter.WithLabelValues(filterTarget, filters[i].Scope(), filters[i].Type(), targetID, sourceID).Inc() + if counter != nil { + counter.inc(target, filters[i].Type(), sourceID, s.GetID()) + } else { + filterCounter.WithLabelValues(target.String(), filters[i].Scope(), filters[i].Type().String(), + strconv.FormatUint(sourceID, 10), targetID).Inc() + } + if collector != nil { collector.Collect(plan.SetResourceWithStep(s, 2), plan.SetStatus(status)) } @@ -75,22 +81,33 @@ func SelectUnavailableTargetStores(stores []*core.StoreInfo, filters []Filter, o } // SelectTargetStores selects stores that be selected as target store from the list. 
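Putting the pieces together: a scheduler creates one Counter keyed by its scope, passes it to the select helpers on every schedule call, and flushes it afterwards, so rejected-store counts reach Prometheus as one batched update per round instead of one WithLabelValues call per rejection. A hedged sketch of that wiring (field and function names are assumptions; the balance-* scheduler hunks that do the real wiring are not shown here):

    import (
        "github.com/tikv/pd/server/config"
        "github.com/tikv/pd/server/core"
        "github.com/tikv/pd/server/schedule/filter"
    )

    type balanceXScheduler struct {
        filterCounter *filter.Counter
    }

    func newBalanceXScheduler() *balanceXScheduler {
        return &balanceXScheduler{
            filterCounter: filter.NewCounter(filter.BalanceLeader.String()),
        }
    }

    func (s *balanceXScheduler) schedule(stores []*core.StoreInfo, filters []filter.Filter, opt *config.PersistOptions) {
        sources := filter.SelectSourceStores(stores, filters, opt, nil /* collector */, s.filterCounter)
        _ = sources // ... pick a source store, build operators ...
        s.filterCounter.Flush() // one batched metrics update per scheduling round
    }
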
-func SelectTargetStores(stores []*core.StoreInfo, filters []Filter, opt *config.PersistOptions, collector *plan.Collector) []*core.StoreInfo { +func SelectTargetStores(stores []*core.StoreInfo, filters []Filter, opt *config.PersistOptions, collector *plan.Collector, + counter *Counter) []*core.StoreInfo { + if len(filters) == 0 { + return stores + } + return filterStoresBy(stores, func(s *core.StoreInfo) bool { - targetID := strconv.FormatUint(s.GetID(), 10) return slice.AllOf(filters, func(i int) bool { filter := filters[i] status := filter.Target(opt, s) if !status.IsOK() { cfilter, ok := filter.(comparingFilter) - sourceID := "" + sourceID := uint64(0) if ok { - sourceID = strconv.FormatUint(cfilter.GetSourceStoreID(), 10) + sourceID = cfilter.GetSourceStoreID() + } + if counter != nil { + counter.inc(target, filter.Type(), sourceID, s.GetID()) + } else { + targetIDStr := strconv.FormatUint(s.GetID(), 10) + sourceIDStr := strconv.FormatUint(sourceID, 10) + filterCounter.WithLabelValues(target.String(), filter.Scope(), filter.Type().String(), sourceIDStr, targetIDStr).Inc() } - filterCounter.WithLabelValues(filterTarget, filters[i].Scope(), filters[i].Type(), sourceID, targetID).Inc() if collector != nil { collector.Collect(plan.SetResource(s), plan.SetStatus(status)) } + return false } return true @@ -111,7 +128,7 @@ func filterStoresBy(stores []*core.StoreInfo, keepPred func(*core.StoreInfo) boo type Filter interface { // Scope is used to indicate where the filter will act on. Scope() string - Type() string + Type() filterType // Return plan.Status to show whether be filtered as source Source(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status // Return plan.Status to show whether be filtered as target @@ -138,7 +155,7 @@ func Target(opt *config.PersistOptions, store *core.StoreInfo, filters []Filter) if ok { sourceID = strconv.FormatUint(cfilter.GetSourceStoreID(), 10) } - filterCounter.WithLabelValues(filterTarget, filter.Scope(), filter.Type(), sourceID, targetID).Inc() + filterCounter.WithLabelValues(target.String(), filter.Scope(), filter.Type().String(), sourceID, targetID).Inc() } return false } @@ -165,8 +182,8 @@ func (f *excludedFilter) Scope() string { return f.scope } -func (f *excludedFilter) Type() string { - return "exclude-filter" +func (f *excludedFilter) Type() filterType { + return excluded } func (f *excludedFilter) Source(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { @@ -195,8 +212,8 @@ func (f *storageThresholdFilter) Scope() string { return f.scope } -func (f *storageThresholdFilter) Type() string { - return "storage-threshold-filter" +func (f *storageThresholdFilter) Type() filterType { + return storageThreshold } func (f *storageThresholdFilter) Source(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { @@ -263,15 +280,15 @@ func (f *distinctScoreFilter) Scope() string { return f.scope } -func (f *distinctScoreFilter) Type() string { - return "distinct-filter" +func (f *distinctScoreFilter) Type() filterType { + return distinctScore } -func (f *distinctScoreFilter) Source(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { +func (f *distinctScoreFilter) Source(_ *config.PersistOptions, _ *core.StoreInfo) *plan.Status { return statusOK } -func (f *distinctScoreFilter) Target(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { +func (f *distinctScoreFilter) Target(_ *config.PersistOptions, store *core.StoreInfo) *plan.Status { score := core.DistinctScore(f.labels, f.stores, store) switch 
f.policy { case locationSafeguard: @@ -305,7 +322,7 @@ type StoreStateFilter struct { // Set true if allows temporary states. AllowTemporaryStates bool // Reason is used to distinguish the reason of store state filter - Reason string + Reason filterType } // Scope returns the scheduler or the checker which the filter acts on. @@ -314,103 +331,103 @@ func (f *StoreStateFilter) Scope() string { } // Type returns the type of the Filter. -func (f *StoreStateFilter) Type() string { - return fmt.Sprintf("store-state-%s-filter", f.Reason) +func (f *StoreStateFilter) Type() filterType { + return f.Reason } // conditionFunc defines condition to determine a store should be selected. // It should consider if the filter allows temporary states. type conditionFunc func(*config.PersistOptions, *core.StoreInfo) *plan.Status -func (f *StoreStateFilter) isRemoved(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { +func (f *StoreStateFilter) isRemoved(_ *config.PersistOptions, store *core.StoreInfo) *plan.Status { if store.IsRemoved() { - f.Reason = "tombstone" + f.Reason = storeStateTombstone return statusStoreRemoved } - f.Reason = "" + f.Reason = storeStateOK return statusOK } func (f *StoreStateFilter) isDown(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { if store.DownTime() > opt.GetMaxStoreDownTime() { - f.Reason = "down" + f.Reason = storeStateDown return statusStoreDown } - f.Reason = "" + f.Reason = storeStateOK return statusOK } -func (f *StoreStateFilter) isRemoving(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { +func (f *StoreStateFilter) isRemoving(_ *config.PersistOptions, store *core.StoreInfo) *plan.Status { if store.IsRemoving() { - f.Reason = "offline" + f.Reason = storeStateOffline return statusStoresRemoving } - f.Reason = "" + f.Reason = storeStateOK return statusOK } -func (f *StoreStateFilter) pauseLeaderTransfer(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { +func (f *StoreStateFilter) pauseLeaderTransfer(_ *config.PersistOptions, store *core.StoreInfo) *plan.Status { if !store.AllowLeaderTransfer() { - f.Reason = "pause-leader" + f.Reason = storeStatePauseLeader return statusStoreRejectLeader } - f.Reason = "" + f.Reason = storeStateOK return statusOK } func (f *StoreStateFilter) slowStoreEvicted(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { if store.EvictedAsSlowStore() { - f.Reason = "slow-store" + f.Reason = storeStateSlow return statusStoreRejectLeader } - f.Reason = "" + f.Reason = storeStateOK return statusOK } -func (f *StoreStateFilter) isDisconnected(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { +func (f *StoreStateFilter) isDisconnected(_ *config.PersistOptions, store *core.StoreInfo) *plan.Status { if !f.AllowTemporaryStates && store.IsDisconnected() { - f.Reason = "disconnected" + f.Reason = storeStateDisconnected return statusStoreDisconnected } - f.Reason = "" + f.Reason = storeStateOK return statusOK } -func (f *StoreStateFilter) isBusy(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { +func (f *StoreStateFilter) isBusy(_ *config.PersistOptions, store *core.StoreInfo) *plan.Status { if !f.AllowTemporaryStates && store.IsBusy() { - f.Reason = "busy" + f.Reason = storeStateBusy return statusStoreBusy } - f.Reason = "" + f.Reason = storeStateOK return statusOK } -func (f *StoreStateFilter) exceedRemoveLimit(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { +func (f *StoreStateFilter) exceedRemoveLimit(_ *config.PersistOptions, 
store *core.StoreInfo) *plan.Status { if !f.AllowTemporaryStates && !store.IsAvailable(storelimit.RemovePeer) { - f.Reason = "exceed-remove-limit" + f.Reason = storeStateExceedRemoveLimit return statusStoreRemoveLimit } - f.Reason = "" + f.Reason = storeStateOK return statusOK } -func (f *StoreStateFilter) exceedAddLimit(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { +func (f *StoreStateFilter) exceedAddLimit(_ *config.PersistOptions, store *core.StoreInfo) *plan.Status { if !f.AllowTemporaryStates && !store.IsAvailable(storelimit.AddPeer) { - f.Reason = "exceed-add-limit" + f.Reason = storeStateExceedAddLimit return statusStoreAddLimit } - f.Reason = "" + f.Reason = storeStateOK return statusOK } func (f *StoreStateFilter) tooManySnapshots(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { if !f.AllowTemporaryStates && (uint64(store.GetSendingSnapCount()) > opt.GetMaxSnapshotCount() || uint64(store.GetReceivingSnapCount()) > opt.GetMaxSnapshotCount()) { - f.Reason = "too-many-snapshot" + f.Reason = storeStateTooManySnapshot return statusStoreSnapshotThrottled } - f.Reason = "" + f.Reason = storeStateOK return statusOK } @@ -418,19 +435,19 @@ func (f *StoreStateFilter) tooManyPendingPeers(opt *config.PersistOptions, store if !f.AllowTemporaryStates && opt.GetMaxPendingPeerCount() > 0 && store.GetPendingPeerCount() > int(opt.GetMaxPendingPeerCount()) { - f.Reason = "too-many-pending-peer" + f.Reason = storeStateTooManyPendingPeer return statusStorePendingPeerThrottled } - f.Reason = "" + f.Reason = storeStateOK return statusOK } func (f *StoreStateFilter) hasRejectLeaderProperty(opts *config.PersistOptions, store *core.StoreInfo) *plan.Status { if opts.CheckLabelProperty(config.RejectLeader, store.GetLabels()) { - f.Reason = "reject-leader" + f.Reason = storeStateRejectLeader return statusStoreRejectLeader } - f.Reason = "" + f.Reason = storeStateOK return statusOK } @@ -533,8 +550,8 @@ func (f labelConstraintFilter) Scope() string { } // Type returns the name of the filter. -func (f labelConstraintFilter) Type() string { - return "label-constraint-filter" +func (f labelConstraintFilter) Type() filterType { + return labelConstraint } // Source filters stores when select them as schedule source. @@ -546,7 +563,7 @@ func (f labelConstraintFilter) Source(opt *config.PersistOptions, store *core.St } // Target filters stores when select them as schedule target. 
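// The hunks above and below replace free-form Type()/Reason strings with a typed
// filterType whose String() feeds metric labels and test assertions. A minimal
// sketch of that enum-plus-name-table pattern: the constant identifiers are ones
// this patch uses, but the iota layout and most of the string values here are
// assumptions (only the store-state names echoed by the updated test are certain).
package filter

type filterType int

const (
	excluded filterType = iota
	storageThreshold
	storeStateOK
	storeStateDisconnected
	storeStateBusy
)

var filterTypeNames = []string{
	"exclude-filter",
	"storage-threshold-filter",
	"store-state-ok-filter",
	"store-state-disconnect-filter",
	"store-state-busy-filter",
}

// String returns the stable label used for metrics; keeping the names in one table
// avoids the scattered fmt.Sprintf calls the old string-based Reason field needed.
func (t filterType) String() string {
	if t >= 0 && int(t) < len(filterTypeNames) {
		return filterTypeNames[t]
	}
	return "unknown"
}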
-func (f labelConstraintFilter) Target(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { +func (f labelConstraintFilter) Target(_ *config.PersistOptions, store *core.StoreInfo) *plan.Status { if placement.MatchLabelConstraints(store, f.constraints) { return statusOK } @@ -580,11 +597,11 @@ func (f *ruleFitFilter) Scope() string { return f.scope } -func (f *ruleFitFilter) Type() string { - return "rule-fit-filter" +func (f *ruleFitFilter) Type() filterType { + return ruleFit } -func (f *ruleFitFilter) Source(options *config.PersistOptions, store *core.StoreInfo) *plan.Status { +func (f *ruleFitFilter) Source(_ *config.PersistOptions, _ *core.StoreInfo) *plan.Status { return statusOK } @@ -633,11 +650,11 @@ func (f *ruleLeaderFitFilter) Scope() string { return f.scope } -func (f *ruleLeaderFitFilter) Type() string { - return "rule-fit-leader-filter" +func (f *ruleLeaderFitFilter) Type() filterType { + return ruleLeader } -func (f *ruleLeaderFitFilter) Source(options *config.PersistOptions, store *core.StoreInfo) *plan.Status { +func (f *ruleLeaderFitFilter) Source(_ *config.PersistOptions, _ *core.StoreInfo) *plan.Status { return statusOK } @@ -688,18 +705,18 @@ func (f *engineFilter) Scope() string { return f.scope } -func (f *engineFilter) Type() string { - return "engine-filter" +func (f *engineFilter) Type() filterType { + return engine } -func (f *engineFilter) Source(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { +func (f *engineFilter) Source(_ *config.PersistOptions, store *core.StoreInfo) *plan.Status { if f.constraint.MatchStore(store) { return statusOK } return statusStoreNotMatchRule } -func (f *engineFilter) Target(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { +func (f *engineFilter) Target(_ *config.PersistOptions, store *core.StoreInfo) *plan.Status { if f.constraint.MatchStore(store) { return statusOK } @@ -731,8 +748,8 @@ func (f *specialUseFilter) Scope() string { return f.scope } -func (f *specialUseFilter) Type() string { - return "special-use-filter" +func (f *specialUseFilter) Type() filterType { + return specialUse } func (f *specialUseFilter) Source(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { @@ -805,15 +822,15 @@ func (f *isolationFilter) Scope() string { return f.scope } -func (f *isolationFilter) Type() string { - return "isolation-filter" +func (f *isolationFilter) Type() filterType { + return isolation } func (f *isolationFilter) Source(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { return statusOK } -func (f *isolationFilter) Target(opt *config.PersistOptions, store *core.StoreInfo) *plan.Status { +func (f *isolationFilter) Target(_ *config.PersistOptions, store *core.StoreInfo) *plan.Status { // No isolation constraint to fit if len(f.constraintSet) == 0 { return statusStoreNotMatchIsolation diff --git a/server/schedule/filter/filters_test.go b/server/schedule/filter/filters_test.go index b9d225d802c0..adb7d8aeb696 100644 --- a/server/schedule/filter/filters_test.go +++ b/server/schedule/filter/filters_test.go @@ -210,26 +210,26 @@ func TestStoreStateFilterReason(t *testing.T) { check := func(store *core.StoreInfo, testCases []testCase) { for _, testCase := range testCases { filters[testCase.filterIdx].Source(opt, store) - re.Equal(testCase.sourceReason, filters[testCase.filterIdx].(*StoreStateFilter).Reason) + re.Equal(testCase.sourceReason, filters[testCase.filterIdx].(*StoreStateFilter).Reason.String()) filters[testCase.filterIdx].Source(opt, store) - 
re.Equal(testCase.targetReason, filters[testCase.filterIdx].(*StoreStateFilter).Reason) + re.Equal(testCase.targetReason, filters[testCase.filterIdx].(*StoreStateFilter).Reason.String()) } } // No reason catched store = store.Clone(core.SetLastHeartbeatTS(time.Now())) testCases := []testCase{ - {2, "", ""}, + {2, "store-state-ok-filter", "store-state-ok-filter"}, } check(store, testCases) // Disconnected store = store.Clone(core.SetLastHeartbeatTS(time.Now().Add(-5 * time.Minute))) testCases = []testCase{ - {0, "disconnected", "disconnected"}, - {1, "", ""}, - {2, "disconnected", "disconnected"}, - {3, "", ""}, + {0, "store-state-disconnect-filter", "store-state-disconnect-filter"}, + {1, "store-state-ok-filter", "store-state-ok-filter"}, + {2, "store-state-disconnect-filter", "store-state-disconnect-filter"}, + {3, "store-state-ok-filter", "store-state-ok-filter"}, } check(store, testCases) @@ -237,10 +237,10 @@ func TestStoreStateFilterReason(t *testing.T) { store = store.Clone(core.SetLastHeartbeatTS(time.Now())). Clone(core.SetStoreStats(&pdpb.StoreStats{IsBusy: true})) testCases = []testCase{ - {0, "", ""}, - {1, "busy", "busy"}, - {2, "busy", "busy"}, - {3, "", ""}, + {0, "store-state-ok-filter", "store-state-ok-filter"}, + {1, "store-state-busy-filter", "store-state-busy-filter"}, + {2, "store-state-busy-filter", "store-state-busy-filter"}, + {3, "store-state-ok-filter", "store-state-ok-filter"}, } check(store, testCases) } diff --git a/server/schedulers/balance_leader.go b/server/schedulers/balance_leader.go index c1632d48abe2..209b94feb82e 100644 --- a/server/schedulers/balance_leader.go +++ b/server/schedulers/balance_leader.go @@ -177,12 +177,13 @@ func (handler *balanceLeaderHandler) ListConfig(w http.ResponseWriter, r *http.R type balanceLeaderScheduler struct { *BaseScheduler *retryQuota - name string - conf *balanceLeaderSchedulerConfig - handler http.Handler - opController *schedule.OperatorController - filters []filter.Filter - counter *prometheus.CounterVec + name string + conf *balanceLeaderSchedulerConfig + handler http.Handler + opController *schedule.OperatorController + filters []filter.Filter + counter *prometheus.CounterVec + filterCounter *filter.Counter } // newBalanceLeaderScheduler creates a scheduler that tends to keep leaders on @@ -197,6 +198,7 @@ func newBalanceLeaderScheduler(opController *schedule.OperatorController, conf * handler: newBalanceLeaderHandler(conf), opController: opController, counter: balanceLeaderCounter, + filterCounter: filter.NewCounter(filter.BalanceLeader.String()), } for _, option := range options { option(s) @@ -363,8 +365,8 @@ func (l *balanceLeaderScheduler) Schedule(cluster schedule.Cluster, dryRun bool) scoreFunc := func(store *core.StoreInfo) float64 { return store.LeaderScore(solver.kind.Policy, solver.GetOpInfluence(store.GetID())) } - sourceCandidate := newCandidateStores(filter.SelectSourceStores(stores, l.filters, cluster.GetOpts(), collector), false, scoreFunc) - targetCandidate := newCandidateStores(filter.SelectTargetStores(stores, l.filters, cluster.GetOpts(), nil), true, scoreFunc) + sourceCandidate := newCandidateStores(filter.SelectSourceStores(stores, l.filters, cluster.GetOpts(), collector, l.filterCounter), false, scoreFunc) + targetCandidate := newCandidateStores(filter.SelectTargetStores(stores, l.filters, cluster.GetOpts(), nil, l.filterCounter), true, scoreFunc) usedRegions := make(map[uint64]struct{}) result := make([]*operator.Operator, 0, batch) @@ -392,6 +394,7 @@ func (l *balanceLeaderScheduler) 
Schedule(cluster schedule.Cluster, dryRun bool) } } } + l.filterCounter.Flush() l.retryQuota.GC(append(sourceCandidate.stores, targetCandidate.stores...)) return result, collector.GetPlans() } @@ -472,7 +475,7 @@ func (l *balanceLeaderScheduler) transferLeaderOut(solver *solver, collector *pl if leaderFilter := filter.NewPlacementLeaderSafeguard(l.GetName(), opts, solver.GetBasicCluster(), solver.GetRuleManager(), solver.region, solver.source, false /*allowMoveLeader*/); leaderFilter != nil { finalFilters = append(l.filters, leaderFilter) } - targets = filter.SelectTargetStores(targets, finalFilters, opts, collector) + targets = filter.SelectTargetStores(targets, finalFilters, opts, collector, l.filterCounter) leaderSchedulePolicy := opts.GetLeaderSchedulePolicy() sort.Slice(targets, func(i, j int) bool { iOp := solver.GetOpInfluence(targets[i].GetID()) @@ -522,7 +525,7 @@ func (l *balanceLeaderScheduler) transferLeaderIn(solver *solver, collector *pla finalFilters = append(l.filters, leaderFilter) } target := filter.NewCandidates([]*core.StoreInfo{solver.target}). - FilterTarget(opts, nil, finalFilters...). + FilterTarget(opts, nil, l.filterCounter, finalFilters...). PickFirst() if target == nil { log.Debug("region has no target store", zap.String("scheduler", l.GetName()), zap.Uint64("region-id", solver.region.GetID())) diff --git a/server/schedulers/balance_region.go b/server/schedulers/balance_region.go index 8f291f09e84c..8209214691fd 100644 --- a/server/schedulers/balance_region.go +++ b/server/schedulers/balance_region.go @@ -73,10 +73,11 @@ type balanceRegionSchedulerConfig struct { type balanceRegionScheduler struct { *BaseScheduler *retryQuota - conf *balanceRegionSchedulerConfig - opController *schedule.OperatorController - filters []filter.Filter - counter *prometheus.CounterVec + conf *balanceRegionSchedulerConfig + opController *schedule.OperatorController + filters []filter.Filter + counter *prometheus.CounterVec + filterCounter *filter.Counter } // newBalanceRegionScheduler creates a scheduler that tends to keep regions on @@ -89,6 +90,7 @@ func newBalanceRegionScheduler(opController *schedule.OperatorController, conf * conf: conf, opController: opController, counter: balanceRegionCounter, + filterCounter: filter.NewCounter(filter.BalanceRegion.String()), } for _, setOption := range opts { setOption(scheduler) @@ -146,8 +148,8 @@ func (s *balanceRegionScheduler) Schedule(cluster schedule.Cluster, dryRun bool) schedulerCounter.WithLabelValues(s.GetName(), "schedule").Inc() stores := cluster.GetStores() opts := cluster.GetOpts() - faultTargets := filter.SelectUnavailableTargetStores(stores, s.filters, opts, collector) - sourceStores := filter.SelectSourceStores(stores, s.filters, opts, collector) + faultTargets := filter.SelectUnavailableTargetStores(stores, s.filters, opts, collector, s.filterCounter) + sourceStores := filter.SelectSourceStores(stores, s.filters, opts, collector, s.filterCounter) opInfluence := s.opController.GetOpInfluence(cluster) s.OpController.GetFastOpInfluence(cluster, opInfluence) kind := core.NewScheduleKind(core.RegionKind, core.BySize) @@ -183,7 +185,7 @@ func (s *balanceRegionScheduler) Schedule(cluster schedule.Cluster, dryRun bool) } for i := 0; i < retryLimit; i++ { // Priority pick the region that has a pending peer. - // Pending region may means the disk is overload, remove the pending region firstly. + // Pending region may mean the disk is overload, remove the pending region firstly. 
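// Condensed from the scheduler hunks in this commit: each scheduler owns one
// filter.Counter, passes it into every store-selection call, and flushes it once
// per scheduling round. A sketch of that lifecycle using the signatures shown
// above; candidate scoring, the retry quota, and operator building are omitted,
// and selectionRound itself is a hypothetical helper, not part of the patch.
package schedulers

import (
	"github.com/tikv/pd/server/schedule"
	"github.com/tikv/pd/server/schedule/filter"
)

func selectionRound(cluster schedule.Cluster, filters []filter.Filter, counter *filter.Counter) {
	stores := cluster.GetStores()
	opts := cluster.GetOpts()
	// The helpers increment the in-memory counter for every store a filter rejects.
	sources := filter.SelectSourceStores(stores, filters, opts, nil /*collector*/, counter)
	targets := filter.SelectTargetStores(stores, filters, opts, nil /*collector*/, counter)
	_, _ = sources, targets
	// One batched metrics write per round.
	counter.Flush()
}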
solver.region = filter.SelectOneRegion(cluster.RandPendingRegions(solver.SourceStoreID(), s.conf.Ranges), collector, baseRegionFilters...) if solver.region == nil { @@ -197,7 +199,7 @@ func (s *balanceRegionScheduler) Schedule(cluster schedule.Cluster, dryRun bool) append(baseRegionFilters, pendingFilter)...) } if solver.region == nil { - // Finally pick learner. + // Finally, pick learner. solver.region = filter.SelectOneRegion(cluster.RandLearnerRegions(solver.SourceStoreID(), s.conf.Ranges), collector, append(baseRegionFilters, pendingFilter)...) } @@ -215,7 +217,7 @@ func (s *balanceRegionScheduler) Schedule(cluster schedule.Cluster, dryRun bool) schedulerCounter.WithLabelValues(s.GetName(), "region-hot").Inc() continue } - // Check region whether have leader + // Check region leader if solver.region.GetLeader() == nil { log.Warn("region have no leader", zap.String("scheduler", s.GetName()), zap.Uint64("region-id", solver.region.GetID())) if collector != nil { @@ -234,6 +236,7 @@ func (s *balanceRegionScheduler) Schedule(cluster schedule.Cluster, dryRun bool) } s.retryQuota.Attenuate(solver.source) } + s.filterCounter.Flush() s.retryQuota.GC(stores) return nil, collector.GetPlans() } @@ -250,8 +253,7 @@ func (s *balanceRegionScheduler) transferPeer(solver *solver, collector *plan.Co filter.NewExcludedFilter(s.GetName(), nil, excludeTargets), filter.NewPlacementSafeguard(s.GetName(), solver.GetOpts(), solver.GetBasicCluster(), solver.GetRuleManager(), solver.region, solver.source), } - - candidates := filter.NewCandidates(dstStores).FilterTarget(solver.GetOpts(), collector, filters...) + candidates := filter.NewCandidates(dstStores).FilterTarget(solver.GetOpts(), collector, s.filterCounter, filters...) if len(candidates.Stores) != 0 { solver.step++ } diff --git a/server/schedulers/evict_leader.go b/server/schedulers/evict_leader.go index 2e807f18a9aa..c13c4eb4c70f 100644 --- a/server/schedulers/evict_leader.go +++ b/server/schedulers/evict_leader.go @@ -330,7 +330,7 @@ func scheduleEvictLeaderOnce(name, typ string, cluster schedule.Cluster, conf ev filters = append(filters, &filter.StoreStateFilter{ActionScope: name, TransferLeader: true}) candidates := filter.NewCandidates(cluster.GetFollowerStores(region)). - FilterTarget(cluster.GetOpts(), nil, filters...) + FilterTarget(cluster.GetOpts(), nil, nil, filters...) // Compatible with old TiKV transfer leader logic. target := candidates.RandomPick() targets := candidates.PickAll() diff --git a/server/schedulers/label.go b/server/schedulers/label.go index b2d8423dd175..e566b317d7e7 100644 --- a/server/schedulers/label.go +++ b/server/schedulers/label.go @@ -127,7 +127,7 @@ func (s *labelScheduler) Schedule(cluster schedule.Cluster, dryRun bool) ([]*ope f := filter.NewExcludedFilter(s.GetName(), nil, excludeStores) target := filter.NewCandidates(cluster.GetFollowerStores(region)). - FilterTarget(cluster.GetOpts(), nil, &filter.StoreStateFilter{ActionScope: LabelName, TransferLeader: true}, f). + FilterTarget(cluster.GetOpts(), nil, nil, &filter.StoreStateFilter{ActionScope: LabelName, TransferLeader: true}, f). 
RandomPick() if target == nil { log.Debug("label scheduler no target found for region", zap.Uint64("region-id", region.GetID())) diff --git a/server/schedulers/random_merge.go b/server/schedulers/random_merge.go index 53c6eb1cf334..c1b0be3bac45 100644 --- a/server/schedulers/random_merge.go +++ b/server/schedulers/random_merge.go @@ -104,7 +104,7 @@ func (s *randomMergeScheduler) Schedule(cluster schedule.Cluster, dryRun bool) ( schedulerCounter.WithLabelValues(s.GetName(), "schedule").Inc() store := filter.NewCandidates(cluster.GetStores()). - FilterSource(cluster.GetOpts(), nil, &filter.StoreStateFilter{ActionScope: s.conf.Name, MoveRegion: true}). + FilterSource(cluster.GetOpts(), nil, nil, &filter.StoreStateFilter{ActionScope: s.conf.Name, MoveRegion: true}). RandomPick() if store == nil { schedulerCounter.WithLabelValues(s.GetName(), "no-source-store").Inc() diff --git a/server/schedulers/shuffle_leader.go b/server/schedulers/shuffle_leader.go index da9c9777f604..c07f1abb320f 100644 --- a/server/schedulers/shuffle_leader.go +++ b/server/schedulers/shuffle_leader.go @@ -110,7 +110,7 @@ func (s *shuffleLeaderScheduler) Schedule(cluster schedule.Cluster, dryRun bool) // 2. transfer a leader to the store. schedulerCounter.WithLabelValues(s.GetName(), "schedule").Inc() targetStore := filter.NewCandidates(cluster.GetStores()). - FilterTarget(cluster.GetOpts(), nil, s.filters...). + FilterTarget(cluster.GetOpts(), nil, nil, s.filters...). RandomPick() if targetStore == nil { schedulerCounter.WithLabelValues(s.GetName(), "no-target-store").Inc() diff --git a/server/schedulers/shuffle_region.go b/server/schedulers/shuffle_region.go index eeba44ef6afa..acd822a39024 100644 --- a/server/schedulers/shuffle_region.go +++ b/server/schedulers/shuffle_region.go @@ -130,7 +130,7 @@ func (s *shuffleRegionScheduler) Schedule(cluster schedule.Cluster, dryRun bool) func (s *shuffleRegionScheduler) scheduleRemovePeer(cluster schedule.Cluster) (*core.RegionInfo, *metapb.Peer) { candidates := filter.NewCandidates(cluster.GetStores()). - FilterSource(cluster.GetOpts(), nil, s.filters...). + FilterSource(cluster.GetOpts(), nil, nil, s.filters...). Shuffle() pendingFilter := filter.NewRegionPendingFilter() @@ -169,7 +169,7 @@ func (s *shuffleRegionScheduler) scheduleAddPeer(cluster schedule.Cluster, regio excludedFilter := filter.NewExcludedFilter(s.GetName(), nil, region.GetStoreIDs()) target := filter.NewCandidates(cluster.GetStores()). - FilterTarget(cluster.GetOpts(), nil, append(s.filters, scoreGuard, excludedFilter)...). + FilterTarget(cluster.GetOpts(), nil, nil, append(s.filters, scoreGuard, excludedFilter)...). RandomPick() if target == nil { return nil diff --git a/server/schedulers/utils.go b/server/schedulers/utils.go index acd1fbe84f90..2228c8af9344 100644 --- a/server/schedulers/utils.go +++ b/server/schedulers/utils.go @@ -146,7 +146,7 @@ func (p *solver) shouldBalance(scheduleName string) bool { // Make sure after move, source score is still greater than target score. 
shouldBalance := p.sourceScore > p.targetScore - if !shouldBalance { + if !shouldBalance && log.GetLevel() <= zap.DebugLevel { log.Debug("skip balance "+p.kind.Resource.String(), zap.String("scheduler", scheduleName), zap.Uint64("region-id", p.region.GetID()), zap.Uint64("source-store", sourceID), zap.Uint64("target-store", targetID), zap.Int64("source-size", p.source.GetRegionSize()), zap.Float64("source-score", p.sourceScore), From d033fbf11a5daadb1961d4c2d5a4d9befbfa820d Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Thu, 27 Oct 2022 12:01:57 +0800 Subject: [PATCH 17/67] *: reduce call when comparing key (#5628) ref tikv/pd#5606 Signed-off-by: Ryan Leung Co-authored-by: Ti Chi Robot --- pkg/rangetree/range_tree.go | 3 +- server/core/region.go | 26 ++++++++--------- server/core/region_test.go | 4 +-- server/core/region_tree.go | 52 ++++++++++++++++----------------- server/core/region_tree_test.go | 26 ++++++++--------- 5 files changed, 56 insertions(+), 55 deletions(-) diff --git a/pkg/rangetree/range_tree.go b/pkg/rangetree/range_tree.go index 47d7e960a0e1..174e338f7758 100644 --- a/pkg/rangetree/range_tree.go +++ b/pkg/rangetree/range_tree.go @@ -76,9 +76,10 @@ func (r *RangeTree) GetOverlaps(item RangeItem) []RangeItem { } var overlaps []RangeItem + endKey := item.GetEndKey() r.tree.AscendGreaterOrEqual(result, func(i btree.Item) bool { over := i.(RangeItem) - if len(item.GetEndKey()) > 0 && bytes.Compare(item.GetEndKey(), over.GetStartKey()) <= 0 { + if len(endKey) > 0 && bytes.Compare(endKey, over.GetStartKey()) <= 0 { return false } overlaps = append(overlaps, over) diff --git a/server/core/region.go b/server/core/region.go index d041ede2cb9d..cdf961749c4a 100644 --- a/server/core/region.go +++ b/server/core/region.go @@ -702,7 +702,7 @@ func (rm regionMap) Get(id uint64) *regionItem { // If the regionItem already exists, it will be overwritten. // Note: Do not use this function when you only need to update the RegionInfo and do not need a new regionItem. func (rm regionMap) AddNew(region *RegionInfo) *regionItem { - item := ®ionItem{region: region} + item := ®ionItem{RegionInfo: region} rm[region.GetID()] = item return item } @@ -738,7 +738,7 @@ func NewRegionsInfo() *RegionsInfo { // GetRegion returns the RegionInfo with regionID func (r *RegionsInfo) GetRegion(regionID uint64) *RegionInfo { if item := r.regions.Get(regionID); item != nil { - return item.region + return item.RegionInfo } return nil } @@ -750,7 +750,7 @@ func (r *RegionsInfo) SetRegion(region *RegionInfo) (overlaps []*RegionInfo) { rangeChanged := true // This Region is new, or its range has changed. if item = r.regions.Get(region.GetID()); item != nil { // If this ID already exists, use the existing regionItem and pick out the origin. - origin := item.region + origin := item.RegionInfo rangeChanged = !origin.rangeEqualsTo(region) if rangeChanged { // Delete itself in regionTree so that overlaps will not contain itself. @@ -765,14 +765,14 @@ func (r *RegionsInfo) SetRegion(region *RegionInfo) (overlaps []*RegionInfo) { // If the peers are not changed, only the statistical on the sub regionTree needs to be updated. r.updateSubTreeStat(origin, region) // Update the RegionInfo in the regionItem. - item.region = region + item.RegionInfo = region return } // If the range or peers have changed, the sub regionTree needs to be cleaned up. // TODO: Improve performance by deleting only the different peers. r.removeRegionFromSubTree(origin) // Update the RegionInfo in the regionItem. 
- item.region = region + item.RegionInfo = region } else { // If this ID does not exist, generate a new regionItem and save it in the regionMap. item = r.regions.AddNew(region) @@ -963,7 +963,7 @@ func (r *RegionsInfo) GetPrevRegionByKey(regionKey []byte) *RegionInfo { func (r *RegionsInfo) GetRegions() []*RegionInfo { regions := make([]*RegionInfo, 0, r.regions.Len()) for _, item := range r.regions { - regions = append(regions, item.region) + regions = append(regions, item.RegionInfo) } return regions } @@ -1027,7 +1027,7 @@ func (r *RegionsInfo) GetStoreWriteRate(storeID uint64) (bytesRate, keysRate flo func (r *RegionsInfo) GetMetaRegions() []*metapb.Region { regions := make([]*metapb.Region, 0, r.regions.Len()) for _, item := range r.regions { - regions = append(regions, typeutil.DeepClone(item.region.meta, RegionFactory)) + regions = append(regions, typeutil.DeepClone(item.meta, RegionFactory)) } return regions } @@ -1110,7 +1110,7 @@ func (r *RegionsInfo) RandLearnerRegions(storeID uint64, ranges []KeyRange, n in // GetLeader returns leader RegionInfo by storeID and regionID (now only used in test) func (r *RegionsInfo) GetLeader(storeID uint64, region *RegionInfo) *RegionInfo { if leaders, ok := r.leaders[storeID]; ok { - return leaders.find(region).region + return leaders.find(region).RegionInfo } return nil } @@ -1118,7 +1118,7 @@ func (r *RegionsInfo) GetLeader(storeID uint64, region *RegionInfo) *RegionInfo // GetFollower returns follower RegionInfo by storeID and regionID (now only used in test) func (r *RegionsInfo) GetFollower(storeID uint64, region *RegionInfo) *RegionInfo { if followers, ok := r.followers[storeID]; ok { - return followers.find(region).region + return followers.find(region).RegionInfo } return nil } @@ -1215,11 +1215,11 @@ func (r *RegionsInfo) GetAdjacentRegions(region *RegionInfo) (*RegionInfo, *Regi p, n := r.tree.getAdjacentRegions(region) var prev, next *RegionInfo // check key to avoid key range hole - if p != nil && bytes.Equal(p.region.GetEndKey(), region.GetStartKey()) { - prev = r.GetRegion(p.region.GetID()) + if p != nil && bytes.Equal(p.GetEndKey(), region.GetStartKey()) { + prev = r.GetRegion(p.GetID()) } - if n != nil && bytes.Equal(region.GetEndKey(), n.region.GetStartKey()) { - next = r.GetRegion(n.region.GetID()) + if n != nil && bytes.Equal(region.GetEndKey(), n.GetStartKey()) { + next = r.GetRegion(n.GetID()) } return prev, next } diff --git a/server/core/region_test.go b/server/core/region_test.go index f3f0c917a04b..93367f8dbbc9 100644 --- a/server/core/region_test.go +++ b/server/core/region_test.go @@ -401,7 +401,7 @@ func regionInfo(id uint64) *RegionInfo { func check(re *require.Assertions, rm regionMap, ids ...uint64) { // Check Get. for _, id := range ids { - re.Equal(id, rm.Get(id).region.GetID()) + re.Equal(id, rm.Get(id).GetID()) } // Check Len. re.Equal(len(ids), rm.Len()) @@ -412,7 +412,7 @@ func check(re *require.Assertions, rm regionMap, ids ...uint64) { } set1 := make(map[uint64]struct{}) for _, r := range rm { - set1[r.region.GetID()] = struct{}{} + set1[r.GetID()] = struct{}{} } re.Equal(expect, set1) } diff --git a/server/core/region_tree.go b/server/core/region_tree.go index a5d717a93e98..690566758379 100644 --- a/server/core/region_tree.go +++ b/server/core/region_tree.go @@ -30,28 +30,28 @@ import ( var _ rangetree.RangeItem = ®ionItem{} type regionItem struct { - region *RegionInfo + *RegionInfo } // GetStartKey returns the start key of the region. 
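// The regionItem hunks above cheapen hot-path comparisons in two ways: the item now
// embeds *RegionInfo, so fields such as meta.StartKey are reached without going
// through a wrapper field, and Less asserts the concrete *regionItem instead of the
// rangetree.RangeItem interface. A small self-contained sketch of the same idea,
// with illustrative stand-in types rather than the real pd ones:
package main

import (
	"bytes"
	"fmt"
)

type regionMeta struct{ startKey []byte }

type region struct{ meta regionMeta }

// item embeds *region, which promotes its fields and methods onto the tree item.
type item struct {
	*region
}

// less compares concrete items directly, so the comparator pays for neither
// interface method dispatch nor repeated getter calls per comparison.
func (a *item) less(other *item) bool {
	return bytes.Compare(a.meta.startKey, other.meta.startKey) < 0
}

func main() {
	a := &item{region: &region{meta: regionMeta{startKey: []byte("a")}}}
	b := &item{region: &region{meta: regionMeta{startKey: []byte("b")}}}
	fmt.Println(a.less(b)) // true
}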
func (r *regionItem) GetStartKey() []byte { - return r.region.GetStartKey() + return r.meta.StartKey } // GetEndKey returns the end key of the region. func (r *regionItem) GetEndKey() []byte { - return r.region.GetEndKey() + return r.meta.EndKey } // Less returns true if the region start key is less than the other. func (r *regionItem) Less(other btree.Item) bool { - left := r.region.GetStartKey() - right := other.(rangetree.RangeItem).GetStartKey() + left := r.meta.StartKey + right := other.(*regionItem).meta.StartKey return bytes.Compare(left, right) < 0 } func (r *regionItem) Contains(key []byte) bool { - start, end := r.region.GetStartKey(), r.region.GetEndKey() + start, end := r.GetStartKey(), r.GetEndKey() return bytes.Compare(key, start) >= 0 && (len(end) == 0 || bytes.Compare(key, end) < 0) } @@ -88,11 +88,11 @@ func (t *regionTree) length() int { // getOverlaps gets the regions which are overlapped with the specified region range. func (t *regionTree) getOverlaps(region *RegionInfo) []*RegionInfo { - item := ®ionItem{region: region} + item := ®ionItem{RegionInfo: region} result := t.tree.GetOverlaps(item) overlaps := make([]*RegionInfo, len(result)) for i, r := range result { - overlaps[i] = r.(*regionItem).region + overlaps[i] = r.(*regionItem).RegionInfo } return overlaps } @@ -101,7 +101,7 @@ func (t *regionTree) getOverlaps(region *RegionInfo) []*RegionInfo { // It finds and deletes all the overlapped regions first, and then // insert the region. func (t *regionTree) update(item *regionItem) []*RegionInfo { - region := item.region + region := item.RegionInfo t.totalSize += region.approximateSize regionWriteBytesRate, regionWriteKeysRate := region.GetWriteRate() t.totalWriteBytesRate += regionWriteBytesRate @@ -110,7 +110,7 @@ func (t *regionTree) update(item *regionItem) []*RegionInfo { overlaps := t.tree.Update(item) result := make([]*RegionInfo, len(overlaps)) for i, overlap := range overlaps { - old := overlap.(*regionItem).region + old := overlap.(*regionItem).RegionInfo result[i] = old log.Debug("overlapping region", zap.Uint64("region-id", old.GetID()), @@ -125,7 +125,7 @@ func (t *regionTree) update(item *regionItem) []*RegionInfo { return result } -// updateStat is used to update statistics when regionItem.region is directly replaced. +// updateStat is used to update statistics when regionItem.RegionInfo is directly replaced. 
func (t *regionTree) updateStat(origin *RegionInfo, region *RegionInfo) { t.totalSize += region.approximateSize regionWriteBytesRate, regionWriteKeysRate := region.GetWriteRate() @@ -145,14 +145,14 @@ func (t *regionTree) remove(region *RegionInfo) { if t.length() == 0 { return } - item := ®ionItem{region: region} + item := ®ionItem{RegionInfo: region} result := t.tree.Find(item) - if result == nil || result.(*regionItem).region.GetID() != region.GetID() { + if result == nil || result.(*regionItem).GetID() != region.GetID() { return } - t.totalSize -= result.(*regionItem).region.GetApproximateSize() - regionWriteBytesRate, regionWriteKeysRate := result.(*regionItem).region.GetWriteRate() + t.totalSize -= result.(*regionItem).GetApproximateSize() + regionWriteBytesRate, regionWriteKeysRate := result.(*regionItem).GetWriteRate() t.totalWriteBytesRate -= regionWriteBytesRate t.totalWriteKeysRate -= regionWriteKeysRate t.tree.Remove(result) @@ -165,7 +165,7 @@ func (t *regionTree) search(regionKey []byte) *RegionInfo { if result == nil { return nil } - return result.region + return result.RegionInfo } // searchPrev returns the previous region of the region where the regionKey is located. @@ -175,20 +175,20 @@ func (t *regionTree) searchPrev(regionKey []byte) *RegionInfo { if curRegionItem == nil { return nil } - prevRegionItem, _ := t.getAdjacentRegions(curRegionItem.region) + prevRegionItem, _ := t.getAdjacentRegions(curRegionItem.RegionInfo) if prevRegionItem == nil { return nil } - if !bytes.Equal(prevRegionItem.region.GetEndKey(), curRegionItem.region.GetStartKey()) { + if !bytes.Equal(prevRegionItem.GetEndKey(), curRegionItem.GetStartKey()) { return nil } - return prevRegionItem.region + return prevRegionItem.RegionInfo } // find is a helper function to find an item that contains the regions start // key. 
func (t *regionTree) find(region *RegionInfo) *regionItem { - item := t.tree.Find(®ionItem{region: region}) + item := t.tree.Find(®ionItem{RegionInfo: region}) if item == nil { return nil } @@ -205,9 +205,9 @@ func (t *regionTree) scanRange(startKey []byte, f func(*RegionInfo) bool) { // find if there is a region with key range [s, d), s < startKey < d fn := func(item rangetree.RangeItem) bool { r := item.(*regionItem) - return f(r.region) + return f(r.RegionInfo) } - t.tree.ScanRange(®ionItem{region: region}, fn) + t.tree.ScanRange(®ionItem{RegionInfo: region}, fn) } func (t *regionTree) scanRanges() []*RegionInfo { @@ -223,7 +223,7 @@ func (t *regionTree) scanRanges() []*RegionInfo { } func (t *regionTree) getAdjacentRegions(region *RegionInfo) (*regionItem, *regionItem) { - item := ®ionItem{region: &RegionInfo{meta: &metapb.Region{StartKey: region.GetStartKey()}}} + item := ®ionItem{RegionInfo: &RegionInfo{meta: &metapb.Region{StartKey: region.GetStartKey()}}} prevItem, nextItem := t.tree.GetAdjacentItem(item) var prev, next *regionItem if prevItem != nil { @@ -248,10 +248,10 @@ func (t *regionTree) RandomRegion(ranges []KeyRange) *RegionInfo { for _, i := range rand.Perm(len(ranges)) { var endIndex int startKey, endKey := ranges[i].StartKey, ranges[i].EndKey - startRegion, startIndex := t.tree.GetWithIndex(®ionItem{region: &RegionInfo{meta: &metapb.Region{StartKey: startKey}}}) + startRegion, startIndex := t.tree.GetWithIndex(®ionItem{RegionInfo: &RegionInfo{meta: &metapb.Region{StartKey: startKey}}}) if len(endKey) != 0 { - _, endIndex = t.tree.GetWithIndex(®ionItem{region: &RegionInfo{meta: &metapb.Region{StartKey: endKey}}}) + _, endIndex = t.tree.GetWithIndex(®ionItem{RegionInfo: &RegionInfo{meta: &metapb.Region{StartKey: endKey}}}) } else { endIndex = t.tree.Len() } @@ -272,7 +272,7 @@ func (t *regionTree) RandomRegion(ranges []KeyRange) *RegionInfo { continue } index := rand.Intn(endIndex-startIndex) + startIndex - region := t.tree.GetAt(index).(*regionItem).region + region := t.tree.GetAt(index).(*regionItem).RegionInfo if region.isInvolved(startKey, endKey) { return region } diff --git a/server/core/region_tree_test.go b/server/core/region_tree_test.go index a8a8d6a3ba7f..3237a5daf095 100644 --- a/server/core/region_tree_test.go +++ b/server/core/region_tree_test.go @@ -185,28 +185,28 @@ func TestRegionTree(t *testing.T) { // check get adjacent regions prev, next := tree.getAdjacentRegions(regionA) re.Nil(prev) - re.Equal(regionB, next.region) + re.Equal(regionB, next.RegionInfo) prev, next = tree.getAdjacentRegions(regionB) - re.Equal(regionA, prev.region) - re.Equal(regionD, next.region) + re.Equal(regionA, prev.RegionInfo) + re.Equal(regionD, next.RegionInfo) prev, next = tree.getAdjacentRegions(regionC) - re.Equal(regionB, prev.region) - re.Equal(regionD, next.region) + re.Equal(regionB, prev.RegionInfo) + re.Equal(regionD, next.RegionInfo) prev, next = tree.getAdjacentRegions(regionD) - re.Equal(regionB, prev.region) + re.Equal(regionB, prev.RegionInfo) re.Nil(next) // region with the same range and different region id will not be delete. 
- region0 := newRegionItem([]byte{}, []byte("a")).region + region0 := newRegionItem([]byte{}, []byte("a")).RegionInfo updateNewItem(tree, region0) re.Equal(region0, tree.search([]byte{})) - anotherRegion0 := newRegionItem([]byte{}, []byte("a")).region + anotherRegion0 := newRegionItem([]byte{}, []byte("a")).RegionInfo anotherRegion0.meta.Id = 123 tree.remove(anotherRegion0) re.Equal(region0, tree.search([]byte{})) // overlaps with 0, A, B, C. - region0D := newRegionItem([]byte(""), []byte("d")).region + region0D := newRegionItem([]byte(""), []byte("d")).RegionInfo updateNewItem(tree, region0D) re.Equal(region0D, tree.search([]byte{})) re.Equal(region0D, tree.search([]byte("a"))) @@ -215,7 +215,7 @@ func TestRegionTree(t *testing.T) { re.Equal(regionD, tree.search([]byte("d"))) // overlaps with D. - regionE := newRegionItem([]byte("e"), []byte{}).region + regionE := newRegionItem([]byte("e"), []byte{}).RegionInfo updateNewItem(tree, regionE) re.Equal(region0D, tree.search([]byte{})) re.Equal(region0D, tree.search([]byte("a"))) @@ -240,7 +240,7 @@ func updateRegions(re *require.Assertions, tree *regionTree, regions []*RegionIn func TestRegionTreeSplitAndMerge(t *testing.T) { re := require.New(t) tree := newRegionTree() - regions := []*RegionInfo{newRegionItem([]byte{}, []byte{}).region} + regions := []*RegionInfo{newRegionItem([]byte{}, []byte{}).RegionInfo} // Byte will underflow/overflow if n > 7. n := 7 @@ -355,7 +355,7 @@ func TestRandomRegionDiscontinuous(t *testing.T) { } func updateNewItem(tree *regionTree, region *RegionInfo) { - item := ®ionItem{region: region} + item := ®ionItem{RegionInfo: region} tree.update(item) } @@ -379,7 +379,7 @@ func checkRandomRegion(re *require.Assertions, tree *regionTree, regions []*Regi } func newRegionItem(start, end []byte) *regionItem { - return ®ionItem{region: NewTestRegionInfo(start, end)} + return ®ionItem{RegionInfo: NewTestRegionInfo(start, end)} } type mockRegionTreeData struct { From 7aba282ff2aa30c6e5e264ab9b3de3a4f932302b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B7=B7=E6=B2=8CDM?= Date: Thu, 27 Oct 2022 12:23:57 +0800 Subject: [PATCH 18/67] simulator: support change peer v2 (#5609) close tikv/pd#5469 Signed-off-by: HunDunDM Co-authored-by: Ti Chi Robot --- server/api/trend_test.go | 4 +- server/core/region_option.go | 6 +- server/schedule/operator_controller_test.go | 2 +- tools/pd-simulator/simulator/node.go | 11 +- tools/pd-simulator/simulator/statistics.go | 40 +- tools/pd-simulator/simulator/task.go | 654 +++++++++++--------- 6 files changed, 404 insertions(+), 313 deletions(-) diff --git a/server/api/trend_test.go b/server/api/trend_test.go index cdf27206e8a2..cf74a187f87b 100644 --- a/server/api/trend_test.go +++ b/server/api/trend_test.go @@ -62,7 +62,7 @@ func TestTrend(t *testing.T) { newPeerID := op.Step(0).(operator.AddLearner).PeerID region5 = region5.Clone(core.WithAddPeer(&metapb.Peer{Id: newPeerID, StoreId: 3, Role: metapb.PeerRole_Learner}), core.WithIncConfVer()) mustRegionHeartbeat(re, svr, region5) - region5 = region5.Clone(core.WithPromoteLearner(newPeerID), core.WithRemoveStorePeer(2), core.WithIncConfVer()) + region5 = region5.Clone(core.WithRole(newPeerID, metapb.PeerRole_Voter), core.WithRemoveStorePeer(2), core.WithIncConfVer()) mustRegionHeartbeat(re, svr, region5) op, err = svr.GetHandler().GetOperator(6) @@ -71,7 +71,7 @@ func TestTrend(t *testing.T) { newPeerID = op.Step(0).(operator.AddLearner).PeerID region6 = region6.Clone(core.WithAddPeer(&metapb.Peer{Id: newPeerID, StoreId: 3, Role: 
metapb.PeerRole_Learner}), core.WithIncConfVer()) mustRegionHeartbeat(re, svr, region6) - region6 = region6.Clone(core.WithPromoteLearner(newPeerID), core.WithLeader(region6.GetStorePeer(2)), core.WithRemoveStorePeer(1), core.WithIncConfVer()) + region6 = region6.Clone(core.WithRole(newPeerID, metapb.PeerRole_Voter), core.WithLeader(region6.GetStorePeer(2)), core.WithRemoveStorePeer(1), core.WithIncConfVer()) mustRegionHeartbeat(re, svr, region6) var trend Trend diff --git a/server/core/region_option.go b/server/core/region_option.go index 99ec2a8d876e..64e329fd3b33 100644 --- a/server/core/region_option.go +++ b/server/core/region_option.go @@ -325,12 +325,12 @@ func WithAddPeer(peer *metapb.Peer) RegionCreateOption { } } -// WithPromoteLearner promotes the learner. -func WithPromoteLearner(peerID uint64) RegionCreateOption { +// WithRole changes the role. +func WithRole(peerID uint64, role metapb.PeerRole) RegionCreateOption { return func(region *RegionInfo) { for _, p := range region.GetPeers() { if p.GetId() == peerID { - p.Role = metapb.PeerRole_Voter + p.Role = role } } } diff --git a/server/schedule/operator_controller_test.go b/server/schedule/operator_controller_test.go index e3b3610af728..7ab638e3e06b 100644 --- a/server/schedule/operator_controller_test.go +++ b/server/schedule/operator_controller_test.go @@ -638,7 +638,7 @@ func (suite *operatorControllerTestSuite) TestDispatchUnfinishedStep() { suite.Equal(2, stream.MsgLength()) region4 := region3.Clone( - core.WithPromoteLearner(3), + core.WithRole(3, metapb.PeerRole_Voter), core.WithIncConfVer(), ) suite.True(steps[1].IsFinish(region4)) diff --git a/tools/pd-simulator/simulator/node.go b/tools/pd-simulator/simulator/node.go index a6a9c4787351..7cf84c4e9417 100644 --- a/tools/pd-simulator/simulator/node.go +++ b/tools/pd-simulator/simulator/node.go @@ -43,7 +43,7 @@ type Node struct { stats *info.StoreStats tick uint64 wg sync.WaitGroup - tasks map[uint64]Task + tasks map[uint64]*Task client Client receiveRegionHeartbeatCh <-chan *pdpb.RegionHeartbeatResponse ctx context.Context @@ -99,7 +99,7 @@ func NewNode(s *cases.Store, pdAddr string, config *SimConfig) (*Node, error) { client: client, ctx: ctx, cancel: cancel, - tasks: make(map[uint64]Task), + tasks: make(map[uint64]*Task), receiveRegionHeartbeatCh: receiveRegionHeartbeatCh, limiter: ratelimit.NewRateLimiter(float64(speed), int(speed)), tick: uint64(rand.Intn(storeHeartBeatPeriod)), @@ -125,7 +125,7 @@ func (n *Node) receiveRegionHeartbeat() { for { select { case resp := <-n.receiveRegionHeartbeatCh: - task := responseToTask(resp, n.raftEngine) + task := responseToTask(n.raftEngine, resp) if task != nil { n.AddTask(task) } @@ -156,8 +156,7 @@ func (n *Node) stepTask() { n.Lock() defer n.Unlock() for _, task := range n.tasks { - task.Step(n.raftEngine) - if task.IsFinished() { + if isFinished := task.Step(n.raftEngine); isFinished { simutil.Logger.Debug("task status", zap.Uint64("node-id", n.Id), zap.Uint64("region-id", task.RegionID()), @@ -246,7 +245,7 @@ func (n *Node) reportRegionChange() { } // AddTask adds task in this node. 
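// The region_option.go hunk above generalises WithPromoteLearner into WithRole, so
// a single option covers promotion, demotion, and the joint-consensus roles. A
// minimal sketch of the call-site migration; promotePeer and the peer id 3 are
// illustrative, the option and role names are the ones this patch uses.
package simulator

import (
	"github.com/pingcap/kvproto/pkg/metapb"
	"github.com/tikv/pd/server/core"
)

func promotePeer(region *core.RegionInfo) *core.RegionInfo {
	// Previously written as core.WithPromoteLearner(3); demotion is now the same
	// call with metapb.PeerRole_Learner as the target role.
	return region.Clone(
		core.WithRole(3, metapb.PeerRole_Voter),
		core.WithIncConfVer(),
	)
}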
-func (n *Node) AddTask(task Task) { +func (n *Node) AddTask(task *Task) { n.Lock() defer n.Unlock() if t, ok := n.tasks[task.RegionID()]; ok { diff --git a/tools/pd-simulator/simulator/statistics.go b/tools/pd-simulator/simulator/statistics.go index 37e666f05bc7..2dadd78020d5 100644 --- a/tools/pd-simulator/simulator/statistics.go +++ b/tools/pd-simulator/simulator/statistics.go @@ -23,20 +23,22 @@ import ( type taskStatistics struct { syncutil.RWMutex - addPeer map[uint64]int + addVoter map[uint64]int removePeer map[uint64]int addLearner map[uint64]int promoteLeaner map[uint64]int + demoteVoter map[uint64]int transferLeader map[uint64]map[uint64]int mergeRegion int } func newTaskStatistics() *taskStatistics { return &taskStatistics{ - addPeer: make(map[uint64]int), + addVoter: make(map[uint64]int), removePeer: make(map[uint64]int), addLearner: make(map[uint64]int), promoteLeaner: make(map[uint64]int), + demoteVoter: make(map[uint64]int), transferLeader: make(map[uint64]map[uint64]int), } } @@ -45,10 +47,11 @@ func (t *taskStatistics) getStatistics() map[string]int { t.RLock() defer t.RUnlock() stats := make(map[string]int) - addPeer := getSum(t.addPeer) + addVoter := getSum(t.addVoter) removePeer := getSum(t.removePeer) addLearner := getSum(t.addLearner) - promoteLeaner := getSum(t.promoteLeaner) + promoteLearner := getSum(t.promoteLeaner) + demoteVoter := getSum(t.demoteVoter) var transferLeader int for _, to := range t.transferLeader { @@ -57,34 +60,41 @@ func (t *taskStatistics) getStatistics() map[string]int { } } - stats["Add Peer (task)"] = addPeer + stats["Add Voter (task)"] = addVoter stats["Remove Peer (task)"] = removePeer stats["Add Learner (task)"] = addLearner - stats["Promote Learner (task)"] = promoteLeaner + stats["Promote Learner (task)"] = promoteLearner + stats["Demote Voter (task)"] = demoteVoter stats["Transfer Leader (task)"] = transferLeader stats["Merge Region (task)"] = t.mergeRegion return stats } -func (t *taskStatistics) incAddPeer(regionID uint64) { +func (t *taskStatistics) incAddVoter(regionID uint64) { t.Lock() defer t.Unlock() - t.addPeer[regionID]++ + t.addVoter[regionID]++ } -func (t *taskStatistics) incAddLeaner(regionID uint64) { +func (t *taskStatistics) incAddLearner(regionID uint64) { t.Lock() defer t.Unlock() t.addLearner[regionID]++ } -func (t *taskStatistics) incPromoteLeaner(regionID uint64) { +func (t *taskStatistics) incPromoteLearner(regionID uint64) { t.Lock() defer t.Unlock() t.promoteLeaner[regionID]++ } +func (t *taskStatistics) incDemoteVoter(regionID uint64) { + t.Lock() + defer t.Unlock() + t.demoteVoter[regionID]++ +} + func (t *taskStatistics) incRemovePeer(regionID uint64) { t.Lock() defer t.Unlock() @@ -97,16 +107,16 @@ func (t *taskStatistics) incMergeRegion() { t.mergeRegion++ } -func (t *taskStatistics) incTransferLeader(fromPeerID, toPeerID uint64) { +func (t *taskStatistics) incTransferLeader(fromPeerStoreID, toPeerStoreID uint64) { t.Lock() defer t.Unlock() - _, ok := t.transferLeader[fromPeerID] + _, ok := t.transferLeader[fromPeerStoreID] if ok { - t.transferLeader[fromPeerID][toPeerID]++ + t.transferLeader[fromPeerStoreID][toPeerStoreID]++ } else { m := make(map[uint64]int) - m[toPeerID]++ - t.transferLeader[fromPeerID] = m + m[toPeerStoreID]++ + t.transferLeader[fromPeerStoreID] = m } } diff --git a/tools/pd-simulator/simulator/task.go b/tools/pd-simulator/simulator/task.go index 083d8b6774c8..3ad9c0af6003 100644 --- a/tools/pd-simulator/simulator/task.go +++ b/tools/pd-simulator/simulator/task.go @@ -17,6 +17,7 @@ 
package simulator import ( "bytes" "fmt" + "strings" "time" "github.com/docker/go-units" @@ -25,19 +26,22 @@ import ( "github.com/pingcap/kvproto/pkg/pdpb" "github.com/tikv/pd/server/core" "github.com/tikv/pd/tools/pd-analysis/analysis" + "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" + "go.uber.org/zap" ) -var ( - chunkSize = int64(4 * units.KiB) - maxSnapGeneratorPoolSize = uint32(2) - maxSnapReceivePoolSize = uint32(4) - compressionRatio = int64(2) +const ( + removeSpeed = 100 * units.MiB + chunkSize = 4 * units.KiB + maxSnapGeneratorPoolSize = 2 + maxSnapReceivePoolSize = 4 + compressionRatio = 2 ) type snapAction int const ( - generate = iota + generate snapAction = iota receive ) @@ -49,115 +53,180 @@ const ( finished ) -// Task running in node. -type Task interface { - Desc() string - RegionID() uint64 - Step(r *RaftEngine) - IsFinished() bool -} +func responseToTask(engine *RaftEngine, resp *pdpb.RegionHeartbeatResponse) *Task { + var ( + regionID = resp.GetRegionId() + region = engine.GetRegion(regionID) + op operator + desc string + ) -func responseToTask(resp *pdpb.RegionHeartbeatResponse, r *RaftEngine) Task { - regionID := resp.GetRegionId() - region := r.GetRegion(regionID) - epoch := resp.GetRegionEpoch() - - // change peer - if resp.GetChangePeer() != nil { - changePeer := resp.GetChangePeer() - switch changePeer.GetChangeType() { - case eraftpb.ConfChangeType_AddNode: - return &addPeer{ - regionID: regionID, - epoch: epoch, - peer: changePeer.GetPeer(), - } - case eraftpb.ConfChangeType_RemoveNode: - return &removePeer{ - regionID: regionID, - size: region.GetApproximateSize(), - keys: region.GetApproximateKeys(), - speed: 100 * 1000 * 1000, - epoch: epoch, - peer: changePeer.GetPeer(), - } - case eraftpb.ConfChangeType_AddLearnerNode: - return &addLearner{ - regionID: regionID, - size: region.GetApproximateSize(), - keys: region.GetApproximateKeys(), - epoch: epoch, - peer: changePeer.GetPeer(), - // This two variables are used to simulate sending and receiving snapshot processes. - sendingStat: newSnapshotState(region.GetApproximateSize(), generate), - receivingStat: newSnapshotState(region.GetApproximateSize(), receive), + switch { + case resp.GetChangePeer() != nil: + op, desc = changePeerToOperator(region, resp.GetChangePeer()) + case resp.GetChangePeerV2() != nil: + cps := resp.GetChangePeerV2().GetChanges() + if len(cps) == 0 { + // leave joint state + desc = fmt.Sprintf("leave joint state for region %d", regionID) + op = &changePeerV2Leave{} + } else if len(cps) == 1 { + // original ChangePeer + op, desc = changePeerToOperator(region, cps[0]) + } else { + // enter joint state, it can only contain PromoteLearner and DemoteVoter. 
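// For reference at this step: a joint-state ChangePeerV2, as decoded by the loop
// below, carries only promotions and demotions. buildJointChange is a hypothetical
// helper (not part of this patch) showing the shape of such a payload, relying on
// the pdpb/eraftpb/metapb imports this file already has; the peer and store ids are
// illustrative, while the field and enum names are the ones this function reads. A
// later response whose ChangePeerV2 has an empty Changes slice is handled above as
// "leave joint state".
func buildJointChange() *pdpb.RegionHeartbeatResponse {
	return &pdpb.RegionHeartbeatResponse{
		ChangePeerV2: &pdpb.ChangePeerV2{
			Changes: []*pdpb.ChangePeer{
				// AddNode on a peer the region already holds decodes to promoteLearner.
				{ChangeType: eraftpb.ConfChangeType_AddNode, Peer: &metapb.Peer{Id: 40, StoreId: 4, Role: metapb.PeerRole_Learner}},
				// AddLearnerNode on an existing voter decodes to demoteVoter.
				{ChangeType: eraftpb.ConfChangeType_AddLearnerNode, Peer: &metapb.Peer{Id: 20, StoreId: 2, Role: metapb.PeerRole_Voter}},
			},
		},
	}
}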
+ subDesc := make([]string, 0, len(cps)) + cp2 := &changePeerV2Enter{} + for _, cp := range cps { + peer := cp.GetPeer() + subOp, _ := changePeerToOperator(region, cp) + switch subOp.(type) { + case *promoteLearner: + subDesc = append(subDesc, fmt.Sprintf("promote peer %+v", peer)) + cp2.promoteLearners = append(cp2.promoteLearners, peer) + case *demoteVoter: + subDesc = append(subDesc, fmt.Sprintf("demote peer %+v", peer)) + cp2.demoteVoters = append(cp2.demoteVoters, peer) + default: + simutil.Logger.Error("cannot exec AddPeer or RemovePeer when using joint state") + return nil + } } + desc = fmt.Sprintf("%s for region %d", strings.Join(subDesc, ", "), regionID) + op = cp2 } - } else if resp.GetTransferLeader() != nil { - changePeer := resp.GetTransferLeader().GetPeer() - fromPeer := region.GetLeader() - return &transferLeader{ - regionID: regionID, - epoch: epoch, - fromPeer: fromPeer, - peer: changePeer, + case resp.GetTransferLeader() != nil: + fromPeerStoreID := region.GetLeader().GetStoreId() + // When this field is included, it means that TiKV needs to decide the optimal Leader by itself. + toPeers := resp.GetTransferLeader().GetPeers() + // When no Peers are included, use Peer to build Peers of length 1. + if len(toPeers) == 0 { + toPeers = []*metapb.Peer{resp.GetTransferLeader().GetPeer()} } - } else if resp.GetMerge() != nil { + desc = fmt.Sprintf("transfer leader from store %d to store %d", fromPeerStoreID, toPeers[0].GetStoreId()) + op = &transferLeader{ + fromPeerStoreID: fromPeerStoreID, + toPeers: toPeers, + } + case resp.GetMerge() != nil: targetRegion := resp.GetMerge().GetTarget() - return &mergeRegion{ - regionID: regionID, - epoch: epoch, - targetRegion: targetRegion, + desc = fmt.Sprintf("merge region %d into %d", regionID, targetRegion.GetId()) + op = &mergeRegion{targetRegion: targetRegion} + case resp.GetSplitRegion() != nil: + // TODO: support split region + simutil.Logger.Error("split region scheduling is currently not supported") + return nil + default: + return nil + } + + if op == nil { + return nil + } + + return &Task{ + operator: op, + desc: desc, + regionID: regionID, + epoch: resp.GetRegionEpoch(), + isFinished: false, + } +} + +func changePeerToOperator(region *core.RegionInfo, cp *pdpb.ChangePeer) (operator, string) { + regionID := region.GetID() + peer := cp.GetPeer() + switch cp.GetChangeType() { + case eraftpb.ConfChangeType_AddNode: + if region.GetPeer(peer.GetId()) != nil { + return &promoteLearner{peer: peer}, fmt.Sprintf("promote learner %+v for region %d", peer, regionID) + } + return &addPeer{ + peer: peer, + size: region.GetApproximateSize(), + keys: region.GetApproximateKeys(), + sendingStat: newSnapshotState(region.GetApproximateSize(), generate), + receivingStat: newSnapshotState(region.GetApproximateSize(), receive), + }, fmt.Sprintf("add voter %+v for region %d", peer, regionID) + case eraftpb.ConfChangeType_AddLearnerNode: + if region.GetPeer(peer.GetId()) != nil { + return &demoteVoter{peer: peer}, fmt.Sprintf("demote voter %+v for region %d", peer, regionID) } + return &addPeer{ + peer: peer, + size: region.GetApproximateSize(), + keys: region.GetApproximateKeys(), + sendingStat: newSnapshotState(region.GetApproximateSize(), generate), + receivingStat: newSnapshotState(region.GetApproximateSize(), receive), + }, fmt.Sprintf("add learner %+v for region %d", peer, regionID) + case eraftpb.ConfChangeType_RemoveNode: + return &removePeer{ + peer: peer, + size: region.GetApproximateSize(), + speed: removeSpeed, + }, fmt.Sprintf("remove peer 
%+v for region %d", peer, regionID) + default: + return nil, "" } - return nil } -type snapshotStat struct { - action snapAction - remainSize int64 - status snapStatus - start time.Time +// Simulate the execution of the Operator. +type operator interface { + // Returns new region if the execution is finished, otherwise returns nil. + tick(engine *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) } -func newSnapshotState(size int64, action snapAction) *snapshotStat { - if action == receive { - size /= compressionRatio - } - return &snapshotStat{ - remainSize: size, - action: action, - status: pending, - start: time.Now(), - } +// Task running in node. +type Task struct { + operator + desc string + regionID uint64 + epoch *metapb.RegionEpoch + isFinished bool } -type mergeRegion struct { - regionID uint64 - epoch *metapb.RegionEpoch - targetRegion *metapb.Region - finished bool +// Desc returns the description of the Task. +func (t *Task) Desc() string { + return t.desc } -func (m *mergeRegion) Desc() string { - return fmt.Sprintf("merge region %d into %d", m.regionID, m.targetRegion.GetId()) +// RegionID returns the region-id of the Task. +func (t *Task) RegionID() uint64 { + return t.regionID } -func (m *mergeRegion) Step(r *RaftEngine) { - if m.finished { - return +// Step execute once on the Task. +func (t *Task) Step(engine *RaftEngine) (isFinished bool) { + if t.isFinished { + return true } - region := r.GetRegion(m.regionID) - // If region equals to nil, it means that the region has already been merged. - if region == nil || region.GetRegionEpoch().GetConfVer() > m.epoch.ConfVer || region.GetRegionEpoch().GetVersion() > m.epoch.Version { - m.finished = true + region := engine.GetRegion(t.regionID) + if region == nil || region.GetRegionEpoch().GetConfVer() > t.epoch.ConfVer || region.GetRegionEpoch().GetVersion() > t.epoch.Version { + t.isFinished = true return } - targetRegion := r.GetRegion(m.targetRegion.Id) + var newRegion *core.RegionInfo + newRegion, t.isFinished = t.tick(engine, region) + + if newRegion != nil { + engine.SetRegion(newRegion) + engine.recordRegionChange(newRegion) + } + + return t.isFinished +} + +type mergeRegion struct { + targetRegion *metapb.Region +} + +func (m *mergeRegion) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { + targetRegion := engine.GetRegion(m.targetRegion.Id) + var startKey, endKey []byte - if bytes.Equal(m.targetRegion.EndKey, region.GetStartKey()) { + if bytes.Equal(m.targetRegion.GetEndKey(), region.GetStartKey()) { startKey = targetRegion.GetStartKey() endKey = region.GetEndKey() } else { @@ -165,16 +234,16 @@ func (m *mergeRegion) Step(r *RaftEngine) { endKey = targetRegion.GetEndKey() } - epoch := targetRegion.GetRegionEpoch() - if m.epoch.ConfVer > m.targetRegion.RegionEpoch.ConfVer { - epoch.ConfVer = m.epoch.ConfVer + epoch := targetRegion.Clone().GetRegionEpoch() + if region.GetRegionEpoch().GetConfVer() > epoch.GetConfVer() { + epoch.ConfVer = region.GetRegionEpoch().GetConfVer() } - - if m.epoch.Version > m.targetRegion.RegionEpoch.Version { - epoch.Version = m.epoch.Version + if region.GetRegionEpoch().GetVersion() > epoch.GetVersion() { + epoch.Version = region.GetRegionEpoch().GetVersion() } epoch.Version++ - mergeRegion := targetRegion.Clone( + + newRegion = targetRegion.Clone( core.WithStartKey(startKey), core.WithEndKey(endKey), core.SetRegionConfVer(epoch.ConfVer), @@ -182,247 +251,260 @@ func (m *mergeRegion) Step(r *RaftEngine) { 
core.SetApproximateSize(targetRegion.GetApproximateSize()+region.GetApproximateSize()), core.SetApproximateKeys(targetRegion.GetApproximateKeys()+region.GetApproximateKeys()), ) - r.SetRegion(mergeRegion) - r.recordRegionChange(mergeRegion) - r.schedulerStats.taskStats.incMergeRegion() - m.finished = true -} - -func (m *mergeRegion) RegionID() uint64 { - return m.regionID -} - -func (m *mergeRegion) IsFinished() bool { - return m.finished + engine.schedulerStats.taskStats.incMergeRegion() + return newRegion, true } type transferLeader struct { - regionID uint64 - epoch *metapb.RegionEpoch - fromPeer *metapb.Peer - peer *metapb.Peer - finished bool -} - -func (t *transferLeader) Desc() string { - return fmt.Sprintf("transfer leader from store %d to store %d", t.fromPeer.GetStoreId(), t.peer.GetStoreId()) + fromPeerStoreID uint64 + toPeers []*metapb.Peer } -func (t *transferLeader) Step(r *RaftEngine) { - if t.finished { +func (t *transferLeader) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { + isFinished = true + toPeer := t.toPeers[0] // TODO: Support selection logic + if peer := region.GetPeer(toPeer.GetId()); peer == nil || peer.GetRole() != toPeer.GetRole() || core.IsLearner(peer) { return } - region := r.GetRegion(t.regionID) - if region.GetRegionEpoch().GetVersion() > t.epoch.Version || region.GetRegionEpoch().GetConfVer() > t.epoch.ConfVer { - t.finished = true - return - } - var newRegion *core.RegionInfo - if region.GetPeer(t.peer.GetId()) != nil { - newRegion = region.Clone(core.WithLeader(t.peer)) - } else { - // This branch will be executed - t.finished = true - return + if toPeer.GetRole() == metapb.PeerRole_DemotingVoter { + simutil.Logger.Error("set demoting-voter as leader", + zap.Uint64("region-id", region.GetID()), + zap.String("peer", toPeer.String())) } - t.finished = true - r.SetRegion(newRegion) - r.recordRegionChange(newRegion) - fromPeerID := t.fromPeer.GetId() - toPeerID := t.peer.GetId() - r.schedulerStats.taskStats.incTransferLeader(fromPeerID, toPeerID) -} -func (t *transferLeader) RegionID() uint64 { - return t.regionID + newRegion = region.Clone(core.WithLeader(toPeer)) + engine.schedulerStats.taskStats.incTransferLeader(t.fromPeerStoreID, toPeer.GetStoreId()) + return } -func (t *transferLeader) IsFinished() bool { - return t.finished -} - -type addPeer struct { - regionID uint64 - epoch *metapb.RegionEpoch - peer *metapb.Peer - finished bool -} - -func (a *addPeer) Desc() string { - return fmt.Sprintf("add peer %+v for region %d", a.peer, a.regionID) -} - -func (a *addPeer) Step(r *RaftEngine) { - if a.finished { - return +func checkAndCreateChangePeerOption(engine *RaftEngine, region *core.RegionInfo, + peer *metapb.Peer, from, to metapb.PeerRole) []core.RegionCreateOption { + // `from` and `to` need to satisfy the combination in switch. 
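For reference, the (from, to) combinations the switch below accepts follow the joint-consensus role model: Learner to Voter and Voter to Learner for a plain promote or demote, Learner to IncomingVoter and Voter to DemotingVoter when entering a joint state, and IncomingVoter to Voter plus DemotingVoter to Learner when leaving it. A minimal standalone sketch of that rule table (illustrative only, mirroring the comments on the switch rather than any shared production code):

package main

import (
	"fmt"

	"github.com/pingcap/kvproto/pkg/metapb"
)

// isLegalRoleChange enumerates the (from, to) pairs that the simulator's
// checkAndCreateChangePeerOption is willing to turn into a region option.
func isLegalRoleChange(from, to metapb.PeerRole) bool {
	switch to {
	case metapb.PeerRole_Voter: // promote a learner, or leave the joint state
		return from == metapb.PeerRole_Learner || from == metapb.PeerRole_IncomingVoter
	case metapb.PeerRole_Learner: // demote a voter, or leave the joint state
		return from == metapb.PeerRole_Voter || from == metapb.PeerRole_DemotingVoter
	case metapb.PeerRole_IncomingVoter: // enter the joint state
		return from == metapb.PeerRole_Learner
	case metapb.PeerRole_DemotingVoter: // enter the joint state
		return from == metapb.PeerRole_Voter
	default:
		return false
	}
}

func main() {
	fmt.Println(isLegalRoleChange(metapb.PeerRole_Learner, metapb.PeerRole_Voter))         // true
	fmt.Println(isLegalRoleChange(metapb.PeerRole_Voter, metapb.PeerRole_DemotingVoter))   // true
	fmt.Println(isLegalRoleChange(metapb.PeerRole_Learner, metapb.PeerRole_DemotingVoter)) // false
}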
+ + // check `from` Role + if peer.GetRole() != from { + simutil.Logger.Error( + "unexpected role", + zap.String("role", peer.GetRole().String()), + zap.String("expected", from.String())) + return nil } - region := r.GetRegion(a.regionID) - if region.GetRegionEpoch().GetVersion() > a.epoch.Version || region.GetRegionEpoch().GetConfVer() > a.epoch.ConfVer { - a.finished = true - return + // Leader cannot be demoted + if (to == metapb.PeerRole_DemotingVoter || to == metapb.PeerRole_Learner) && region.GetLeader().GetId() == peer.GetId() { + simutil.Logger.Error("demote leader", zap.String("region", region.GetMeta().String())) + return nil } - - var opts []core.RegionCreateOption - if region.GetPeer(a.peer.GetId()) == nil { - opts = append(opts, core.WithAddPeer(a.peer)) - r.schedulerStats.taskStats.incAddPeer(region.GetID()) - } else { - opts = append(opts, core.WithPromoteLearner(a.peer.GetId())) - r.schedulerStats.taskStats.incPromoteLeaner(region.GetID()) - } - opts = append(opts, core.WithIncConfVer()) - newRegion := region.Clone(opts...) - r.SetRegion(newRegion) - r.recordRegionChange(newRegion) - a.finished = true + // create option + switch to { + case metapb.PeerRole_Voter: // Learner/IncomingVoter -> Voter + engine.schedulerStats.taskStats.incPromoteLearner(region.GetID()) + case metapb.PeerRole_Learner: // Voter/DemotingVoter -> Learner + engine.schedulerStats.taskStats.incDemoteVoter(region.GetID()) + case metapb.PeerRole_IncomingVoter: // Learner -> IncomingVoter, only in joint state + case metapb.PeerRole_DemotingVoter: // Voter -> DemotingVoter, only in joint state + default: + return nil + } + return []core.RegionCreateOption{core.WithRole(peer.GetId(), to), core.WithIncConfVer()} } -func (a *addPeer) RegionID() uint64 { - return a.regionID +type promoteLearner struct { + peer *metapb.Peer } -func (a *addPeer) IsFinished() bool { - return a.finished +func (pl *promoteLearner) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { + isFinished = true + peer := region.GetPeer(pl.peer.GetId()) + opts := checkAndCreateChangePeerOption(engine, region, peer, metapb.PeerRole_Learner, metapb.PeerRole_Voter) + if len(opts) > 0 { + newRegion = region.Clone(opts...) + } + return } -type removePeer struct { - regionID uint64 - size int64 - keys int64 - speed int64 - epoch *metapb.RegionEpoch - peer *metapb.Peer - finished bool +type demoteVoter struct { + peer *metapb.Peer } -func (a *removePeer) Desc() string { - return fmt.Sprintf("remove peer %+v for region %d", a.peer, a.regionID) +func (dv *demoteVoter) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { + isFinished = true + peer := region.GetPeer(dv.peer.GetId()) + opts := checkAndCreateChangePeerOption(engine, region, peer, metapb.PeerRole_Voter, metapb.PeerRole_Learner) + if len(opts) > 0 { + newRegion = region.Clone(opts...) 
+ } + return } -func (a *removePeer) Step(r *RaftEngine) { - if a.finished { - return - } - region := r.GetRegion(a.regionID) - if region.GetRegionEpoch().GetVersion() > a.epoch.Version || region.GetRegionEpoch().GetConfVer() > a.epoch.ConfVer { - a.finished = true - return - } +type changePeerV2Enter struct { + promoteLearners []*metapb.Peer + demoteVoters []*metapb.Peer +} - regionSize := uint64(region.GetApproximateSize()) - a.size -= a.speed - if a.size < 0 { - for _, peer := range region.GetPeers() { - if peer.GetId() == a.peer.GetId() { - storeID := peer.GetStoreId() - var downPeers []*pdpb.PeerStats - if r.conn.Nodes[storeID] == nil { - for _, downPeer := range region.GetDownPeers() { - if downPeer.Peer.StoreId != storeID { - downPeers = append(downPeers, downPeer) - } - } - } - newRegion := region.Clone( - core.WithRemoveStorePeer(storeID), - core.WithIncConfVer(), - core.WithDownPeers(downPeers), - ) - r.SetRegion(newRegion) - r.recordRegionChange(newRegion) - r.schedulerStats.taskStats.incRemovePeer(region.GetID()) - if r.conn.Nodes[storeID] == nil { - a.finished = true - return - } - r.conn.Nodes[storeID].decUsedSize(regionSize) - break - } +func (ce *changePeerV2Enter) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { + isFinished = true + var opts []core.RegionCreateOption + for _, pl := range ce.promoteLearners { + peer := region.GetPeer(pl.GetId()) + subOpts := checkAndCreateChangePeerOption(engine, region, peer, metapb.PeerRole_Learner, metapb.PeerRole_IncomingVoter) + if len(subOpts) == 0 { + return } - a.finished = true - if analysis.GetTransferCounter().IsValid { - analysis.GetTransferCounter().AddSource(a.regionID, a.peer.StoreId) + opts = append(opts, subOpts...) + } + for _, dv := range ce.demoteVoters { + peer := region.GetPeer(dv.GetId()) + subOpts := checkAndCreateChangePeerOption(engine, region, peer, metapb.PeerRole_Voter, metapb.PeerRole_DemotingVoter) + if len(subOpts) == 0 { + return } + opts = append(opts, subOpts...) } + newRegion = region.Clone(opts...) + return } -func (a *removePeer) RegionID() uint64 { - return a.regionID -} +type changePeerV2Leave struct{} -func (a *removePeer) IsFinished() bool { - return a.finished +func (cl *changePeerV2Leave) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { + isFinished = true + var opts []core.RegionCreateOption + for _, peer := range region.GetPeers() { + switch peer.GetRole() { + case metapb.PeerRole_IncomingVoter: + opts = append(opts, checkAndCreateChangePeerOption(engine, region, peer, metapb.PeerRole_IncomingVoter, metapb.PeerRole_Voter)...) + case metapb.PeerRole_DemotingVoter: + opts = append(opts, checkAndCreateChangePeerOption(engine, region, peer, metapb.PeerRole_IncomingVoter, metapb.PeerRole_Voter)...) + } + } + if len(opts) < 4 { + simutil.Logger.Error("fewer than two peers should not need to leave the joint state") + return + } + newRegion = region.Clone(opts...) 
+ return } -type addLearner struct { - regionID uint64 +type addPeer struct { + peer *metapb.Peer size int64 keys int64 - epoch *metapb.RegionEpoch - peer *metapb.Peer - finished bool sendingStat *snapshotStat receivingStat *snapshotStat } -func (a *addLearner) Desc() string { - return fmt.Sprintf("add learner %+v for region %d", a.peer, a.regionID) -} - -func (a *addLearner) Step(r *RaftEngine) { - if a.finished { - return +func (a *addPeer) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { + // Check + sendNode := engine.conn.Nodes[region.GetLeader().GetStoreId()] + if sendNode == nil { + return nil, true } - region := r.GetRegion(a.regionID) - if region.GetRegionEpoch().GetVersion() > a.epoch.Version || region.GetRegionEpoch().GetConfVer() > a.epoch.ConfVer { - a.finished = true - return + recvNode := engine.conn.Nodes[a.peer.GetStoreId()] + if recvNode == nil { + return nil, true } - - snapshotSize := region.GetApproximateSize() - sendNode := r.conn.Nodes[region.GetLeader().GetStoreId()] - if sendNode == nil { - a.finished = true - return + // Step 1: Generate Pending Peers + if region.GetPeer(a.peer.GetId()) == nil { + switch a.peer.GetRole() { + case metapb.PeerRole_Voter: + engine.schedulerStats.taskStats.incAddVoter(region.GetID()) + case metapb.PeerRole_Learner: + engine.schedulerStats.taskStats.incAddLearner(region.GetID()) + } + pendingPeers := append(region.GetPendingPeers(), a.peer) + return region.Clone(core.WithAddPeer(a.peer), core.WithIncConfVer(), core.WithPendingPeers(pendingPeers)), false } + // Step 2: Process Snapshot if !processSnapshot(sendNode, a.sendingStat) { - return + return nil, false } - r.schedulerStats.snapshotStats.incSendSnapshot(sendNode.Id) + engine.schedulerStats.snapshotStats.incSendSnapshot(sendNode.Id) + if !processSnapshot(recvNode, a.receivingStat) { + return nil, false + } + engine.schedulerStats.snapshotStats.incReceiveSnapshot(recvNode.Id) + recvNode.incUsedSize(uint64(region.GetApproximateSize())) + // Step 3: Remove the Pending state + newRegion = region.Clone(removePendingPeer(region, a.peer)) + isFinished = true - recvNode := r.conn.Nodes[a.peer.GetStoreId()] - if recvNode == nil { - a.finished = true - return + // analysis + if analysis.GetTransferCounter().IsValid { + analysis.GetTransferCounter().AddTarget(region.GetID(), a.peer.GetStoreId()) } - if !processSnapshot(recvNode, a.receivingStat) { - return + + return +} + +type removePeer struct { + peer *metapb.Peer + size int64 + speed int64 +} + +func (r *removePeer) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { + // Step 1: Delete data + r.size -= r.speed + if r.size > 0 { + return nil, false + } + // Step 2: Remove Peer + engine.schedulerStats.taskStats.incRemovePeer(region.GetID()) + newRegion = region.Clone( + core.WithIncConfVer(), + core.WithRemoveStorePeer(r.peer.GetStoreId()), + removePendingPeer(region, r.peer), + removeDownPeers(region, r.peer)) + isFinished = true + + if store := engine.conn.Nodes[r.peer.GetStoreId()]; store != nil { + store.decUsedSize(uint64(region.GetApproximateSize())) + // analysis + if analysis.GetTransferCounter().IsValid { + analysis.GetTransferCounter().AddSource(region.GetID(), r.peer.GetStoreId()) + } } - r.schedulerStats.snapshotStats.incReceiveSnapshot(recvNode.Id) - if region.GetPeer(a.peer.GetId()) == nil { - newRegion := region.Clone( - core.WithAddPeer(a.peer), - core.WithIncConfVer(), - ) - r.SetRegion(newRegion) - 
r.recordRegionChange(newRegion) - r.schedulerStats.taskStats.incAddLeaner(region.GetID()) - recvNode.incUsedSize(uint64(snapshotSize)) - a.finished = true + return +} + +func removePendingPeer(region *core.RegionInfo, removePeer *metapb.Peer) core.RegionCreateOption { + pendingPeers := make([]*metapb.Peer, 0, len(region.GetPendingPeers())) + for _, peer := range region.GetPendingPeers() { + if peer.GetId() != removePeer.GetId() { + pendingPeers = append(pendingPeers, peer) + } } + return core.WithPendingPeers(pendingPeers) +} - if analysis.GetTransferCounter().IsValid { - analysis.GetTransferCounter().AddTarget(a.regionID, a.peer.StoreId) +func removeDownPeers(region *core.RegionInfo, removePeer *metapb.Peer) core.RegionCreateOption { + downPeers := make([]*pdpb.PeerStats, 0, len(region.GetDownPeers())) + for _, peer := range region.GetDownPeers() { + if peer.GetPeer().GetId() != removePeer.GetId() { + downPeers = append(downPeers, peer) + } } + return core.WithDownPeers(downPeers) } -func (a *addLearner) RegionID() uint64 { - return a.regionID +type snapshotStat struct { + action snapAction + remainSize int64 + status snapStatus + start time.Time } -func (a *addLearner) IsFinished() bool { - return a.finished +func newSnapshotState(size int64, action snapAction) *snapshotStat { + if action == receive { + size /= compressionRatio + } + return &snapshotStat{ + remainSize: size, + action: action, + status: pending, + start: time.Now(), + } } func processSnapshot(n *Node, stat *snapshotStat) bool { From 1e19335d6e54a7cde44eea6b732597f7ac576825 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Thu, 27 Oct 2022 15:47:57 +0800 Subject: [PATCH 19/67] *: improve region path (#5636) ref tikv/pd#5586 Signed-off-by: Ryan Leung Co-authored-by: Ti Chi Robot --- server/storage/endpoint/key_path.go | 28 ++++++++++++++++++++++- server/storage/endpoint/key_path_test.go | 29 ++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 server/storage/endpoint/key_path_test.go diff --git a/server/storage/endpoint/key_path.go b/server/storage/endpoint/key_path.go index 9de751c3f464..4800a7967dd9 100644 --- a/server/storage/endpoint/key_path.go +++ b/server/storage/endpoint/key_path.go @@ -17,6 +17,8 @@ package endpoint import ( "fmt" "path" + "strconv" + "strings" ) const ( @@ -38,6 +40,10 @@ const ( keyspaceMetaInfix = "meta" keyspaceIDInfix = "id" keyspaceAllocID = "alloc_id" + regionPathPrefix = "raft/r" + + // we use uint64 to represent ID, the max length of uint64 is 20. + keyLen = 20 ) // AppendToRootPath appends the given key to the rootPath. @@ -74,7 +80,27 @@ func storeRegionWeightPath(storeID uint64) string { // RegionPath returns the region meta info key path with the given region ID. 
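The RegionPath rewrite just below drops fmt.Sprintf("%020d", ...) in favour of manual zero padding so the hot path avoids format parsing; the two encodings must stay byte-for-byte identical, which is what the new key_path_test.go asserts for random IDs. A minimal equivalence check, mirroring the padding logic in the diff:

package main

import (
	"fmt"
	"strconv"
)

// padRegionID left-pads a decimal region ID to 20 characters, the same way
// the new RegionPath does; the result should equal fmt.Sprintf("%020d", id).
func padRegionID(id uint64) string {
	const keyLen = 20
	s := strconv.FormatUint(id, 10)
	if len(s) > keyLen { // cannot happen for a uint64, kept for symmetry with the diff
		return s[len(s)-keyLen:]
	}
	b := make([]byte, keyLen)
	diff := keyLen - len(s)
	for i := 0; i < keyLen; i++ {
		if i < diff {
			b[i] = '0'
		} else {
			b[i] = s[i-diff]
		}
	}
	return string(b)
}

func main() {
	for _, id := range []uint64{0, 7, 123456, 18446744073709551615} {
		fmt.Println(padRegionID(id) == fmt.Sprintf("%020d", id)) // always true
	}
}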
func RegionPath(regionID uint64) string { - return path.Join(clusterPath, "r", fmt.Sprintf("%020d", regionID)) + var buf strings.Builder + buf.WriteString(regionPathPrefix) + buf.WriteString("/") + s := strconv.FormatUint(regionID, 10) + if len(s) > keyLen { + s = s[len(s)-keyLen:] + } else { + b := make([]byte, keyLen) + diff := keyLen - len(s) + for i := 0; i < keyLen; i++ { + if i < diff { + b[i] = 48 + } else { + b[i] = s[i-diff] + } + } + s = string(b) + } + buf.WriteString(s) + + return buf.String() } func ruleKeyPath(ruleKey string) string { diff --git a/server/storage/endpoint/key_path_test.go b/server/storage/endpoint/key_path_test.go new file mode 100644 index 000000000000..57313f3a7b24 --- /dev/null +++ b/server/storage/endpoint/key_path_test.go @@ -0,0 +1,29 @@ +package endpoint + +import ( + "fmt" + "math/rand" + "path" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestRegionPath(t *testing.T) { + re := require.New(t) + f := func(id uint64) string { + return path.Join(regionPathPrefix, fmt.Sprintf("%020d", id)) + } + rand.Seed(time.Now().Unix()) + for i := 0; i < 1000; i++ { + id := rand.Uint64() + re.Equal(f(id), RegionPath(id)) + } +} + +func BenchmarkRegionPath(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = RegionPath(uint64(i)) + } +} From 8501508da5a26e72e3d595a4f49483f9b0d8abd9 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Thu, 27 Oct 2022 17:21:57 +0800 Subject: [PATCH 20/67] api: fix unstable test for TestSchedulerDiagnosticAPI (#5642) close tikv/pd#5641 Signed-off-by: bufferflies <1045931706@qq.com> --- server/api/diagnostic_test.go | 40 +++++++++++++---------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/server/api/diagnostic_test.go b/server/api/diagnostic_test.go index 5709edc1f8ad..be9e4d86f07d 100644 --- a/server/api/diagnostic_test.go +++ b/server/api/diagnostic_test.go @@ -63,6 +63,16 @@ func (suite *diagnosticTestSuite) TearDownSuite() { suite.cleanup() } +func (suite *diagnosticTestSuite) checkStatus(status string, url string) { + re := suite.Require() + suite.Eventually(func() bool { + result := &cluster.DiagnosticResult{} + err := tu.ReadGetJSON(re, testDialClient, url, result) + suite.NoError(err) + return result.Status == status + }, time.Second, time.Millisecond*50) +} + func (suite *diagnosticTestSuite) TestSchedulerDiagnosticAPI() { re := suite.Require() addr := suite.configPrefix @@ -99,12 +109,7 @@ func (suite *diagnosticTestSuite) TestSchedulerDiagnosticAPI() { suite.NoError(err) err = tu.CheckPostJSON(testDialClient, suite.schedulerPrifex, body, tu.StatusOK(suite.Require())) suite.NoError(err) - - time.Sleep(time.Millisecond * 100) - result = &cluster.DiagnosticResult{} - err = tu.ReadGetJSON(re, testDialClient, balanceRegionURL, result) - suite.NoError(err) - suite.Equal("pending", result.Status) + suite.checkStatus("pending", balanceRegionURL) input = make(map[string]interface{}) input["delay"] = 30 @@ -112,35 +117,20 @@ func (suite *diagnosticTestSuite) TestSchedulerDiagnosticAPI() { suite.NoError(err) err = tu.CheckPostJSON(testDialClient, suite.schedulerPrifex+"/"+schedulers.BalanceRegionName, pauseArgs, tu.StatusOK(re)) suite.NoError(err) - time.Sleep(time.Millisecond * 100) - result = &cluster.DiagnosticResult{} - err = tu.ReadGetJSON(re, testDialClient, balanceRegionURL, result) - suite.NoError(err) - suite.Equal("paused", result.Status) + suite.checkStatus("paused", balanceRegionURL) input["delay"] = 0 pauseArgs, err = json.Marshal(input) suite.NoError(err) err 
= tu.CheckPostJSON(testDialClient, suite.schedulerPrifex+"/"+schedulers.BalanceRegionName, pauseArgs, tu.StatusOK(re)) suite.NoError(err) - time.Sleep(time.Millisecond * 100) - result = &cluster.DiagnosticResult{} - err = tu.ReadGetJSON(re, testDialClient, balanceRegionURL, result) - suite.NoError(err) - suite.Equal("pending", result.Status) + suite.checkStatus("pending", balanceRegionURL) mustPutRegion(re, suite.svr, 1000, 1, []byte("a"), []byte("b"), core.SetApproximateSize(60)) - time.Sleep(time.Millisecond * 100) - result = &cluster.DiagnosticResult{} - err = tu.ReadGetJSON(re, testDialClient, balanceRegionURL, result) - suite.NoError(err) - suite.Equal("normal", result.Status) + suite.checkStatus("normal", balanceRegionURL) deleteURL := fmt.Sprintf("%s/%s", suite.schedulerPrifex, schedulers.BalanceRegionName) _, err = apiutil.DoDelete(testDialClient, deleteURL) suite.NoError(err) - result = &cluster.DiagnosticResult{} - err = tu.ReadGetJSON(re, testDialClient, balanceRegionURL, result) - suite.NoError(err) - suite.Equal("disabled", result.Status) + suite.checkStatus("disabled", balanceRegionURL) } From cf003bb91c58afe759518617d8dbe478c504ae29 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Fri, 28 Oct 2022 14:25:58 +0800 Subject: [PATCH 21/67] metrics: fix bug that the target is wrong (#5647) close tikv/pd#5646 Signed-off-by: bufferflies <1045931706@qq.com> --- metrics/grafana/pd.json | 8 ++++---- server/schedule/filter/counter.go | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/metrics/grafana/pd.json b/metrics/grafana/pd.json index 978102eaba88..24468a1026e1 100644 --- a/metrics/grafana/pd.json +++ b/metrics/grafana/pd.json @@ -7736,10 +7736,10 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(pd_schedule_filter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\", action=\"filter-source\"}[1m])) by (store, type, scope)", + "expr": "sum(delta(pd_schedule_filter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\", action=\"filter-source\"}[1m])) by (source, type, scope)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{scope}}-store-{{store}}-{{type}}", + "legendFormat": "{{scope}}-store-{{source}}-{{type}}", "metric": "pd_scheduler_event_count", "refId": "A", "step": 4 @@ -7832,10 +7832,10 @@ "steppedLine": false, "targets": [ { - "expr": "sum(delta(pd_schedule_filter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\", action=\"filter-target\"}[1m])) by (store, type, scope)", + "expr": "sum(delta(pd_schedule_filter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", store=~\"$store\", action=\"filter-target\"}[1m])) by (target, type, scope)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{scope}}-store-{{store}}-{{type}}", + "legendFormat": "{{scope}}-store-{{target}}-{{type}}", "metric": "pd_scheduler_event_count", "refId": "A", "step": 4 diff --git a/server/schedule/filter/counter.go b/server/schedule/filter/counter.go index 9cdeb6aad976..783eabfe6f72 100644 --- a/server/schedule/filter/counter.go +++ b/server/schedule/filter/counter.go @@ -200,7 +200,7 @@ func (c *Counter) Flush() { for sourceID, count := range counters { sourceIDStr := strconv.FormatUint(sourceID, 10) for targetID, value := range count { - targetIDStr := strconv.FormatUint(sourceID, 10) + targetIDStr := strconv.FormatUint(targetID, 10) if value > 0 { filterCounter.WithLabelValues(actionName, c.scope, filterName, sourceIDStr, 
targetIDStr). Add(float64(value)) From 224923e92780b678af0c4a48109d0afc374ed154 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Fri, 28 Oct 2022 15:03:58 +0800 Subject: [PATCH 22/67] api: using index to replace tree scan if only returns count (#5610) close tikv/pd#5607 Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Ti Chi Robot --- server/api/stats.go | 8 ++++- server/api/stats_test.go | 60 +++++++++++++++++++++--------------- server/cluster/cluster.go | 7 +++++ server/core/basic_cluster.go | 7 +++++ server/core/region.go | 24 ++++++++++++++- 5 files changed, 80 insertions(+), 26 deletions(-) diff --git a/server/api/stats.go b/server/api/stats.go index 9a5983f43ba7..a4f50779a11f 100644 --- a/server/api/stats.go +++ b/server/api/stats.go @@ -18,6 +18,7 @@ import ( "net/http" "github.com/tikv/pd/server" + "github.com/tikv/pd/server/statistics" "github.com/unrolled/render" ) @@ -43,6 +44,11 @@ func newStatsHandler(svr *server.Server, rd *render.Render) *statsHandler { func (h *statsHandler) GetRegionStatus(w http.ResponseWriter, r *http.Request) { rc := getCluster(r) startKey, endKey := r.URL.Query().Get("start_key"), r.URL.Query().Get("end_key") - stats := rc.GetRegionStats([]byte(startKey), []byte(endKey)) + var stats *statistics.RegionStats + if r.URL.Query().Has("count") { + stats = rc.GetRangeCount([]byte(startKey), []byte(endKey)) + } else { + stats = rc.GetRegionStats([]byte(startKey), []byte(endKey)) + } h.rd.JSON(w, http.StatusOK, stats) } diff --git a/server/api/stats_test.go b/server/api/stats_test.go index c2a53597a901..bf92634ae588 100644 --- a/server/api/stats_test.go +++ b/server/api/stats_test.go @@ -146,22 +146,6 @@ func (suite *statsTestSuite) TestRegionStats() { StorePeerSize: map[uint64]int64{1: 301, 2: 100, 3: 100, 4: 250, 5: 201}, StorePeerKeys: map[uint64]int64{1: 201, 2: 50, 3: 50, 4: 170, 5: 151}, } - res, err := testDialClient.Get(statsURL) - suite.NoError(err) - defer res.Body.Close() - stats := &statistics.RegionStats{} - err = apiutil.ReadJSON(res.Body, stats) - suite.NoError(err) - suite.Equal(statsAll, stats) - - args := fmt.Sprintf("?start_key=%s&end_key=%s", url.QueryEscape("\x01\x02"), url.QueryEscape("xyz\x00\x00")) - res, err = testDialClient.Get(statsURL + args) - suite.NoError(err) - defer res.Body.Close() - stats = &statistics.RegionStats{} - err = apiutil.ReadJSON(res.Body, stats) - suite.NoError(err) - suite.Equal(statsAll, stats) stats23 := &statistics.RegionStats{ Count: 2, @@ -176,12 +160,40 @@ func (suite *statsTestSuite) TestRegionStats() { StorePeerKeys: map[uint64]int64{1: 151, 4: 150, 5: 151}, } - args = fmt.Sprintf("?start_key=%s&end_key=%s", url.QueryEscape("a"), url.QueryEscape("x")) - res, err = testDialClient.Get(statsURL + args) - suite.NoError(err) - defer res.Body.Close() - stats = &statistics.RegionStats{} - err = apiutil.ReadJSON(res.Body, stats) - suite.NoError(err) - suite.Equal(stats23, stats) + testdata := []struct { + startKey string + endKey string + expect *statistics.RegionStats + }{ + { + startKey: "", + endKey: "", + expect: statsAll, + }, { + startKey: url.QueryEscape("\x01\x02"), + endKey: url.QueryEscape("xyz\x00\x00"), + expect: statsAll, + }, + { + startKey: url.QueryEscape("a"), + endKey: url.QueryEscape("x"), + expect: stats23, + }, + } + + for _, data := range testdata { + for _, query := range []string{"", "count"} { + args := fmt.Sprintf("?start_key=%s&end_key=%s&%s", data.startKey, data.endKey, query) + res, err := testDialClient.Get(statsURL + args) + suite.NoError(err) + defer 
res.Body.Close() + stats := &statistics.RegionStats{} + err = apiutil.ReadJSON(res.Body, stats) + suite.NoError(err) + suite.Equal(data.expect.Count, stats.Count) + if query != "count" { + suite.Equal(data.expect, stats) + } + } + } } diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index 984c37812c1c..c73df22d1e6e 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -1969,6 +1969,13 @@ func (c *RaftCluster) GetRegionStats(startKey, endKey []byte) *statistics.Region return statistics.GetRegionStats(c.core.ScanRange(startKey, endKey, -1)) } +// GetRangeCount returns the number of regions in the range. +func (c *RaftCluster) GetRangeCount(startKey, endKey []byte) *statistics.RegionStats { + stats := &statistics.RegionStats{} + stats.Count = c.core.GetRangeCount(startKey, endKey) + return stats +} + // GetStoresStats returns stores' statistics from cluster. // And it will be unnecessary to filter unhealthy store, because it has been solved in process heartbeat func (c *RaftCluster) GetStoresStats() *statistics.StoresStats { diff --git a/server/core/basic_cluster.go b/server/core/basic_cluster.go index 997434b649f6..e6bc2bb8d756 100644 --- a/server/core/basic_cluster.go +++ b/server/core/basic_cluster.go @@ -366,6 +366,13 @@ func (bc *BasicCluster) ScanRange(startKey, endKey []byte, limit int) []*RegionI return bc.Regions.ScanRange(startKey, endKey, limit) } +// GetRangeCount returns the number of regions that overlap with the range [startKey, endKey). +func (bc *BasicCluster) GetRangeCount(startKey, endKey []byte) int { + bc.Regions.mu.RLock() + defer bc.Regions.mu.RUnlock() + return bc.Regions.GetRangeCount(startKey, endKey) +} + // GetOverlaps returns the regions which are overlapped with the specified region range. func (bc *BasicCluster) GetOverlaps(region *RegionInfo) []*RegionInfo { bc.Regions.mu.RLock() diff --git a/server/core/region.go b/server/core/region.go index cdf961749c4a..524daeb7723a 100644 --- a/server/core/region.go +++ b/server/core/region.go @@ -32,6 +32,7 @@ import ( "github.com/pingcap/kvproto/pkg/replication_modepb" "github.com/pingcap/log" "github.com/tikv/pd/pkg/logutil" + "github.com/tikv/pd/pkg/rangetree" "github.com/tikv/pd/pkg/typeutil" "go.uber.org/zap" ) @@ -1174,6 +1175,27 @@ func (r *RegionInfo) GetWriteLoads() []float64 { } } +// GetRangeCount returns the number of regions that overlap with the range [startKey, endKey). +func (r *RegionsInfo) GetRangeCount(startKey, endKey []byte) int { + start := ®ionItem{&RegionInfo{meta: &metapb.Region{StartKey: startKey}}} + end := ®ionItem{&RegionInfo{meta: &metapb.Region{StartKey: endKey}}} + // it returns 0 if startKey is nil. + _, startIndex := r.tree.tree.GetWithIndex(start) + var endIndex int + var item rangetree.RangeItem + // it should return the length of the tree if endKey is nil. + if len(endKey) == 0 { + endIndex = r.tree.tree.Len() - 1 + } else { + item, endIndex = r.tree.tree.GetWithIndex(end) + // it should return the endIndex - 1 if the endKey is the startKey of a region. + if item != nil && bytes.Equal(item.GetStartKey(), endKey) { + endIndex-- + } + } + return endIndex - startIndex + 1 +} + // ScanRange scans regions intersecting [start key, end key), returns at most // `limit` regions. limit <= 0 means no limit. 
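GetRangeCount above answers a pure count query with index arithmetic on the region tree instead of materializing every region the way ScanRange (below) does, which is what makes the new count query parameter cheap. The same idea on a plain sorted slice of region start keys, as an illustrative sketch (it assumes the first region starts at the empty key, so the regions cover the whole key space):

package main

import (
	"fmt"
	"sort"
)

// predecessorIndex returns the index of the last region whose start key is <= key,
// roughly what GetWithIndex provides on the btree.
func predecessorIndex(startKeys []string, key string) int {
	i := sort.SearchStrings(startKeys, key) // first index with startKeys[i] >= key
	if i < len(startKeys) && startKeys[i] == key {
		return i
	}
	return i - 1
}

// rangeCount counts the regions overlapping [startKey, endKey) without visiting them.
func rangeCount(startKeys []string, startKey, endKey string) int {
	startIndex := predecessorIndex(startKeys, startKey)
	endIndex := len(startKeys) - 1 // an empty endKey means "to the end of the key space"
	if endKey != "" {
		endIndex = predecessorIndex(startKeys, endKey)
		if startKeys[endIndex] == endKey {
			endIndex-- // a region starting exactly at endKey is outside the half-open range
		}
	}
	return endIndex - startIndex + 1
}

func main() {
	startKeys := []string{"", "a", "t", "x"} // regions ["","a"), ["a","t"), ["t","x"), ["x", +inf)
	fmt.Println(rangeCount(startKeys, "", ""))   // 4
	fmt.Println(rangeCount(startKeys, "a", "x")) // 2
	fmt.Println(rangeCount(startKeys, "b", "u")) // 2
}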
func (r *RegionsInfo) ScanRange(startKey, endKey []byte, limit int) []*RegionInfo { @@ -1185,7 +1207,7 @@ func (r *RegionsInfo) ScanRange(startKey, endKey []byte, limit int) []*RegionInf if limit > 0 && len(res) >= limit { return false } - res = append(res, r.GetRegion(region.GetID())) + res = append(res, region) return true }) return res From 80f0d8ca4d07a6b02e0dd5aa3ed2a96e9d9857ee Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Mon, 31 Oct 2022 10:57:58 +0800 Subject: [PATCH 23/67] cluster: support external timestamp (#5631) close tikv/pd#5637, ref pingcap/tidb#38274 Signed-off-by: lhy1024 --- client/client.go | 34 +++++++ client/go.mod | 2 +- client/go.sum | 4 +- go.mod | 2 +- go.sum | 4 +- pkg/tsoutil/tso.go | 26 +++++- server/cluster/cluster.go | 25 +++++ server/grpc_service.go | 61 +++++++++++- server/server.go | 36 ++++++++ server/storage/endpoint/external_timestamp.go | 54 +++++++++++ server/storage/endpoint/key_path.go | 8 +- server/storage/storage.go | 1 + server/tso/allocator_manager.go | 9 ++ tests/client/go.mod | 2 +- tests/client/go.sum | 5 +- tests/server/cluster/cluster_test.go | 92 +++++++++++++++++++ tools/pd-tso-bench/go.sum | 2 + 17 files changed, 352 insertions(+), 15 deletions(-) create mode 100644 server/storage/endpoint/external_timestamp.go diff --git a/client/client.go b/client/client.go index bc03c24dd082..b7e15fe6eb23 100644 --- a/client/client.go +++ b/client/client.go @@ -130,6 +130,11 @@ type Client interface { // UpdateOption updates the client option. UpdateOption(option DynamicOption, value interface{}) error + // GetExternalTimestamp returns external timestamp + GetExternalTimestamp(ctx context.Context) (uint64, error) + // SetExternalTimestamp sets external timestamp + SetExternalTimestamp(ctx context.Context, timestamp uint64) error + // KeyspaceClient manages keyspace metadata. KeyspaceClient // Close closes the client. 
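The two methods added to the Client interface above wrap the new SetExternalTimestamp and GetExternalTimestamp RPCs. A rough usage sketch (the PD endpoint is a placeholder and error handling is reduced to panics; the server rejects values that do not move the timestamp forward or that run ahead of the global TSO):

package main

import (
	"context"
	"fmt"
	"time"

	pd "github.com/tikv/pd/client"
)

func main() {
	cli, err := pd.NewClientWithContext(context.Background(), []string{"127.0.0.1:2379"}, pd.SecurityOption{})
	if err != nil {
		panic(err)
	}
	defer cli.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	current, err := cli.GetExternalTimestamp(ctx)
	if err != nil {
		panic(err)
	}
	// Try to advance the external timestamp by one tick; it must stay behind
	// the global TSO, so a rejection here is a normal outcome, not a bug.
	if err := cli.SetExternalTimestamp(ctx, current+1); err != nil {
		fmt.Println("rejected:", err)
		return
	}
	fmt.Println("external timestamp advanced to", current+1)
}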
@@ -1885,6 +1890,35 @@ func (c *client) WatchGlobalConfig(ctx context.Context) (chan []GlobalConfigItem return globalConfigWatcherCh, err } +func (c *client) GetExternalTimestamp(ctx context.Context) (uint64, error) { + resp, err := c.getClient().GetExternalTimestamp(ctx, &pdpb.GetExternalTimestampRequest{ + Header: c.requestHeader(), + }) + if err != nil { + return 0, err + } + resErr := resp.GetHeader().GetError() + if resErr != nil { + return 0, errors.Errorf("[pd]" + resErr.Message) + } + return resp.GetTimestamp(), nil +} + +func (c *client) SetExternalTimestamp(ctx context.Context, timestamp uint64) error { + resp, err := c.getClient().SetExternalTimestamp(ctx, &pdpb.SetExternalTimestampRequest{ + Header: c.requestHeader(), + Timestamp: timestamp, + }) + if err != nil { + return err + } + resErr := resp.GetHeader().GetError() + if resErr != nil { + return errors.Errorf("[pd]" + resErr.Message) + } + return nil +} + func (c *client) respForErr(observer prometheus.Observer, start time.Time, err error, header *pdpb.ResponseHeader) error { if err != nil || header.GetError() != nil { observer.Observe(time.Since(start).Seconds()) diff --git a/client/go.mod b/client/go.mod index 4401e88b95e8..5978c3372c7b 100644 --- a/client/go.mod +++ b/client/go.mod @@ -6,7 +6,7 @@ require ( github.com/opentracing/opentracing-go v1.2.0 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 - github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad + github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 github.com/prometheus/client_golang v1.11.0 github.com/stretchr/testify v1.7.0 diff --git a/client/go.sum b/client/go.sum index f56e7a130b37..a87ac7297a13 100644 --- a/client/go.sum +++ b/client/go.sum @@ -104,8 +104,8 @@ github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTm github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZFh4N3vQ5HEtld3S+Y+StULhWVvumU0= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= -github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad h1:lGKxsEwdE0pVXzHYD1SQ1vfa3t/bFVU/latrQz8b/w0= -github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 h1:FYgKV9znRQmzVrrJDZ0gUfMIvKLAMU1tu1UKJib8bEQ= +github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY= github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/go.mod b/go.mod index dc013b203dbc..80abf756aa90 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,7 @@ require ( github.com/pingcap/errcode v0.3.0 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce - github.com/pingcap/kvproto v0.0.0-20221014081430-26e28e6a281a + github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 
github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d github.com/pingcap/tidb-dashboard v0.0.0-20220728104842-3743e533b594 diff --git a/go.sum b/go.sum index 91add432cacb..115e14c7ddb6 100644 --- a/go.sum +++ b/go.sum @@ -417,8 +417,8 @@ github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce h1:Y1kCxlCtlPTMt github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce/go.mod h1:w4PEZ5y16LeofeeGwdgZB4ddv9bLyDuIX+ljstgKZyk= github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= github.com/pingcap/kvproto v0.0.0-20200411081810-b85805c9476c/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= -github.com/pingcap/kvproto v0.0.0-20221014081430-26e28e6a281a h1:McYxPhA8SHqfUtLfQHHN0fQl4dy93IkhlX4Pp2MKIFA= -github.com/pingcap/kvproto v0.0.0-20221014081430-26e28e6a281a/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 h1:FYgKV9znRQmzVrrJDZ0gUfMIvKLAMU1tu1UKJib8bEQ= +github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= diff --git a/pkg/tsoutil/tso.go b/pkg/tsoutil/tso.go index 4cf77c58ae5a..796012ae0311 100644 --- a/pkg/tsoutil/tso.go +++ b/pkg/tsoutil/tso.go @@ -27,12 +27,18 @@ const ( // ParseTS parses the ts to (physical,logical). func ParseTS(ts uint64) (time.Time, uint64) { - logical := ts & logicalBits - physical := ts >> physicalShiftBits + physical, logical := ParseTSUint64(ts) physicalTime := time.Unix(int64(physical/1000), int64(physical)%1000*time.Millisecond.Nanoseconds()) return physicalTime, logical } +// ParseTSUint64 parses the ts to (physical,logical). +func ParseTSUint64(ts uint64) (physical uint64, logical uint64) { + logical = ts & logicalBits + physical = ts >> physicalShiftBits + return physical, logical +} + // ParseTimestamp parses `pdpb.Timestamp` to `time.Time` func ParseTimestamp(ts pdpb.Timestamp) (time.Time, uint64) { logical := uint64(ts.GetLogical()) @@ -71,3 +77,19 @@ func CompareTimestamp(tsoOne, tsoTwo *pdpb.Timestamp) int { } return -1 } + +// CompareTimestampUint64 is used to compare two timestamps. +// If tsoOne > tsoTwo, returns 1. +// If tsoOne = tsoTwo, returns 0. +// If tsoOne < tsoTwo, returns -1. +func CompareTimestampUint64(tsoOne, tsoTwo uint64) int { + tsoOnePhysical, tsoOneLogical := ParseTSUint64(tsoOne) + tsoTwoPhysical, tsoTwoLogical := ParseTSUint64(tsoTwo) + if tsoOnePhysical > tsoTwoPhysical || (tsoOnePhysical == tsoTwoPhysical && tsoOneLogical > tsoTwoLogical) { + return 1 + } + if tsoOnePhysical == tsoTwoPhysical && tsoOneLogical == tsoTwoLogical { + return 0 + } + return -1 +} diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index c73df22d1e6e..84ffb1652d75 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -119,6 +119,8 @@ type RaftCluster struct { storeConfigManager *config.StoreConfigManager storage storage.Storage minResolvedTS uint64 + externalTS uint64 + // Keep the previous store limit settings when removing a store. 
prevStoreLimit map[uint64]map[storelimit.Type]float64 @@ -270,6 +272,10 @@ func (c *RaftCluster) Start(s Server) error { c.coordinator = newCoordinator(c.ctx, cluster, s.GetHBStreams()) c.regionStats = statistics.NewRegionStatistics(c.opt, c.ruleManager, c.storeConfigManager) c.limiter = NewStoreLimiter(s.GetPersistOptions()) + c.externalTS, err = c.storage.LoadExternalTS() + if err != nil { + log.Error("load external timestamp meets error", zap.Error(err)) + } c.wg.Add(8) go c.runCoordinator() @@ -2229,6 +2235,25 @@ func (c *RaftCluster) GetMinResolvedTS() uint64 { return c.minResolvedTS } +// GetExternalTS returns the external timestamp. +func (c *RaftCluster) GetExternalTS() uint64 { + c.RLock() + defer c.RUnlock() + if !c.isInitialized() { + return math.MaxUint64 + } + return c.externalTS +} + +// SetExternalTS sets the external timestamp. +func (c *RaftCluster) SetExternalTS(timestamp uint64) error { + c.Lock() + defer c.Unlock() + c.externalTS = timestamp + c.storage.SaveExternalTS(timestamp) + return nil +} + // SetStoreLimit sets a store limit for a given type and rate. func (c *RaftCluster) SetStoreLimit(storeID uint64, typ storelimit.Type, ratePerMin float64) error { old := c.opt.GetScheduleConfig().Clone() diff --git a/server/grpc_service.go b/server/grpc_service.go index 570fd847218b..2165e2e75264 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -1541,6 +1541,13 @@ func (s *GrpcServer) incompatibleVersion(tag string) *pdpb.ResponseHeader { }) } +func (s *GrpcServer) invalidValue(msg string) *pdpb.ResponseHeader { + return s.errorHeader(&pdpb.Error{ + Type: pdpb.ErrorType_INVALID_VALUE, + Message: msg, + }) +} + // Only used for the TestLocalAllocatorLeaderChange. var mockLocalAllocatorLeaderChangeFlag = false @@ -2005,8 +2012,8 @@ func (s *GrpcServer) ReportMinResolvedTS(ctx context.Context, request *pdpb.Repo return &pdpb.ReportMinResolvedTsResponse{Header: s.notBootstrappedHeader()}, nil } - storeID := request.StoreId - minResolvedTS := request.MinResolvedTs + storeID := request.GetStoreId() + minResolvedTS := request.GetMinResolvedTs() if err := rc.SetMinResolvedTS(storeID, minResolvedTS); err != nil { return nil, err } @@ -2017,3 +2024,53 @@ func (s *GrpcServer) ReportMinResolvedTS(ctx context.Context, request *pdpb.Repo Header: s.header(), }, nil } + +// SetExternalTimestamp implements gRPC PDServer. +func (s *GrpcServer) SetExternalTimestamp(ctx context.Context, request *pdpb.SetExternalTimestampRequest) (*pdpb.SetExternalTimestampResponse, error) { + forwardedHost := getForwardedHost(ctx) + if !s.isLocalRequest(forwardedHost) { + client, err := s.getDelegateClient(ctx, forwardedHost) + if err != nil { + return nil, err + } + ctx = grpcutil.ResetForwardContext(ctx) + return pdpb.NewPDClient(client).SetExternalTimestamp(ctx, request) + } + + if err := s.validateRequest(request.GetHeader()); err != nil { + return nil, err + } + + timestamp := request.GetTimestamp() + if err := s.SetExternalTS(timestamp); err != nil { + return &pdpb.SetExternalTimestampResponse{Header: s.invalidValue(err.Error())}, nil + } + log.Debug("set external timestamp", + zap.Uint64("timestamp", timestamp)) + return &pdpb.SetExternalTimestampResponse{ + Header: s.header(), + }, nil +} + +// GetExternalTimestamp implements gRPC PDServer. 
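The SetExternalTimestamp handler above delegates validation to Server.SetExternalTS, which relies on the CompareTimestampUint64 helper added to pkg/tsoutil in this patch. A standalone sketch of that comparison, assuming PD's usual TSO layout where the low 18 bits carry the logical counter:

package main

import "fmt"

const physicalShiftBits = 18 // assumed layout: ts = physical<<18 | logical

func parseTS(ts uint64) (physical, logical uint64) {
	return ts >> physicalShiftBits, ts & ((1 << physicalShiftBits) - 1)
}

// compareTS returns 1, 0 or -1, mirroring CompareTimestampUint64.
func compareTS(a, b uint64) int {
	ap, al := parseTS(a)
	bp, bl := parseTS(b)
	switch {
	case ap > bp || (ap == bp && al > bl):
		return 1
	case ap == bp && al == bl:
		return 0
	default:
		return -1
	}
}

func main() {
	a := uint64(100)<<physicalShiftBits | 5
	b := uint64(100)<<physicalShiftBits | 7
	fmt.Println(compareTS(a, b), compareTS(b, a), compareTS(a, a)) // -1 1 0
}

Because the logical counter sits in the low bits, comparing the raw uint64 values directly would give the same ordering; the structured form keeps the helper symmetric with the existing pdpb.Timestamp comparator.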
+func (s *GrpcServer) GetExternalTimestamp(ctx context.Context, request *pdpb.GetExternalTimestampRequest) (*pdpb.GetExternalTimestampResponse, error) { + forwardedHost := getForwardedHost(ctx) + if !s.isLocalRequest(forwardedHost) { + client, err := s.getDelegateClient(ctx, forwardedHost) + if err != nil { + return nil, err + } + ctx = grpcutil.ResetForwardContext(ctx) + return pdpb.NewPDClient(client).GetExternalTimestamp(ctx, request) + } + + if err := s.validateRequest(request.GetHeader()); err != nil { + return nil, err + } + + timestamp := s.GetExternalTS() + return &pdpb.GetExternalTimestampResponse{ + Header: s.header(), + Timestamp: timestamp, + }, nil +} diff --git a/server/server.go b/server/server.go index d3ce4fa20027..433be6f1dfc0 100644 --- a/server/server.go +++ b/server/server.go @@ -48,6 +48,7 @@ import ( "github.com/tikv/pd/pkg/logutil" "github.com/tikv/pd/pkg/ratelimit" "github.com/tikv/pd/pkg/systimemon" + "github.com/tikv/pd/pkg/tsoutil" "github.com/tikv/pd/pkg/typeutil" "github.com/tikv/pd/server/cluster" "github.com/tikv/pd/server/config" @@ -1678,3 +1679,38 @@ func (s *Server) UnmarkSnapshotRecovering(ctx context.Context) error { func (s *Server) RecoverAllocID(ctx context.Context, id uint64) error { return s.idAllocator.SetBase(id) } + +// GetGlobalTS returns global tso. +func (s *Server) GetGlobalTS() (uint64, error) { + ts, err := s.tsoAllocatorManager.GetGlobalTSO() + if err != nil { + return 0, err + } + return tsoutil.GenerateTS(ts), nil +} + +// GetExternalTS returns external timestamp. +func (s *Server) GetExternalTS() uint64 { + return s.GetRaftCluster().GetExternalTS() +} + +// SetExternalTS returns external timestamp. +func (s *Server) SetExternalTS(externalTS uint64) error { + globalTS, err := s.GetGlobalTS() + if err != nil { + return err + } + if tsoutil.CompareTimestampUint64(externalTS, globalTS) == 1 { + desc := "the external timestamp should not be larger than global ts" + log.Error(desc, zap.Uint64("request timestamp", externalTS), zap.Uint64("global ts", globalTS)) + return errors.New(desc) + } + currentExternalTS := s.GetRaftCluster().GetExternalTS() + if tsoutil.CompareTimestampUint64(externalTS, currentExternalTS) != 1 { + desc := "the external timestamp should be larger than now" + log.Error(desc, zap.Uint64("request timestamp", externalTS), zap.Uint64("current external timestamp", currentExternalTS)) + return errors.New(desc) + } + s.GetRaftCluster().SetExternalTS(externalTS) + return nil +} diff --git a/server/storage/endpoint/external_timestamp.go b/server/storage/endpoint/external_timestamp.go new file mode 100644 index 000000000000..bc5377d19f49 --- /dev/null +++ b/server/storage/endpoint/external_timestamp.go @@ -0,0 +1,54 @@ +// Copyright 2022 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package endpoint + +import ( + "strconv" + + "github.com/tikv/pd/pkg/errs" +) + +// ExternalTimestamp is the external timestamp. +// NOTE: This type is exported by HTTP API. 
Please pay more attention when modifying it. +type ExternalTimestamp struct { + ExternalTimestamp uint64 `json:"external_timestamp"` +} + +// ExternalTSStorage defines the storage operations on the external timestamp. +type ExternalTSStorage interface { + LoadExternalTS() (uint64, error) + SaveExternalTS(timestamp uint64) error +} + +var _ ExternalTSStorage = (*StorageEndpoint)(nil) + +// LoadExternalTS loads the external timestamp from storage. +func (se *StorageEndpoint) LoadExternalTS() (uint64, error) { + value, err := se.Load(ExternalTimestampPath()) + if err != nil || value == "" { + return 0, err + } + timestamp, err := strconv.ParseUint(value, 16, 64) + if err != nil { + return 0, errs.ErrStrconvParseUint.Wrap(err).GenWithStackByArgs() + } + return timestamp, nil +} + +// SaveExternalTS saves the external timestamp. +func (se *StorageEndpoint) SaveExternalTS(timestamp uint64) error { + value := strconv.FormatUint(timestamp, 16) + return se.Save(ExternalTimestampPath(), value) +} diff --git a/server/storage/endpoint/key_path.go b/server/storage/endpoint/key_path.go index 4800a7967dd9..3d73b3a5d4f9 100644 --- a/server/storage/endpoint/key_path.go +++ b/server/storage/endpoint/key_path.go @@ -34,6 +34,7 @@ const ( customScheduleConfigPath = "scheduler_config" gcWorkerServiceSafePointID = "gc_worker" minResolvedTS = "min_resolved_ts" + externalTimeStamp = "external_timestamp" keyspaceSafePointPrefix = "keyspaces/gc_safepoint" keyspaceGCSafePointSuffix = "gc" keyspacePrefix = "keyspaces" @@ -132,11 +133,16 @@ func gcSafePointServicePath(serviceID string) string { return path.Join(gcSafePointPath(), "service", serviceID) } -// MinResolvedTSPath returns the min resolved ts path +// MinResolvedTSPath returns the min resolved ts path. func MinResolvedTSPath() string { return path.Join(clusterPath, minResolvedTS) } +// ExternalTimestampPath returns the external timestamp path. +func ExternalTimestampPath() string { + return path.Join(clusterPath, externalTimeStamp) +} + // KeySpaceServiceSafePointPrefix returns the prefix of given service's service safe point. // Prefix: /keyspaces/gc_safepoint/{space_id}/service/ func KeySpaceServiceSafePointPrefix(spaceID string) string { diff --git a/server/storage/storage.go b/server/storage/storage.go index 78b8bcd32839..d441dab0f7b8 100644 --- a/server/storage/storage.go +++ b/server/storage/storage.go @@ -39,6 +39,7 @@ type Storage interface { endpoint.ReplicationStatusStorage endpoint.GCSafePointStorage endpoint.MinResolvedTSStorage + endpoint.ExternalTSStorage endpoint.KeySpaceGCSafePointStorage endpoint.KeyspaceStorage } diff --git a/server/tso/allocator_manager.go b/server/tso/allocator_manager.go index bd359bc99b87..8ce1b898287c 100644 --- a/server/tso/allocator_manager.go +++ b/server/tso/allocator_manager.go @@ -1125,6 +1125,15 @@ func (am *AllocatorManager) GetMaxLocalTSO(ctx context.Context) (*pdpb.Timestamp return maxTSO, nil } +// GetGlobalTSO returns global tso. 
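LoadExternalTS and SaveExternalTS above persist the timestamp under the new external_timestamp key as a base-16 string; a quick round-trip check of that encoding, for illustration only:

package main

import (
	"fmt"
	"strconv"
)

func main() {
	ts := uint64(438349283902357507)
	value := strconv.FormatUint(ts, 16)           // what SaveExternalTS writes
	back, err := strconv.ParseUint(value, 16, 64) // what LoadExternalTS reads back
	if err != nil {
		panic(err)
	}
	fmt.Println(value, back == ts) // a hex string, true
}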
+func (am *AllocatorManager) GetGlobalTSO() (*pdpb.Timestamp, error) { + globalAllocator, err := am.GetAllocator(GlobalDCLocation) + if err != nil { + return nil, err + } + return globalAllocator.(*GlobalTSOAllocator).getCurrentTSO() +} + func (am *AllocatorManager) getGRPCConn(addr string) (*grpc.ClientConn, bool) { am.localAllocatorConn.RLock() defer am.localAllocatorConn.RUnlock() diff --git a/tests/client/go.mod b/tests/client/go.mod index 4cdc8f323e30..64ffd2938a84 100644 --- a/tests/client/go.mod +++ b/tests/client/go.mod @@ -5,7 +5,7 @@ go 1.18 require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 - github.com/pingcap/kvproto v0.0.0-20221014081430-26e28e6a281a + github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 github.com/stretchr/testify v1.7.0 github.com/tikv/pd v0.0.0-00010101000000-000000000000 github.com/tikv/pd/client v0.0.0-00010101000000-000000000000 diff --git a/tests/client/go.sum b/tests/client/go.sum index 4591bd2ef17f..66881f5c1302 100644 --- a/tests/client/go.sum +++ b/tests/client/go.sum @@ -390,9 +390,8 @@ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= github.com/pingcap/kvproto v0.0.0-20200411081810-b85805c9476c/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= -github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= -github.com/pingcap/kvproto v0.0.0-20221014081430-26e28e6a281a h1:McYxPhA8SHqfUtLfQHHN0fQl4dy93IkhlX4Pp2MKIFA= -github.com/pingcap/kvproto v0.0.0-20221014081430-26e28e6a281a/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 h1:FYgKV9znRQmzVrrJDZ0gUfMIvKLAMU1tu1UKJib8bEQ= +github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= diff --git a/tests/server/cluster/cluster_test.go b/tests/server/cluster/cluster_test.go index c791b891c0ec..5235ed9abf42 100644 --- a/tests/server/cluster/cluster_test.go +++ b/tests/server/cluster/cluster_test.go @@ -33,6 +33,7 @@ import ( "github.com/tikv/pd/pkg/dashboard" "github.com/tikv/pd/pkg/mock/mockid" "github.com/tikv/pd/pkg/testutil" + "github.com/tikv/pd/pkg/tsoutil" "github.com/tikv/pd/pkg/typeutil" "github.com/tikv/pd/server" "github.com/tikv/pd/server/cluster" @@ -43,6 +44,7 @@ import ( syncer "github.com/tikv/pd/server/region_syncer" "github.com/tikv/pd/server/schedule/operator" "github.com/tikv/pd/server/storage" + "github.com/tikv/pd/server/tso" "github.com/tikv/pd/tests" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -1439,3 +1441,93 @@ func TestTransferLeaderBack(t *testing.T) { re.Equal(meta, rc.GetMetaCluster()) re.Equal(3, rc.GetStoreCount()) } + +func TestExternalTimestamp(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + tc, err := tests.NewTestCluster(ctx, 
1) + defer tc.Destroy() + re.NoError(err) + err = tc.RunInitialServers() + re.NoError(err) + tc.WaitLeader() + leaderServer := tc.GetServer(tc.GetLeader()) + grpcPDClient := testutil.MustNewGrpcClient(re, leaderServer.GetAddr()) + clusterID := leaderServer.GetClusterID() + bootstrapCluster(re, clusterID, grpcPDClient) + rc := leaderServer.GetRaftCluster() + store := &metapb.Store{ + Id: 1, + Version: "v6.0.0", + Address: "127.0.0.1:" + strconv.Itoa(int(1)), + } + resp, err := putStore(grpcPDClient, clusterID, store) + re.NoError(err) + re.Equal(pdpb.ErrorType_OK, resp.GetHeader().GetError().GetType()) + id := leaderServer.GetAllocator() + putRegionWithLeader(re, rc, id, 1) + time.Sleep(100 * time.Millisecond) + + ts := uint64(233) + { // case1: set external timestamp + req := &pdpb.SetExternalTimestampRequest{ + Header: testutil.NewRequestHeader(clusterID), + Timestamp: ts, + } + _, err = grpcPDClient.SetExternalTimestamp(context.Background(), req) + re.NoError(err) + + req2 := &pdpb.GetExternalTimestampRequest{ + Header: testutil.NewRequestHeader(clusterID), + } + resp2, err := grpcPDClient.GetExternalTimestamp(context.Background(), req2) + re.NoError(err) + re.Equal(ts, resp2.GetTimestamp()) + } + + { // case2: set external timestamp less than now + req := &pdpb.SetExternalTimestampRequest{ + Header: testutil.NewRequestHeader(clusterID), + Timestamp: ts - 1, + } + _, err = grpcPDClient.SetExternalTimestamp(context.Background(), req) + re.NoError(err) + + req2 := &pdpb.GetExternalTimestampRequest{ + Header: testutil.NewRequestHeader(clusterID), + } + resp2, err := grpcPDClient.GetExternalTimestamp(context.Background(), req2) + re.NoError(err) + re.Equal(ts, resp2.GetTimestamp()) + } + + { // case3: set external timestamp larger than global ts + req := &pdpb.TsoRequest{ + Header: testutil.NewRequestHeader(clusterID), + Count: 1, + DcLocation: tso.GlobalDCLocation, + } + tsoClient, err := grpcPDClient.Tso(ctx) + re.NoError(err) + defer tsoClient.CloseSend() + re.NoError(tsoClient.Send(req)) + resp, err := tsoClient.Recv() + re.NoError(err) + globalTS := tsoutil.GenerateTS(resp.Timestamp) + + req2 := &pdpb.SetExternalTimestampRequest{ + Header: testutil.NewRequestHeader(clusterID), + Timestamp: globalTS + 1, + } + _, err = grpcPDClient.SetExternalTimestamp(context.Background(), req2) + re.NoError(err) + + req3 := &pdpb.GetExternalTimestampRequest{ + Header: testutil.NewRequestHeader(clusterID), + } + resp2, err := grpcPDClient.GetExternalTimestamp(context.Background(), req3) + re.NoError(err) + re.Equal(ts, resp2.GetTimestamp()) + } +} diff --git a/tools/pd-tso-bench/go.sum b/tools/pd-tso-bench/go.sum index 068c3a13b5b4..53b4231cdc6b 100644 --- a/tools/pd-tso-bench/go.sum +++ b/tools/pd-tso-bench/go.sum @@ -106,6 +106,8 @@ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad h1:lGKxsEwdE0pVXzHYD1SQ1vfa3t/bFVU/latrQz8b/w0= github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 h1:FYgKV9znRQmzVrrJDZ0gUfMIvKLAMU1tu1UKJib8bEQ= +github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY= 
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= From 5916a46deca5d467017fc1c5d5c0c8f58726accc Mon Sep 17 00:00:00 2001 From: ShuNing Date: Tue, 1 Nov 2022 11:17:58 +0800 Subject: [PATCH 24/67] merge_checker: use leader info to valid region (#5655) close tikv/pd#4399 merge_checker: use leader info to valid region Signed-off-by: nolouch --- server/schedule/checker/merge_checker.go | 7 ++----- server/schedule/checker/merge_checker_test.go | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/server/schedule/checker/merge_checker.go b/server/schedule/checker/merge_checker.go index 4b4bde625d1a..4dc59340c83b 100644 --- a/server/schedule/checker/merge_checker.go +++ b/server/schedule/checker/merge_checker.go @@ -101,11 +101,8 @@ func (m *MergeChecker) Check(region *core.RegionInfo) []*operator.Operator { } // when pd just started, it will load region meta from region storage, - // but the size for these loaded region info is 0 - // pd don't know the real size of one region until the first heartbeat of the region - // thus here when size is 0, just skip. - if region.GetApproximateSize() == 0 { - checkerCounter.WithLabelValues("merge_checker", "skip").Inc() + if region.GetLeader() == nil { + checkerCounter.WithLabelValues("merge_checker", "skip-uninit-region").Inc() return nil } diff --git a/server/schedule/checker/merge_checker_test.go b/server/schedule/checker/merge_checker_test.go index 01cd18b0af12..eefa07d4fe0a 100644 --- a/server/schedule/checker/merge_checker_test.go +++ b/server/schedule/checker/merge_checker_test.go @@ -74,7 +74,7 @@ func (suite *mergeCheckerTestSuite) SetupTest() { suite.regions = []*core.RegionInfo{ newRegionInfo(1, "", "a", 1, 1, []uint64{101, 1}, []uint64{101, 1}, []uint64{102, 2}), newRegionInfo(2, "a", "t", 200, 200, []uint64{104, 4}, []uint64{103, 1}, []uint64{104, 4}, []uint64{105, 5}), - newRegionInfo(3, "t", "x", 1, 1, []uint64{108, 6}, []uint64{106, 2}, []uint64{107, 5}, []uint64{108, 6}), + newRegionInfo(3, "t", "x", 0, 0, []uint64{108, 6}, []uint64{106, 2}, []uint64{107, 5}, []uint64{108, 6}), newRegionInfo(4, "x", "", 1, 1, []uint64{109, 4}, []uint64{109, 4}), } From 1bda03e9486f35963e7fba1b09eb153b239ffbdd Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Tue, 1 Nov 2022 18:10:00 +0800 Subject: [PATCH 25/67] tools: accelerate prepare speed (#5662) ref tikv/pd#5468 Signed-off-by: Ryan Leung --- tools/pd-simulator/simulator/drive.go | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/pd-simulator/simulator/drive.go b/tools/pd-simulator/simulator/drive.go index 86d798e69bbb..312b4f6a3d4b 100644 --- a/tools/pd-simulator/simulator/drive.go +++ b/tools/pd-simulator/simulator/drive.go @@ -16,14 +16,19 @@ package simulator import ( "context" + "path" + "strconv" "sync" + "time" "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/metapb" + "github.com/tikv/pd/pkg/typeutil" "github.com/tikv/pd/server/core" "github.com/tikv/pd/tools/pd-simulator/simulator/cases" "github.com/tikv/pd/tools/pd-simulator/simulator/info" "github.com/tikv/pd/tools/pd-simulator/simulator/simutil" + "go.etcd.io/etcd/clientv3" "go.uber.org/zap" ) @@ -86,7 +91,28 @@ func (d *Driver) Prepare() error { } // Setup alloc id. + // TODO: This is a hack way. Once we have reset alloc ID API, we need to replace it. 
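The code below seeds the alloc_id key directly in etcd so that later AllocID calls start beyond the IDs the case has already generated; the value is written with typeutil.Uint64ToBytes. Assuming that helper uses the conventional 8-byte big-endian encoding, the standard-library equivalent is:

package main

import (
	"encoding/binary"
	"fmt"
)

// uint64ToBytes writes a uint64 as 8 big-endian bytes, the format assumed to
// be produced by typeutil.Uint64ToBytes for the alloc_id value below.
func uint64ToBytes(v uint64) []byte {
	b := make([]byte, 8)
	binary.BigEndian.PutUint64(b, v)
	return b
}

func main() {
	maxID := uint64(42)
	fmt.Printf("%x\n", uint64ToBytes(maxID+1000)) // 0000000000000412
}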
maxID := cases.IDAllocator.GetID() + requestTimeout := 10 * time.Second + etcdTimeout := 3 * time.Second + etcdClient, err := clientv3.New(clientv3.Config{ + Endpoints: []string{d.pdAddr}, + DialTimeout: etcdTimeout, + }) + if err != nil { + return err + } + ctx, cancel = context.WithTimeout(context.Background(), requestTimeout) + clusterID := d.client.GetClusterID(ctx) + rootPath := path.Join("/pd", strconv.FormatUint(clusterID, 10)) + allocIDPath := path.Join(rootPath, "alloc_id") + _, err = etcdClient.Put(ctx, allocIDPath, string(typeutil.Uint64ToBytes(maxID+1000))) + if err != nil { + cancel() + return err + } + cancel() + for { var id uint64 id, err = d.client.AllocID(context.Background()) From 25982e60b78a2b16a6d9be49637b6ce89a19d234 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Tue, 1 Nov 2022 22:04:00 +0800 Subject: [PATCH 26/67] schedule: fix bug for disbaled move leader (#5664) close tikv/pd#5665 Signed-off-by: bufferflies <1045931706@qq.com> --- server/schedule/filter/filters.go | 10 +++++++++- server/schedule/filter/filters_test.go | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/server/schedule/filter/filters.go b/server/schedule/filter/filters.go index df364117d050..2951a46ee160 100644 --- a/server/schedule/filter/filters.go +++ b/server/schedule/filter/filters.go @@ -18,6 +18,7 @@ import ( "strconv" "github.com/pingcap/kvproto/pkg/metapb" + "github.com/pingcap/log" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/typeutil" "github.com/tikv/pd/server/config" @@ -25,6 +26,7 @@ import ( "github.com/tikv/pd/server/core/storelimit" "github.com/tikv/pd/server/schedule/placement" "github.com/tikv/pd/server/schedule/plan" + "go.uber.org/zap" ) // SelectSourceStores selects stores that be selected as source store from the list. @@ -658,7 +660,13 @@ func (f *ruleLeaderFitFilter) Source(_ *config.PersistOptions, _ *core.StoreInfo return statusOK } -func (f *ruleLeaderFitFilter) Target(options *config.PersistOptions, store *core.StoreInfo) *plan.Status { +func (f *ruleLeaderFitFilter) Target(_ *config.PersistOptions, store *core.StoreInfo) *plan.Status { + targetStoreID := store.GetID() + targetPeer := f.region.GetStorePeer(targetStoreID) + if targetPeer == nil && !f.allowMoveLeader { + log.Warn("ruleLeaderFitFilter couldn't find peer on target Store", zap.Uint64("target-store", store.GetID())) + return statusStoreNotMatchRule + } if f.oldFit.Replace(f.srcLeaderStoreID, store, f.region) { return statusOK } diff --git a/server/schedule/filter/filters_test.go b/server/schedule/filter/filters_test.go index adb7d8aeb696..aef7cec1b8f6 100644 --- a/server/schedule/filter/filters_test.go +++ b/server/schedule/filter/filters_test.go @@ -135,7 +135,13 @@ func TestRuleFitFilter(t *testing.T) { filter := newRuleFitFilter("", testCluster.GetBasicCluster(), testCluster.GetRuleManager(), region, 1) re.Equal(testCase.sourceRes, filter.Source(testCluster.GetOpts(), testCluster.GetStore(testCase.storeID)).StatusCode) re.Equal(testCase.targetRes, filter.Target(testCluster.GetOpts(), testCluster.GetStore(testCase.storeID)).StatusCode) + leaderFilter := newRuleLeaderFitFilter("", testCluster.GetBasicCluster(), testCluster.GetRuleManager(), region, 1, true) + re.Equal(testCase.targetRes, leaderFilter.Target(testCluster.GetOpts(), testCluster.GetStore(testCase.storeID)).StatusCode) } + + // store-6 is not exist in the peers, so it will not allow transferring leader to store 6. 
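// With allowMoveLeader set to false, the Target check added above returns
// statusStoreNotMatchRule whenever the candidate store holds no peer of the region,
// which is why store 6 must be rejected here.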
+ leaderFilter := newRuleLeaderFitFilter("", testCluster.GetBasicCluster(), testCluster.GetRuleManager(), region, 1, false) + re.False(leaderFilter.Target(testCluster.GetOpts(), testCluster.GetStore(6)).IsOK()) } func TestStoreStateFilter(t *testing.T) { From 72e558ea8791ade6add0a07b5040d5f12cbb2d0a Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Wed, 2 Nov 2022 12:22:01 +0800 Subject: [PATCH 27/67] schedule: remove unused function (#5666) ref tikv/pd#4399, ref tikv/pd#5523 Signed-off-by: lhy1024 --- server/schedule/placement/fit.go | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/server/schedule/placement/fit.go b/server/schedule/placement/fit.go index 33e0af2dbc95..ac8df67854cd 100644 --- a/server/schedule/placement/fit.go +++ b/server/schedule/placement/fit.go @@ -116,27 +116,6 @@ func (f *RegionFit) GetRegionStores() []*core.StoreInfo { return f.regionStores } -// CompareRegionFit determines the superiority of 2 fits. -// It returns 1 when the first fit result is better. -func CompareRegionFit(a, b *RegionFit) int { - for i := range a.RuleFits { - if i >= len(b.RuleFits) { - break - } - if cmp := compareRuleFit(a.RuleFits[i], b.RuleFits[i]); cmp != 0 { - return cmp - } - } - switch { - case len(a.OrphanPeers) < len(b.OrphanPeers): - return 1 - case len(a.OrphanPeers) > len(b.OrphanPeers): - return -1 - default: - return 0 - } -} - // RuleFit is the result of fitting status of a Rule. type RuleFit struct { Rule *Rule `json:"rule"` From 54ce3173f1335bf877ad37f61b588d785d4ec923 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Wed, 2 Nov 2022 16:15:59 +0800 Subject: [PATCH 28/67] tests: fix unstable test `pdctl/TestScheduler` (#5668) close tikv/pd#5667 Signed-off-by: lhy1024 --- tests/pdctl/scheduler/scheduler_test.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/pdctl/scheduler/scheduler_test.go b/tests/pdctl/scheduler/scheduler_test.go index 81f98a6e8da4..953c02934e89 100644 --- a/tests/pdctl/scheduler/scheduler_test.go +++ b/tests/pdctl/scheduler/scheduler_test.go @@ -17,6 +17,7 @@ package scheduler_test import ( "context" "encoding/json" + "reflect" "testing" "time" @@ -390,8 +391,10 @@ func TestScheduler(t *testing.T) { mustExec([]string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) // cannot set qps as write-peer-priorities mustExec([]string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "write-peer-priorities", "query,byte"}, nil) - mustExec([]string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) + re.Eventually(func() bool { + mustExec([]string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + return reflect.DeepEqual(expected1, conf1) + }, time.Second*10, time.Millisecond*50) // test remove and add mustExec([]string{"-u", pdAddr, "scheduler", "remove", "balance-hot-region-scheduler"}, nil) mustExec([]string{"-u", pdAddr, "scheduler", "add", "balance-hot-region-scheduler"}, nil) From d407cae7c4db687303e795f474eba35f939b74bf Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Wed, 2 Nov 2022 16:24:00 +0800 Subject: [PATCH 29/67] tests: fix unstable test `TestMinResolvedTS` (#5669) close tikv/pd#5589 Signed-off-by: lhy1024 --- server/api/min_resolved_ts_test.go | 26 ++++++++-------- tests/server/cluster/cluster_test.go | 44 +++++++++++++--------------- 2 files changed, 34 insertions(+), 36 deletions(-) diff --git a/server/api/min_resolved_ts_test.go b/server/api/min_resolved_ts_test.go index 
0394ced3cc93..4041a19c5026 100644 --- a/server/api/min_resolved_ts_test.go +++ b/server/api/min_resolved_ts_test.go @@ -16,6 +16,7 @@ package api import ( "fmt" + "reflect" "testing" "time" @@ -72,7 +73,6 @@ func (suite *minResolvedTSTestSuite) TestMinResolvedTS() { // case2: stop run job zero := typeutil.Duration{Duration: 0} suite.setMinResolvedTSPersistenceInterval(zero) - time.Sleep(interval.Duration) // wait sync suite.checkMinResolvedTS(&minResolvedTS{ MinResolvedTS: 0, IsRealTime: false, @@ -81,8 +81,9 @@ func (suite *minResolvedTSTestSuite) TestMinResolvedTS() { // case3: start run job interval = typeutil.Duration{Duration: suite.defaultInterval} suite.setMinResolvedTSPersistenceInterval(interval) - suite.Equal(interval, suite.svr.GetRaftCluster().GetOpts().GetPDServerConfig().MinResolvedTSPersistenceInterval) - time.Sleep(suite.defaultInterval) // wait sync + suite.Eventually(func() bool { + return interval == suite.svr.GetRaftCluster().GetOpts().GetPDServerConfig().MinResolvedTSPersistenceInterval + }, time.Second*10, time.Millisecond*20) suite.checkMinResolvedTS(&minResolvedTS{ MinResolvedTS: 0, IsRealTime: true, @@ -92,7 +93,6 @@ func (suite *minResolvedTSTestSuite) TestMinResolvedTS() { rc := suite.svr.GetRaftCluster() ts := uint64(233) rc.SetMinResolvedTS(1, ts) - time.Sleep(suite.defaultInterval) // wait sync suite.checkMinResolvedTS(&minResolvedTS{ MinResolvedTS: ts, IsRealTime: true, @@ -101,14 +101,12 @@ func (suite *minResolvedTSTestSuite) TestMinResolvedTS() { // case5: stop persist and return last persist value when interval is 0 interval = typeutil.Duration{Duration: 0} suite.setMinResolvedTSPersistenceInterval(interval) - time.Sleep(suite.defaultInterval) // wait sync suite.checkMinResolvedTS(&minResolvedTS{ MinResolvedTS: ts, IsRealTime: false, PersistInterval: interval, }) rc.SetMinResolvedTS(1, ts+1) - time.Sleep(suite.defaultInterval) // wait sync suite.checkMinResolvedTS(&minResolvedTS{ MinResolvedTS: ts, // last persist value IsRealTime: false, @@ -123,11 +121,13 @@ func (suite *minResolvedTSTestSuite) setMinResolvedTSPersistenceInterval(duratio } func (suite *minResolvedTSTestSuite) checkMinResolvedTS(expect *minResolvedTS) { - res, err := testDialClient.Get(suite.url) - suite.NoError(err) - defer res.Body.Close() - listResp := &minResolvedTS{} - err = apiutil.ReadJSON(res.Body, listResp) - suite.NoError(err) - suite.Equal(expect, listResp) + suite.Eventually(func() bool { + res, err := testDialClient.Get(suite.url) + suite.NoError(err) + defer res.Body.Close() + listResp := &minResolvedTS{} + err = apiutil.ReadJSON(res.Body, listResp) + suite.NoError(err) + return reflect.DeepEqual(expect, listResp) + }, time.Second*10, time.Millisecond*20) } diff --git a/tests/server/cluster/cluster_test.go b/tests/server/cluster/cluster_test.go index 5235ed9abf42..f92eb716f484 100644 --- a/tests/server/cluster/cluster_test.go +++ b/tests/server/cluster/cluster_test.go @@ -1252,17 +1252,19 @@ func putRegionWithLeader(re *require.Assertions, rc *cluster.RaftCluster, id id. 
re.Equal(3, rc.GetStore(storeID).GetLeaderCount()) } -func checkMinResolvedTS(re *require.Assertions, rc *cluster.RaftCluster, expect uint64, interval time.Duration) { - time.Sleep(interval) - ts := rc.GetMinResolvedTS() - re.Equal(expect, ts) +func checkMinResolvedTS(re *require.Assertions, rc *cluster.RaftCluster, expect uint64) { + re.Eventually(func() bool { + ts := rc.GetMinResolvedTS() + return expect == ts + }, time.Second*10, time.Millisecond*50) } -func checkMinResolvedTSFromStorage(re *require.Assertions, rc *cluster.RaftCluster, expect uint64, interval time.Duration) { - time.Sleep(interval) - ts2, err := rc.GetStorage().LoadMinResolvedTS() - re.NoError(err) - re.Equal(expect, ts2) +func checkMinResolvedTSFromStorage(re *require.Assertions, rc *cluster.RaftCluster, expect uint64) { + re.Eventually(func() bool { + ts2, err := rc.GetStorage().LoadMinResolvedTS() + re.NoError(err) + return expect == ts2 + }, time.Second*10, time.Millisecond*50) } func setMinResolvedTSPersistenceInterval(re *require.Assertions, rc *cluster.RaftCluster, svr *server.Server, interval time.Duration) { @@ -1270,7 +1272,6 @@ func setMinResolvedTSPersistenceInterval(re *require.Assertions, rc *cluster.Raf cfg.MinResolvedTSPersistenceInterval = typeutil.NewDuration(interval) err := svr.SetPDServerConfig(*cfg) re.NoError(err) - time.Sleep(time.Millisecond + interval) } func TestMinResolvedTS(t *testing.T) { @@ -1321,7 +1322,6 @@ func TestMinResolvedTS(t *testing.T) { // default run job re.NotEqual(rc.GetOpts().GetMinResolvedTSPersistenceInterval(), 0) setMinResolvedTSPersistenceInterval(re, rc, svr, 0) - time.Sleep(config.DefaultMinResolvedTSPersistenceInterval) // wait sync re.Equal(time.Duration(0), rc.GetOpts().GetMinResolvedTSPersistenceInterval()) // case1: cluster is no initialized @@ -1335,14 +1335,13 @@ func TestMinResolvedTS(t *testing.T) { // case2: add leader peer to store1 but no run job // min resolved ts should be zero putRegionWithLeader(re, rc, id, store1) - checkMinResolvedTS(re, rc, 0, cluster.DefaultMinResolvedTSPersistenceInterval) + checkMinResolvedTS(re, rc, 0) // case3: add leader peer to store1 and run job // min resolved ts should be store1TS - interval := time.Millisecond - setMinResolvedTSPersistenceInterval(re, rc, svr, interval) - checkMinResolvedTS(re, rc, store1TS, interval) - checkMinResolvedTSFromStorage(re, rc, store1TS, interval) + setMinResolvedTSPersistenceInterval(re, rc, svr, time.Millisecond) + checkMinResolvedTS(re, rc, store1TS) + checkMinResolvedTSFromStorage(re, rc, store1TS) // case4: add tiflash store // min resolved ts should no change @@ -1357,16 +1356,15 @@ func TestMinResolvedTS(t *testing.T) { // case6: set store1 to tombstone // min resolved ts should change to store 3 resetStoreState(re, rc, store1, metapb.StoreState_Tombstone) - time.Sleep(interval) // wait sync - checkMinResolvedTS(re, rc, store3TS, interval) - checkMinResolvedTSFromStorage(re, rc, store3TS, interval) + checkMinResolvedTS(re, rc, store3TS) + checkMinResolvedTSFromStorage(re, rc, store3TS) // case7: add a store with leader peer but no report min resolved ts // min resolved ts should be no change store4 := addStoreAndCheckMinResolvedTS(re, false /* not tiflash */, 0, store3TS) putRegionWithLeader(re, rc, id, store4) - checkMinResolvedTS(re, rc, store3TS, interval) - checkMinResolvedTSFromStorage(re, rc, store3TS, interval) + checkMinResolvedTS(re, rc, store3TS) + checkMinResolvedTSFromStorage(re, rc, store3TS) resetStoreState(re, rc, store4, metapb.StoreState_Tombstone) // case8: set min 
resolved ts persist interval to zero @@ -1376,9 +1374,9 @@ func TestMinResolvedTS(t *testing.T) { store5 := addStoreAndCheckMinResolvedTS(re, false /* not tiflash */, store5TS, store3TS) resetStoreState(re, rc, store3, metapb.StoreState_Tombstone) putRegionWithLeader(re, rc, id, store5) - checkMinResolvedTS(re, rc, store3TS, interval) + checkMinResolvedTS(re, rc, store3TS) setMinResolvedTSPersistenceInterval(re, rc, svr, time.Millisecond) - checkMinResolvedTS(re, rc, store5TS, interval) + checkMinResolvedTS(re, rc, store5TS) } // See https://github.com/tikv/pd/issues/4941 From 540b2447e6d2e66f7336bcb7ecb25fa747e4cb87 Mon Sep 17 00:00:00 2001 From: "Reg [bot]" <86050514+tidb-dashboard-bot@users.noreply.github.com> Date: Thu, 3 Nov 2022 10:30:00 +0800 Subject: [PATCH 30/67] Update TiDB Dashboard to v2022.11.02.1 [master] (#5671) ref tikv/pd#4257 Signed-off-by: tidb-dashboard-bot Co-authored-by: tidb-dashboard-bot --- go.mod | 2 +- go.sum | 4 ++-- tests/client/go.mod | 2 +- tests/client/go.sum | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 80abf756aa90..bbaadb64d695 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,7 @@ require ( github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d - github.com/pingcap/tidb-dashboard v0.0.0-20220728104842-3743e533b594 + github.com/pingcap/tidb-dashboard v0.0.0-20221103012625-46facc4b7f6d github.com/prometheus/client_golang v1.1.0 github.com/prometheus/common v0.6.0 github.com/sasha-s/go-deadlock v0.2.0 diff --git a/go.sum b/go.sum index 115e14c7ddb6..92abde26b910 100644 --- a/go.sum +++ b/go.sum @@ -427,8 +427,8 @@ github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/y github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d h1:k3/APKZjXOyJrFy8VyYwRlZhMelpD3qBLJNsw3bPl/g= github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d/go.mod h1:7j18ezaWTao2LHOyMlsc2Dg1vW+mDY9dEbPzVyOlaeM= -github.com/pingcap/tidb-dashboard v0.0.0-20220728104842-3743e533b594 h1:kL1CW5qsn459kHZ2YoBYb+YOSWjSlshk55YP/XNQNWo= -github.com/pingcap/tidb-dashboard v0.0.0-20220728104842-3743e533b594/go.mod h1:EVq5HOrxNNUStmD8FV+Glki4MDA6s3k9rg6kb1YgcBQ= +github.com/pingcap/tidb-dashboard v0.0.0-20221103012625-46facc4b7f6d h1:qH0xCmmBSRgWV71c753o/1FTtBOWJSk78dBsqPQ4oC4= +github.com/pingcap/tidb-dashboard v0.0.0-20221103012625-46facc4b7f6d/go.mod h1:EVq5HOrxNNUStmD8FV+Glki4MDA6s3k9rg6kb1YgcBQ= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e h1:FBaTXU8C3xgt/drM58VHxojHo/QoG1oPsgWTGvaSpO4= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/tests/client/go.mod b/tests/client/go.mod index 64ffd2938a84..fb7bd3779e17 100644 --- a/tests/client/go.mod +++ b/tests/client/go.mod @@ -94,7 +94,7 @@ require ( github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c // indirect github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 // indirect github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d // indirect - github.com/pingcap/tidb-dashboard v0.0.0-20220728104842-3743e533b594 // indirect + github.com/pingcap/tidb-dashboard v0.0.0-20221103012625-46facc4b7f6d // indirect github.com/pingcap/tipb 
v0.0.0-20220718022156-3e2483c20a9e // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect diff --git a/tests/client/go.sum b/tests/client/go.sum index 66881f5c1302..f72acf175291 100644 --- a/tests/client/go.sum +++ b/tests/client/go.sum @@ -400,8 +400,8 @@ github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/y github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d h1:k3/APKZjXOyJrFy8VyYwRlZhMelpD3qBLJNsw3bPl/g= github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d/go.mod h1:7j18ezaWTao2LHOyMlsc2Dg1vW+mDY9dEbPzVyOlaeM= -github.com/pingcap/tidb-dashboard v0.0.0-20220728104842-3743e533b594 h1:kL1CW5qsn459kHZ2YoBYb+YOSWjSlshk55YP/XNQNWo= -github.com/pingcap/tidb-dashboard v0.0.0-20220728104842-3743e533b594/go.mod h1:EVq5HOrxNNUStmD8FV+Glki4MDA6s3k9rg6kb1YgcBQ= +github.com/pingcap/tidb-dashboard v0.0.0-20221103012625-46facc4b7f6d h1:qH0xCmmBSRgWV71c753o/1FTtBOWJSk78dBsqPQ4oC4= +github.com/pingcap/tidb-dashboard v0.0.0-20221103012625-46facc4b7f6d/go.mod h1:EVq5HOrxNNUStmD8FV+Glki4MDA6s3k9rg6kb1YgcBQ= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e h1:FBaTXU8C3xgt/drM58VHxojHo/QoG1oPsgWTGvaSpO4= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= From 20efeca054067da23a502af4d6e7fa694baacbec Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Thu, 3 Nov 2022 11:28:00 +0800 Subject: [PATCH 31/67] config: cache `ParseMbFrom` call to speed up region heartbeat (#5650) close tikv/pd#5649 Signed-off-by: bufferflies <1045931706@qq.com> --- server/config/store_config.go | 45 ++++++++++++------- server/config/store_config_test.go | 10 ++++- server/schedule/checker/merge_checker_test.go | 5 ++- 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/server/config/store_config.go b/server/config/store_config.go index c370742ce4fe..960ea6688e79 100644 --- a/server/config/store_config.go +++ b/server/config/store_config.go @@ -48,6 +48,10 @@ var ( // nolint type StoreConfig struct { Coprocessor `json:"coprocessor"` + + RegionMaxSizeMB uint64 `json:"_"` + RegionSplitSizeMB uint64 `json:"_"` + RegionBucketSizeMB uint64 `json:"_"` } // Coprocessor is the config of coprocessor. @@ -63,6 +67,11 @@ type Coprocessor struct { RegionBucketSize string `json:"region-bucket-size"` } +// Equal returns true if the two configs are equal. +func (c *StoreConfig) Equal(other *StoreConfig) bool { + return reflect.DeepEqual(c.Coprocessor, other.Coprocessor) +} + // String implements fmt.Stringer interface. 
func (c *StoreConfig) String() string { data, err := json.MarshalIndent(c, "", " ") @@ -74,23 +83,23 @@ func (c *StoreConfig) String() string { // GetRegionMaxSize returns the max region size in MB func (c *StoreConfig) GetRegionMaxSize() uint64 { - if c == nil || len(c.Coprocessor.RegionMaxSize) == 0 { + if c == nil || len(c.RegionMaxSize) == 0 { return defaultRegionMaxSize } - return typeutil.ParseMBFromText(c.Coprocessor.RegionMaxSize, defaultRegionMaxSize) + return c.RegionMaxSizeMB } // GetRegionSplitSize returns the region split size in MB func (c *StoreConfig) GetRegionSplitSize() uint64 { - if c == nil || len(c.Coprocessor.RegionSplitSize) == 0 { + if c == nil || len(c.RegionSplitSize) == 0 { return defaultRegionSplitSize } - return typeutil.ParseMBFromText(c.Coprocessor.RegionSplitSize, defaultRegionSplitSize) + return c.RegionSplitSizeMB } // GetRegionSplitKeys returns the region split keys func (c *StoreConfig) GetRegionSplitKeys() uint64 { - if c == nil || c.Coprocessor.RegionSplitKeys == 0 { + if c == nil || c.RegionSplitKeys == 0 { return defaultRegionSplitKey } return uint64(c.Coprocessor.RegionSplitKeys) @@ -98,10 +107,10 @@ func (c *StoreConfig) GetRegionSplitKeys() uint64 { // GetRegionMaxKeys returns the region split keys func (c *StoreConfig) GetRegionMaxKeys() uint64 { - if c == nil || c.Coprocessor.RegionMaxKeys == 0 { + if c == nil || c.RegionMaxKeys == 0 { return defaultRegionMaxKey } - return uint64(c.Coprocessor.RegionMaxKeys) + return uint64(c.RegionMaxKeys) } // IsEnableRegionBucket return true if the region bucket is enabled. @@ -117,10 +126,10 @@ func (c *StoreConfig) GetRegionBucketSize() uint64 { if c == nil || !c.Coprocessor.EnableRegionBucket { return 0 } - if len(c.Coprocessor.RegionBucketSize) == 0 { + if len(c.RegionBucketSize) == 0 { return defaultBucketSize } - return typeutil.ParseMBFromText(c.Coprocessor.RegionBucketSize, defaultBucketSize) + return c.RegionBucketSizeMB } // CheckRegionSize return error if the smallest region's size is less than mergeSize @@ -188,13 +197,20 @@ func (m *StoreConfigManager) ObserveConfig(address string) error { return err } old := m.GetStoreConfig() - if cfg != nil && !reflect.DeepEqual(cfg, old) { + if cfg != nil && !old.Equal(cfg) { log.Info("sync the store config successful", zap.String("store-address", address), zap.String("store-config", cfg.String())) - m.config.Store(cfg) + m.update(cfg) } return nil } +func (m *StoreConfigManager) update(cfg *StoreConfig) { + cfg.RegionMaxSizeMB = typeutil.ParseMBFromText(cfg.RegionMaxSize, defaultRegionMaxSize) + cfg.RegionSplitSizeMB = typeutil.ParseMBFromText(cfg.RegionSplitSize, defaultRegionSplitSize) + cfg.RegionBucketSizeMB = typeutil.ParseMBFromText(cfg.RegionBucketSize, defaultBucketSize) + m.config.Store(cfg) +} + // GetStoreConfig returns the current store configuration. 
func (m *StoreConfigManager) GetStoreConfig() *StoreConfig { if m == nil { @@ -257,10 +273,7 @@ func (f *FakeSource) GetConfig(url string) (*StoreConfig, error) { if !slice.Contains(f.whiteList, url) { return nil, fmt.Errorf("[url:%s] is not in white list", url) } - config := &StoreConfig{ - Coprocessor{ - RegionMaxSize: "10MiB", - }, - } + config := &StoreConfig{} + config.RegionMaxSize = "10MiB" return config, nil } diff --git a/server/config/store_config_test.go b/server/config/store_config_test.go index 8dd29bede9a8..6916fedc9295 100644 --- a/server/config/store_config_test.go +++ b/server/config/store_config_test.go @@ -26,6 +26,7 @@ import ( func TestTiKVConfig(t *testing.T) { re := require.New(t) + m := NewStoreConfigManager(nil) // case1: big region. { body := `{ "coprocessor": { @@ -40,7 +41,7 @@ func TestTiKVConfig(t *testing.T) { }}` var config StoreConfig re.NoError(json.Unmarshal([]byte(body), &config)) - + m.update(&config) re.Equal(uint64(144000000), config.GetRegionMaxKeys()) re.Equal(uint64(96000000), config.GetRegionSplitKeys()) re.Equal(15*units.GiB/units.MiB, int(config.GetRegionMaxSize())) @@ -67,6 +68,11 @@ func TestUpdateConfig(t *testing.T) { manager.ObserveConfig("tidb.com") re.Equal(uint64(10), manager.GetStoreConfig().GetRegionMaxSize()) + // case2: the config should not update if config is same expect some ignore field. + c, err := manager.source.GetConfig("tidb.com") + re.NoError(err) + re.True(manager.GetStoreConfig().Equal(c)) + client := &http.Client{ Transport: &http.Transport{ DisableKeepAlives: true, @@ -79,6 +85,7 @@ func TestUpdateConfig(t *testing.T) { func TestParseConfig(t *testing.T) { re := require.New(t) + m := NewStoreConfigManager(nil) body := ` { "coprocessor":{ @@ -99,6 +106,7 @@ func TestParseConfig(t *testing.T) { var config StoreConfig re.NoError(json.Unmarshal([]byte(body), &config)) + m.update(&config) re.Equal(uint64(96), config.GetRegionBucketSize()) } diff --git a/server/schedule/checker/merge_checker_test.go b/server/schedule/checker/merge_checker_test.go index eefa07d4fe0a..ffcf3a75acf9 100644 --- a/server/schedule/checker/merge_checker_test.go +++ b/server/schedule/checker/merge_checker_test.go @@ -104,11 +104,12 @@ func (suite *mergeCheckerTestSuite) TestBasic() { // it can merge if the max region size of the store is greater than the target region size. 
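// GetRegionMaxSize now returns the cached RegionMaxSizeMB value (parsed once in
// StoreConfigManager.update) rather than re-parsing the string on every call, so the
// test has to set the numeric field alongside the human-readable one.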
config := suite.cluster.GetStoreConfig() - config.RegionMaxSize = "10Gib" + config.RegionMaxSize = "144MiB" + config.RegionMaxSizeMB = 10 * 1024 ops = suite.mc.Check(suite.regions[2]) suite.NotNil(ops) - config.RegionMaxSize = "144Mib" + config.RegionMaxSizeMB = 144 ops = suite.mc.Check(suite.regions[2]) suite.Nil(ops) // change the size back From 6f0fb32fbe2d53eddb072f84c21f869da3af34a4 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Thu, 3 Nov 2022 16:40:00 +0800 Subject: [PATCH 32/67] tools: support profiling simulator (#5674) ref tikv/pd#5468 Signed-off-by: Ryan Leung --- tools/pd-simulator/main.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tools/pd-simulator/main.go b/tools/pd-simulator/main.go index 24cf099b90c1..48602d10aa92 100644 --- a/tools/pd-simulator/main.go +++ b/tools/pd-simulator/main.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "net/http" + "net/http/pprof" "os" "os/signal" "syscall" @@ -97,7 +98,7 @@ func main() { func run(simCase string, simConfig *simulator.SimConfig) { if *pdAddr != "" { - go runMetrics() + go runHTTPServer() simStart(*pdAddr, simCase, simConfig) } else { local, clean := NewSingleServer(context.Background(), simConfig) @@ -115,8 +116,17 @@ func run(simCase string, simConfig *simulator.SimConfig) { } } -func runMetrics() { +func runHTTPServer() { http.Handle("/metrics", promhttp.Handler()) + // profile API + http.HandleFunc("/pprof/profile", pprof.Profile) + http.HandleFunc("/pprof/trace", pprof.Trace) + http.HandleFunc("/pprof/symbol", pprof.Symbol) + http.Handle("/pprof/heap", pprof.Handler("heap")) + http.Handle("/pprof/mutex", pprof.Handler("mutex")) + http.Handle("/pprof/allocs", pprof.Handler("allocs")) + http.Handle("/pprof/block", pprof.Handler("block")) + http.Handle("/pprof/goroutine", pprof.Handler("goroutine")) // nolint http.ListenAndServe(*statusAddress, nil) } From 6fcb52805d28ba15913d0d726d5347489c5038a4 Mon Sep 17 00:00:00 2001 From: Yongbo Jiang Date: Thu, 3 Nov 2022 17:22:01 +0800 Subject: [PATCH 33/67] schedule: Add leader filter for scatter (#5663) close tikv/pd#5622 schedule: Add leader filter for scatter Signed-off-by: Cabinfever_B Co-authored-by: Ti Chi Robot --- server/schedule/region_scatterer.go | 16 +++-- server/schedule/region_scatterer_test.go | 86 ++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 5 deletions(-) diff --git a/server/schedule/region_scatterer.go b/server/schedule/region_scatterer.go index d3a610a32743..97b46375e4ec 100644 --- a/server/schedule/region_scatterer.go +++ b/server/schedule/region_scatterer.go @@ -337,7 +337,7 @@ func (r *RegionScatterer) scatterRegion(region *core.RegionInfo, group string) * // FIXME: target leader only considers the ordinary stores, maybe we need to consider the // special engine stores if the engine supports to become a leader. But now there is only // one engine, tiflash, which does not support the leader, so don't consider it for now. - targetLeader := r.selectAvailableLeaderStore(group, targetPeers, r.ordinaryEngine) + targetLeader := r.selectAvailableLeaderStore(group, region, targetPeers, r.ordinaryEngine) if targetLeader == 0 { scatterCounter.WithLabelValues("no-leader", "").Inc() return nil @@ -457,16 +457,22 @@ func (r *RegionScatterer) selectStore(group string, peer *metapb.Peer, sourceSto } // selectAvailableLeaderStore select the target leader store from the candidates. The candidates would be collected by -// the existed peers store depended on the leader counts in the group level. 
-func (r *RegionScatterer) selectAvailableLeaderStore(group string, peers map[uint64]*metapb.Peer, context engineContext) uint64 { +// the existed peers store depended on the leader counts in the group level. Please use this func before scatter spacial engines. +func (r *RegionScatterer) selectAvailableLeaderStore(group string, region *core.RegionInfo, peers map[uint64]*metapb.Peer, context engineContext) uint64 { + sourceStore := r.cluster.GetStore(region.GetLeader().GetStoreId()) + if sourceStore == nil { + log.Error("failed to get the store", zap.Uint64("store-id", region.GetLeader().GetStoreId()), errs.ZapError(errs.ErrGetSourceStore)) + return 0 + } leaderCandidateStores := make([]uint64, 0) + // use PlacementLeaderSafeguard for filtering follower and learner in rule + filter := filter.NewPlacementLeaderSafeguard(r.name, r.cluster.GetOpts(), r.cluster.GetBasicCluster(), r.cluster.GetRuleManager(), region, sourceStore, true /*allowMoveLeader*/) for storeID := range peers { store := r.cluster.GetStore(storeID) if store == nil { return 0 } - engine := store.GetLabelValue(core.EngineKey) - if len(engine) < 1 { + if filter == nil || filter.Target(r.cluster.GetOpts(), store).IsOK() { leaderCandidateStores = append(leaderCandidateStores, storeID) } } diff --git a/server/schedule/region_scatterer_test.go b/server/schedule/region_scatterer_test.go index 3c636e7cc729..8e88c5555bea 100644 --- a/server/schedule/region_scatterer_test.go +++ b/server/schedule/region_scatterer_test.go @@ -569,6 +569,92 @@ func TestRegionFromDifferentGroups(t *testing.T) { check(scatterer.ordinaryEngine.selectedPeer) } +func TestRegionHasLearner(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + opt := config.NewTestOptions() + tc := mockcluster.NewCluster(ctx, opt) + stream := hbstream.NewTestHeartbeatStreams(ctx, tc.ID, tc, false) + oc := NewOperatorController(ctx, tc, stream) + // Add 8 stores. 
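// Stores 1-6 carry the z1 label and host the voters, stores 7-8 carry z2 and only host
// the learner required by the rule below, so the new leader safeguard should keep every
// scattered leader on the z1 stores; checkLeader asserts stores 7 and 8 end up with
// zero selected leaders.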
+ voterCount := uint64(6) + storeCount := uint64(8) + for i := uint64(1); i <= voterCount; i++ { + tc.AddLabelsStore(i, 0, map[string]string{"zone": "z1"}) + } + for i := voterCount + 1; i <= 8; i++ { + tc.AddLabelsStore(i, 0, map[string]string{"zone": "z2"}) + } + tc.RuleManager.SetRule(&placement.Rule{ + GroupID: "pd", + ID: "default", + Role: placement.Voter, + Count: 3, + LabelConstraints: []placement.LabelConstraint{ + { + Key: "zone", + Op: placement.In, + Values: []string{"z1"}, + }, + }, + }) + tc.RuleManager.SetRule(&placement.Rule{ + GroupID: "pd", + ID: "learner", + Role: placement.Learner, + Count: 1, + LabelConstraints: []placement.LabelConstraint{ + { + Key: "zone", + Op: placement.In, + Values: []string{"z2"}, + }, + }, + }) + scatterer := NewRegionScatterer(ctx, tc, oc) + regionCount := 50 + for i := 1; i <= regionCount; i++ { + _, err := scatterer.Scatter(tc.AddRegionWithLearner(uint64(i), uint64(1), []uint64{uint64(2), uint64(3)}, []uint64{7}), "group") + re.NoError(err) + } + check := func(ss *selectedStores) { + max := uint64(0) + min := uint64(math.MaxUint64) + for i := uint64(1); i <= max; i++ { + count := ss.TotalCountByStore(i) + if count > max { + max = count + } + if count < min { + min = count + } + } + re.LessOrEqual(max-min, uint64(2)) + } + check(scatterer.ordinaryEngine.selectedPeer) + checkLeader := func(ss *selectedStores) { + max := uint64(0) + min := uint64(math.MaxUint64) + for i := uint64(1); i <= voterCount; i++ { + count := ss.TotalCountByStore(i) + if count > max { + max = count + } + if count < min { + min = count + } + } + re.LessOrEqual(max-2, uint64(regionCount)/voterCount) + re.LessOrEqual(min-1, uint64(regionCount)/voterCount) + for i := voterCount + 1; i <= storeCount; i++ { + count := ss.TotalCountByStore(i) + re.LessOrEqual(count, uint64(0)) + } + } + checkLeader(scatterer.ordinaryEngine.selectedLeader) +} + // TestSelectedStores tests if the peer count has changed due to the picking strategy. // Ref https://github.com/tikv/pd/issues/4565 func TestSelectedStores(t *testing.T) { From ce7b13931c0be51eaf01c69d33f83530d820a3eb Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Fri, 4 Nov 2022 11:06:01 +0800 Subject: [PATCH 34/67] schedulers: region fit once in one attemption. (#5640) close tikv/pd#5537 Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Ti Chi Robot --- server/schedule/filter/filters.go | 13 +++++++++---- server/schedule/filter/filters_test.go | 8 ++++---- server/schedule/filter/region_filters.go | 20 ++++++++++++++++---- server/schedule/region_scatterer.go | 2 +- server/schedulers/balance_region.go | 6 +++++- server/schedulers/grant_hot_region.go | 2 +- server/schedulers/hot_region.go | 2 +- server/schedulers/shuffle_hot_region.go | 2 +- server/schedulers/shuffle_region.go | 2 +- server/schedulers/utils.go | 2 ++ 10 files changed, 41 insertions(+), 18 deletions(-) diff --git a/server/schedule/filter/filters.go b/server/schedule/filter/filters.go index 2951a46ee160..b663730d50d8 100644 --- a/server/schedule/filter/filters.go +++ b/server/schedule/filter/filters.go @@ -584,13 +584,17 @@ type ruleFitFilter struct { // newRuleFitFilter creates a filter that ensures after replace a peer with new // one, the isolation level will not decrease. Its function is the same as // distinctScoreFilter but used when placement rules is enabled. 
-func newRuleFitFilter(scope string, cluster *core.BasicCluster, ruleManager *placement.RuleManager, region *core.RegionInfo, oldStoreID uint64) Filter { +func newRuleFitFilter(scope string, cluster *core.BasicCluster, ruleManager *placement.RuleManager, + region *core.RegionInfo, oldFit *placement.RegionFit, oldStoreID uint64) Filter { + if oldFit == nil { + oldFit = ruleManager.FitRegion(cluster, region) + } return &ruleFitFilter{ scope: scope, cluster: cluster, ruleManager: ruleManager, region: region, - oldFit: ruleManager.FitRegion(cluster, region), + oldFit: oldFit, srcStore: oldStoreID, } } @@ -679,9 +683,10 @@ func (f *ruleLeaderFitFilter) GetSourceStoreID() uint64 { // NewPlacementSafeguard creates a filter that ensures after replace a peer with new // peer, the placement restriction will not become worse. -func NewPlacementSafeguard(scope string, opt *config.PersistOptions, cluster *core.BasicCluster, ruleManager *placement.RuleManager, region *core.RegionInfo, sourceStore *core.StoreInfo) Filter { +func NewPlacementSafeguard(scope string, opt *config.PersistOptions, cluster *core.BasicCluster, ruleManager *placement.RuleManager, + region *core.RegionInfo, sourceStore *core.StoreInfo, oldFit *placement.RegionFit) Filter { if opt.IsPlacementRulesEnabled() { - return newRuleFitFilter(scope, cluster, ruleManager, region, sourceStore.GetID()) + return newRuleFitFilter(scope, cluster, ruleManager, region, oldFit, sourceStore.GetID()) } return NewLocationSafeguard(scope, opt.GetLocationLabels(), cluster.GetRegionStores(region), sourceStore) } diff --git a/server/schedule/filter/filters_test.go b/server/schedule/filter/filters_test.go index aef7cec1b8f6..19edef830a9b 100644 --- a/server/schedule/filter/filters_test.go +++ b/server/schedule/filter/filters_test.go @@ -132,7 +132,7 @@ func TestRuleFitFilter(t *testing.T) { testCluster.AddLabelsStore(testCase.storeID, testCase.regionCount, testCase.labels) } for _, testCase := range testCases { - filter := newRuleFitFilter("", testCluster.GetBasicCluster(), testCluster.GetRuleManager(), region, 1) + filter := newRuleFitFilter("", testCluster.GetBasicCluster(), testCluster.GetRuleManager(), region, nil, 1) re.Equal(testCase.sourceRes, filter.Source(testCluster.GetOpts(), testCluster.GetStore(testCase.storeID)).StatusCode) re.Equal(testCase.targetRes, filter.Target(testCluster.GetOpts(), testCluster.GetStore(testCase.storeID)).StatusCode) leaderFilter := newRuleLeaderFitFilter("", testCluster.GetBasicCluster(), testCluster.GetRuleManager(), region, 1, true) @@ -344,10 +344,10 @@ func TestPlacementGuard(t *testing.T) { store := testCluster.GetStore(1) re.IsType(NewLocationSafeguard("", []string{"zone"}, testCluster.GetRegionStores(region), store), - NewPlacementSafeguard("", testCluster.GetOpts(), testCluster.GetBasicCluster(), testCluster.GetRuleManager(), region, store)) + NewPlacementSafeguard("", testCluster.GetOpts(), testCluster.GetBasicCluster(), testCluster.GetRuleManager(), region, store, nil)) testCluster.SetEnablePlacementRules(true) - re.IsType(newRuleFitFilter("", testCluster.GetBasicCluster(), testCluster.GetRuleManager(), region, 1), - NewPlacementSafeguard("", testCluster.GetOpts(), testCluster.GetBasicCluster(), testCluster.GetRuleManager(), region, store)) + re.IsType(newRuleFitFilter("", testCluster.GetBasicCluster(), testCluster.GetRuleManager(), region, nil, 1), + NewPlacementSafeguard("", testCluster.GetOpts(), testCluster.GetBasicCluster(), testCluster.GetRuleManager(), region, store, nil)) } func TestSpecialUseFilter(t 
*testing.T) { diff --git a/server/schedule/filter/region_filters.go b/server/schedule/filter/region_filters.go index 75d97b8e6462..7b2613583452 100644 --- a/server/schedule/filter/region_filters.go +++ b/server/schedule/filter/region_filters.go @@ -17,6 +17,7 @@ package filter import ( "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/server/core" + "github.com/tikv/pd/server/schedule/placement" "github.com/tikv/pd/server/schedule/plan" ) @@ -94,20 +95,31 @@ func (f *regionDownFilter) Select(region *core.RegionInfo) *plan.Status { return statusOK } -type regionReplicatedFilter struct { +// RegionReplicatedFilter filters all unreplicated regions. +type RegionReplicatedFilter struct { cluster regionHealthCluster + fit *placement.RegionFit } // NewRegionReplicatedFilter creates a RegionFilter that filters all unreplicated regions. func NewRegionReplicatedFilter(cluster regionHealthCluster) RegionFilter { - return ®ionReplicatedFilter{cluster: cluster} + return &RegionReplicatedFilter{cluster: cluster} } -func (f *regionReplicatedFilter) Select(region *core.RegionInfo) *plan.Status { +// GetFit returns the region fit. +func (f *RegionReplicatedFilter) GetFit() *placement.RegionFit { + return f.fit +} + +// Select returns Ok if the given region satisfy the replication. +// it will cache the lasted region fit if the region satisfy the replication. +func (f *RegionReplicatedFilter) Select(region *core.RegionInfo) *plan.Status { if f.cluster.GetOpts().IsPlacementRulesEnabled() { - if !isRegionPlacementRuleSatisfied(f.cluster, region) { + fit := f.cluster.GetRuleManager().FitRegion(f.cluster, region) + if !fit.IsSatisfied() { return statusRegionNotMatchRule } + f.fit = fit return statusOK } if !isRegionReplicasSatisfied(f.cluster, region) { diff --git a/server/schedule/region_scatterer.go b/server/schedule/region_scatterer.go index 97b46375e4ec..0bd5e4564534 100644 --- a/server/schedule/region_scatterer.go +++ b/server/schedule/region_scatterer.go @@ -396,7 +396,7 @@ func (r *RegionScatterer) selectCandidates(region *core.RegionInfo, sourceStoreI filters := []filter.Filter{ filter.NewExcludedFilter(r.name, nil, selectedStores), } - scoreGuard := filter.NewPlacementSafeguard(r.name, r.cluster.GetOpts(), r.cluster.GetBasicCluster(), r.cluster.GetRuleManager(), region, sourceStore) + scoreGuard := filter.NewPlacementSafeguard(r.name, r.cluster.GetOpts(), r.cluster.GetBasicCluster(), r.cluster.GetRuleManager(), region, sourceStore, nil) for _, filterFunc := range context.filterFuncs { filters = append(filters, filterFunc()) } diff --git a/server/schedulers/balance_region.go b/server/schedulers/balance_region.go index 8209214691fd..8b0c21ab8cfb 100644 --- a/server/schedulers/balance_region.go +++ b/server/schedulers/balance_region.go @@ -227,6 +227,9 @@ func (s *balanceRegionScheduler) Schedule(cluster schedule.Cluster, dryRun bool) continue } solver.step++ + // the replica filter will cache the last region fit and the select one will only pict the first one region that + // satisfy all the filters, so the region fit must belong the scheduled region. 
+ solver.fit = replicaFilter.(*filter.RegionReplicatedFilter).GetFit() if op := s.transferPeer(solver, collector, sourceStores[sourceIndex+1:], faultTargets); op != nil { s.retryQuota.ResetLimit(solver.source) op.Counters = append(op.Counters, schedulerCounter.WithLabelValues(s.GetName(), "new-operator")) @@ -251,7 +254,8 @@ func (s *balanceRegionScheduler) transferPeer(solver *solver, collector *plan.Co // the more expensive the filter is, the later it should be placed. filters := []filter.Filter{ filter.NewExcludedFilter(s.GetName(), nil, excludeTargets), - filter.NewPlacementSafeguard(s.GetName(), solver.GetOpts(), solver.GetBasicCluster(), solver.GetRuleManager(), solver.region, solver.source), + filter.NewPlacementSafeguard(s.GetName(), solver.GetOpts(), solver.GetBasicCluster(), solver.GetRuleManager(), + solver.region, solver.source, solver.fit), } candidates := filter.NewCandidates(dstStores).FilterTarget(solver.GetOpts(), collector, s.filterCounter, filters...) if len(candidates.Stores) != 0 { diff --git a/server/schedulers/grant_hot_region.go b/server/schedulers/grant_hot_region.go index e2927631f20f..f89ec0ebe7bc 100644 --- a/server/schedulers/grant_hot_region.go +++ b/server/schedulers/grant_hot_region.go @@ -352,7 +352,7 @@ func (s *grantHotRegionScheduler) transfer(cluster schedule.Cluster, regionID ui return nil, errs.ErrStoreNotFound } filters := []filter.Filter{ - filter.NewPlacementSafeguard(s.GetName(), cluster.GetOpts(), cluster.GetBasicCluster(), cluster.GetRuleManager(), srcRegion, srcStore), + filter.NewPlacementSafeguard(s.GetName(), cluster.GetOpts(), cluster.GetBasicCluster(), cluster.GetRuleManager(), srcRegion, srcStore, nil), } destStoreIDs := make([]uint64, 0, len(s.conf.StoreIDs)) diff --git a/server/schedulers/hot_region.go b/server/schedulers/hot_region.go index c53dff369ab1..4386d5daa267 100644 --- a/server/schedulers/hot_region.go +++ b/server/schedulers/hot_region.go @@ -877,7 +877,7 @@ func (bs *balanceSolver) filterDstStores() map[uint64]*statistics.StoreLoadDetai &filter.StoreStateFilter{ActionScope: bs.sche.GetName(), MoveRegion: true}, filter.NewExcludedFilter(bs.sche.GetName(), bs.cur.region.GetStoreIDs(), bs.cur.region.GetStoreIDs()), filter.NewSpecialUseFilter(bs.sche.GetName(), filter.SpecialUseHotRegion), - filter.NewPlacementSafeguard(bs.sche.GetName(), bs.GetOpts(), bs.GetBasicCluster(), bs.GetRuleManager(), bs.cur.region, srcStore), + filter.NewPlacementSafeguard(bs.sche.GetName(), bs.GetOpts(), bs.GetBasicCluster(), bs.GetRuleManager(), bs.cur.region, srcStore, nil), } for _, detail := range bs.stLoadDetail { candidates = append(candidates, detail) diff --git a/server/schedulers/shuffle_hot_region.go b/server/schedulers/shuffle_hot_region.go index 81fb09510a4e..de1eb6fefbd6 100644 --- a/server/schedulers/shuffle_hot_region.go +++ b/server/schedulers/shuffle_hot_region.go @@ -181,7 +181,7 @@ func (s *shuffleHotRegionScheduler) randomSchedule(cluster schedule.Cluster, loa filters := []filter.Filter{ &filter.StoreStateFilter{ActionScope: s.GetName(), MoveRegion: true}, filter.NewExcludedFilter(s.GetName(), srcRegion.GetStoreIDs(), srcRegion.GetStoreIDs()), - filter.NewPlacementSafeguard(s.GetName(), cluster.GetOpts(), cluster.GetBasicCluster(), cluster.GetRuleManager(), srcRegion, srcStore), + filter.NewPlacementSafeguard(s.GetName(), cluster.GetOpts(), cluster.GetBasicCluster(), cluster.GetRuleManager(), srcRegion, srcStore, nil), } stores := cluster.GetStores() destStoreIDs := make([]uint64, 0, len(stores)) diff --git 
a/server/schedulers/shuffle_region.go b/server/schedulers/shuffle_region.go index acd822a39024..02064038df2d 100644 --- a/server/schedulers/shuffle_region.go +++ b/server/schedulers/shuffle_region.go @@ -165,7 +165,7 @@ func (s *shuffleRegionScheduler) scheduleAddPeer(cluster schedule.Cluster, regio if store == nil { return nil } - scoreGuard := filter.NewPlacementSafeguard(s.GetName(), cluster.GetOpts(), cluster.GetBasicCluster(), cluster.GetRuleManager(), region, store) + scoreGuard := filter.NewPlacementSafeguard(s.GetName(), cluster.GetOpts(), cluster.GetBasicCluster(), cluster.GetRuleManager(), region, store, nil) excludedFilter := filter.NewExcludedFilter(s.GetName(), nil, region.GetStoreIDs()) target := filter.NewCandidates(cluster.GetStores()). diff --git a/server/schedulers/utils.go b/server/schedulers/utils.go index 2228c8af9344..277ca904fdb4 100644 --- a/server/schedulers/utils.go +++ b/server/schedulers/utils.go @@ -24,6 +24,7 @@ import ( "github.com/tikv/pd/server/core" "github.com/tikv/pd/server/schedule" "github.com/tikv/pd/server/schedule/operator" + "github.com/tikv/pd/server/schedule/placement" "github.com/tikv/pd/server/statistics" "go.uber.org/zap" ) @@ -45,6 +46,7 @@ type solver struct { opInfluence operator.OpInfluence tolerantSizeRatio float64 tolerantSource int64 + fit *placement.RegionFit sourceScore float64 targetScore float64 From 99528a67e6e1f8ca2aaaf1ed80714007b3773dd2 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Fri, 4 Nov 2022 11:48:01 +0800 Subject: [PATCH 35/67] operator: the operator timeout duation depends on all the step not separated (#5600) ref tikv/pd#5596 Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Ti Chi Robot --- server/schedule/operator/create_operator.go | 1 + server/schedule/operator/operator.go | 28 ++--- server/schedule/operator/operator_test.go | 80 ++++---------- server/schedule/operator/status_tracker.go | 7 +- .../schedule/operator/status_tracker_test.go | 7 +- server/schedule/operator/step.go | 103 ++++++++++-------- server/schedule/operator_controller_test.go | 4 +- 7 files changed, 103 insertions(+), 127 deletions(-) diff --git a/server/schedule/operator/create_operator.go b/server/schedule/operator/create_operator.go index ef88cb397a15..a3f982481b92 100644 --- a/server/schedule/operator/create_operator.go +++ b/server/schedule/operator/create_operator.go @@ -186,6 +186,7 @@ func CreateMergeRegionOperator(desc string, ci ClusterInformer, source *core.Reg ToRegion: target.GetMeta(), IsPassive: true, }) + op2.Sync(op1) return []*Operator{op1, op2}, nil } diff --git a/server/schedule/operator/operator.go b/server/schedule/operator/operator.go index 62f20104538a..c673dfd5199b 100644 --- a/server/schedule/operator/operator.go +++ b/server/schedule/operator/operator.go @@ -31,12 +31,6 @@ const ( // OperatorExpireTime is the duration that when an operator is not started // after it, the operator will be considered expired. OperatorExpireTime = 3 * time.Second - // FastOperatorWaitTime is the duration that when an operator that is not marked - // `OpRegion` runs longer than it, the operator will be considered timeout. - FastOperatorWaitTime = 10 * time.Second - // SlowOperatorWaitTime is the duration that when an operator marked `OpRegion` - // runs longer than it, the operator will be considered timeout. - SlowOperatorWaitTime = 10 * time.Minute ) // Operator contains execution steps generated by scheduler. 
@@ -56,6 +50,7 @@ type Operator struct { FinishedCounters []prometheus.Counter AdditionalInfos map[string]string ApproximateSize int64 + timeout time.Duration } // NewOperator creates a new operator. @@ -64,6 +59,10 @@ func NewOperator(desc, brief string, regionID uint64, regionEpoch *metapb.Region if kind&OpAdmin != 0 { level = core.Urgent } + maxDuration := float64(0) + for _, v := range steps { + maxDuration += v.Timeout(approximateSize).Seconds() + } return &Operator{ desc: desc, brief: brief, @@ -76,17 +75,23 @@ func NewOperator(desc, brief string, regionID uint64, regionEpoch *metapb.Region level: level, AdditionalInfos: make(map[string]string), ApproximateSize: approximateSize, + timeout: time.Duration(maxDuration) * time.Second, } } +// Sync some attribute with the given timeout. +func (o *Operator) Sync(other *Operator) { + o.timeout = other.timeout +} + func (o *Operator) String() string { stepStrs := make([]string, len(o.steps)) for i := range o.steps { stepStrs[i] = o.steps[i].String() } - s := fmt.Sprintf("%s {%s} (kind:%s, region:%v(%v, %v), createAt:%s, startAt:%s, currentStep:%v, size:%d, steps:[%s])", + s := fmt.Sprintf("%s {%s} (kind:%s, region:%v(%v, %v), createAt:%s, startAt:%s, currentStep:%v, size:%d, steps:[%s],timeout:[%s])", o.desc, o.brief, o.kind, o.regionID, o.regionEpoch.GetVersion(), o.regionEpoch.GetConfVer(), o.GetCreateTime(), - o.GetStartTime(), atomic.LoadInt32(&o.currentStep), o.ApproximateSize, strings.Join(stepStrs, ", ")) + o.GetStartTime(), atomic.LoadInt32(&o.currentStep), o.ApproximateSize, strings.Join(stepStrs, ", "), o.timeout.String()) if o.CheckSuccess() { s += " finished" } @@ -224,15 +229,12 @@ func (o *Operator) CheckExpired() bool { return o.status.CheckExpired(OperatorExpireTime) } -// CheckTimeout checks if the operator is timeout, and update the status. +// CheckTimeout returns true if the operator is timeout, and update the status. func (o *Operator) CheckTimeout() bool { if o.CheckSuccess() { return false } - if startTime, step := o.getCurrentTimeAndStep(); step != nil { - return o.status.CheckStepTimeout(startTime, step, o.ApproximateSize) - } - return false + return o.status.CheckTimeout(o.timeout) } // Len returns the operator's steps count. 
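To make the new single-budget timeout concrete, here is a rough sketch of the arithmetic this patch implies. It is an illustration only: the rates and floors come from the step.go hunk further below (DefaultSlowExecutorRate 6 s/MB, DefaultFastExecutorRate 0.6 s/MB, SlowStepWaitTime 10 min, FastStepWaitTime 60 s), while the bodies of slowStepWaitDuration and fastStepWaitDuration are not shown in this series, so the scale-with-a-floor behaviour is inferred from the operator_test.go expectations rather than copied from PD.

package main

import (
	"fmt"
	"time"
)

// Assumed shape of the per-step budget: scale with region size (MB), never below the floor.
func slowStepWait(sizeMB int64) time.Duration { // AddPeer / AddLearner style steps
	if d := time.Duration(6*sizeMB) * time.Second; d > 10*time.Minute {
		return d
	}
	return 10 * time.Minute
}

func fastStepWait(sizeMB int64) time.Duration { // TransferLeader / RemovePeer style steps
	if d := time.Duration(float64(sizeMB)*0.6) * time.Second; d > 60*time.Second {
		return d
	}
	return 60 * time.Second
}

func main() {
	// A 10 GB (10000 MB) region moved via AddLearner + PromoteLearner + RemovePeer:
	// 60000s + 6000s + 6000s, accumulated once in NewOperator and later compared
	// against the time elapsed since STARTED instead of being checked per step.
	size := int64(10 * 1000)
	fmt.Println(slowStepWait(size) + fastStepWait(size) + fastStepWait(size)) // 20h0m0s

	// A 10 MB region falls back to the floors: 10m + 60s + 60s.
	fmt.Println(slowStepWait(10) + fastStepWait(10) + fastStepWait(10)) // 12m0s
}

Under that reading, the operator no longer tracks a per-step clock at all: NewOperator sums the step budgets into o.timeout and CheckTimeout simply compares the time elapsed since STARTED with that one duration, which is what the test changes below exercise.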
diff --git a/server/schedule/operator/operator_test.go b/server/schedule/operator/operator_test.go index 851f2ef5d142..87e44e7a6c6d 100644 --- a/server/schedule/operator/operator_test.go +++ b/server/schedule/operator/operator_test.go @@ -17,6 +17,7 @@ package operator import ( "context" "encoding/json" + "fmt" "sync/atomic" "testing" "time" @@ -120,7 +121,7 @@ func (suite *operatorTestSuite) TestOperator() { suite.Nil(op.Check(region)) suite.Equal(SUCCESS, op.Status()) - SetOperatorStatusReachTime(op, STARTED, time.Now().Add(-SlowOperatorWaitTime-time.Second)) + SetOperatorStatusReachTime(op, STARTED, time.Now().Add(-SlowStepWaitTime-time.Second)) suite.False(op.CheckTimeout()) // addPeer1, transferLeader1, removePeer2 @@ -136,10 +137,9 @@ func (suite *operatorTestSuite) TestOperator() { suite.Equal(RemovePeer{FromStore: 2}, op.Check(region)) suite.Equal(int32(2), atomic.LoadInt32(&op.currentStep)) suite.False(op.CheckTimeout()) - SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-FastOperatorWaitTime-time.Second)) + SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-FastStepWaitTime-2*FastStepWaitTime+time.Second)) suite.False(op.CheckTimeout()) - op.stepsTime[op.currentStep-1] = op.GetReachTimeOf(STARTED).Unix() - SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-SlowOperatorWaitTime-time.Second)) + SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-SlowStepWaitTime-2*FastStepWaitTime-time.Second)) suite.True(op.CheckTimeout()) res, err := json.Marshal(op) suite.NoError(err) @@ -150,7 +150,7 @@ func (suite *operatorTestSuite) TestOperator() { op = suite.newTestOperator(1, OpLeader, steps...) op.Start() suite.False(op.CheckTimeout()) - SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-FastOperatorWaitTime-time.Second)) + SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-FastStepWaitTime-time.Second)) suite.True(op.CheckTimeout()) // case2: check timeout operator will return false not panic. 
@@ -159,7 +159,7 @@ func (suite *operatorTestSuite) TestOperator() { suite.True(op.status.To(STARTED)) suite.True(op.status.To(TIMEOUT)) suite.False(op.CheckSuccess()) - suite.False(op.CheckTimeout()) + suite.True(op.CheckTimeout()) } func (suite *operatorTestSuite) TestInfluence() { @@ -311,7 +311,7 @@ func (suite *operatorTestSuite) TestCheckTimeout() { suite.Equal(CREATED, op.Status()) suite.True(op.Start()) op.currentStep = int32(len(op.steps)) - SetOperatorStatusReachTime(op, STARTED, time.Now().Add(-SlowOperatorWaitTime)) + SetOperatorStatusReachTime(op, STARTED, time.Now().Add(-SlowStepWaitTime)) suite.False(op.CheckTimeout()) suite.Equal(SUCCESS, op.Status()) } @@ -374,7 +374,7 @@ func (suite *operatorTestSuite) TestCheck() { suite.True(op.Start()) suite.NotNil(op.Check(region)) suite.Equal(STARTED, op.Status()) - op.stepsTime[op.currentStep-1] = time.Now().Add(-SlowOperatorWaitTime).Unix() + SetOperatorStatusReachTime(op, STARTED, time.Now().Add(-SlowStepWaitTime-2*FastStepWaitTime)) suite.NotNil(op.Check(region)) suite.Equal(TIMEOUT, op.Status()) } @@ -389,7 +389,7 @@ func (suite *operatorTestSuite) TestCheck() { suite.True(op.Start()) suite.NotNil(op.Check(region)) suite.Equal(STARTED, op.Status()) - op.status.setTime(STARTED, time.Now().Add(-SlowOperatorWaitTime)) + op.status.setTime(STARTED, time.Now().Add(-SlowStepWaitTime)) region = suite.newTestRegion(1, 1, [2]uint64{1, 1}) suite.Nil(op.Check(region)) suite.Equal(SUCCESS, op.Status()) @@ -436,83 +436,45 @@ func (suite *operatorTestSuite) TestOpStepTimeout() { testData := []struct { step []OpStep regionSize int64 - start time.Time - expect bool + expect time.Duration }{ { // case1: 10GB region will have 60,000s to executor. step: []OpStep{AddLearner{}, AddPeer{}}, regionSize: 10 * 1000, - start: time.Now().Add(-(time.Second*(6*10*1000) + time.Second)), - expect: true, - }, - { - step: []OpStep{AddLearner{}, AddPeer{}}, - regionSize: 10 * 1000, - start: time.Now().Add(-(time.Second*(6*10*1000) - time.Second)), - expect: false, + expect: time.Second * (6 * 10 * 1000), }, { - // case2: 10MB region will have at least SlowOperatorWaitTime(10min) to executor. + // case2: 10MB region will have at least SlowStepWaitTime(10min) to executor. step: []OpStep{AddLearner{}, AddPeer{}}, regionSize: 10, - start: time.Now().Add(-(SlowOperatorWaitTime + time.Second)), - expect: true, - }, { - step: []OpStep{AddLearner{}, AddPeer{}}, - regionSize: 10, - start: time.Now().Add(-(time.Second*(6*10) - time.Second)), - expect: false, + expect: SlowStepWaitTime, }, { // case3: 10GB region will have 1000s to executor for RemovePeer, TransferLeader, SplitRegion, PromoteLearner. step: []OpStep{RemovePeer{}, TransferLeader{}, SplitRegion{}, PromoteLearner{}}, - start: time.Now().Add(-(time.Second*(1000) + time.Second)), - regionSize: 10 * 1000, - expect: true, - }, { - step: []OpStep{RemovePeer{}, TransferLeader{}, SplitRegion{}, PromoteLearner{}}, - start: time.Now().Add(-(time.Second*(1000) - time.Second)), regionSize: 10 * 1000, - expect: false, + expect: time.Second * (10 * 1000 * 0.6), }, { - // case4: 10MB will have at lease FastOperatorWaitTime(10s) to executor for RemovePeer, TransferLeader, SplitRegion, PromoteLearner. + // case4: 10MB will have at lease FastStepWaitTime(10s) to executor for RemovePeer, TransferLeader, SplitRegion, PromoteLearner. 
step: []OpStep{RemovePeer{}, TransferLeader{}, SplitRegion{}, PromoteLearner{}}, - start: time.Now().Add(-(FastOperatorWaitTime + time.Second)), regionSize: 10, - expect: true, - }, { - step: []OpStep{RemovePeer{}, TransferLeader{}, SplitRegion{}, PromoteLearner{}}, - start: time.Now().Add(-(FastOperatorWaitTime - time.Second)), - regionSize: 10, - expect: false, + expect: FastStepWaitTime, }, { // case5: 10GB region will have 1000*3 for ChangePeerV2Enter, ChangePeerV2Leave. step: []OpStep{ChangePeerV2Enter{PromoteLearners: []PromoteLearner{{}, {}}}, ChangePeerV2Leave{PromoteLearners: []PromoteLearner{{}, {}}}}, - start: time.Now().Add(-(time.Second*(3000) + time.Second)), regionSize: 10 * 1000, - expect: true, - }, { - step: []OpStep{ChangePeerV2Enter{PromoteLearners: []PromoteLearner{{}, {}}}, - ChangePeerV2Leave{PromoteLearners: []PromoteLearner{{}, {}}}}, - start: time.Now().Add(-(time.Second*(3000) - time.Second)), - regionSize: 10 * 1000, - expect: false, + expect: time.Second * (10 * 1000 * 0.6 * 3), }, { //case6: 10GB region will have 1000*10s for ChangePeerV2Enter, ChangePeerV2Leave. step: []OpStep{MergeRegion{}}, - start: time.Now().Add(-(time.Second*(10000) + time.Second)), regionSize: 10 * 1000, - expect: true, - }, { - step: []OpStep{MergeRegion{}}, - start: time.Now().Add(-(time.Second*(10000) - time.Second)), - regionSize: 10 * 1000, - expect: false, + expect: time.Second * (10 * 1000 * 0.6 * 10), }, } - for _, v := range testData { + for i, v := range testData { + fmt.Printf("case:%d\n", i) for _, step := range v.step { - suite.Equal(step.Timeout(v.start, v.regionSize), v.expect) + suite.Equal(v.expect, step.Timeout(v.regionSize)) } } } diff --git a/server/schedule/operator/status_tracker.go b/server/schedule/operator/status_tracker.go index 36a8a99c228a..c17ae17bf17d 100644 --- a/server/schedule/operator/status_tracker.go +++ b/server/schedule/operator/status_tracker.go @@ -115,12 +115,13 @@ func (trk *OpStatusTracker) CheckExpired(exp time.Duration) bool { return trk.current == EXPIRED } -// CheckStepTimeout checks if timeout, and update the current status. -func (trk *OpStatusTracker) CheckStepTimeout(start time.Time, step OpStep, approximateSize int64) bool { +// CheckTimeout returns true if timeout, and update the current status. 
+func (trk *OpStatusTracker) CheckTimeout(duration time.Duration) bool { trk.rw.Lock() defer trk.rw.Unlock() if trk.current == STARTED { - if !step.Timeout(start, approximateSize) { + start := trk.getTime(STARTED) + if time.Since(start) < duration { return false } _ = trk.toLocked(TIMEOUT) diff --git a/server/schedule/operator/status_tracker_test.go b/server/schedule/operator/status_tracker_test.go index 04eb16d6ecde..e53b017229ab 100644 --- a/server/schedule/operator/status_tracker_test.go +++ b/server/schedule/operator/status_tracker_test.go @@ -123,11 +123,11 @@ func TestCheckStepTimeout(t *testing.T) { status OpStatus }{{ step: AddLearner{}, - start: time.Now().Add(-(SlowOperatorWaitTime - time.Second)), + start: time.Now().Add(-(SlowStepWaitTime - time.Second)), status: STARTED, }, { step: AddLearner{}, - start: time.Now().Add(-(SlowOperatorWaitTime + time.Second)), + start: time.Now().Add(-(SlowStepWaitTime + time.Second)), status: TIMEOUT, }} @@ -135,7 +135,8 @@ func TestCheckStepTimeout(t *testing.T) { // Timeout and status changed trk := NewOpStatusTracker() trk.To(STARTED) - re.Equal(v.status == TIMEOUT, trk.CheckStepTimeout(v.start, v.step, 0)) + trk.reachTimes[STARTED] = v.start + re.Equal(v.status == TIMEOUT, trk.CheckTimeout(SlowStepWaitTime)) re.Equal(v.status, trk.Status()) } } diff --git a/server/schedule/operator/step.go b/server/schedule/operator/step.go index 3000cd6a6506..8e3d177b0a2d 100644 --- a/server/schedule/operator/step.go +++ b/server/schedule/operator/step.go @@ -32,12 +32,20 @@ import ( ) const ( - // DefaultSlowExecutorRate is the fast rate of the operator executor. + // DefaultSlowExecutorRate is the fast rate of the step executor. // default: 6 s/Mb DefaultSlowExecutorRate = 6 - // DefaultFastExecutorRate is the slow rate of the operator executor. - // default: 0.1 s/Mb - DefaultFastExecutorRate = 0.1 + // DefaultFastExecutorRate is the slow rate of the step executor. + // default: 0.6 s/Mb + DefaultFastExecutorRate = 0.6 + // FastStepWaitTime is the duration that the OpStep may take. + // there are some steps that may take a short time, such as transfer leader, remove peer etc. + // It should consider the latency of handling region heartbeat especially big cluster. + // The update duration of region heartbeat should be less than the region heartbeat interval(default 60s). + FastStepWaitTime = 60 * time.Second + // SlowStepWaitTime is the duration that the OpStep may take. + // there are some steps that may take a long time, such as add peer, merge region etc. + SlowStepWaitTime = 10 * time.Minute ) // OpStep describes the basic scheduling steps that can not be subdivided. @@ -47,7 +55,7 @@ type OpStep interface { IsFinish(region *core.RegionInfo) bool CheckInProgress(ci ClusterInformer, region *core.RegionInfo) error Influence(opInfluence OpInfluence, region *core.RegionInfo) - Timeout(start time.Time, regionSize int64) bool + Timeout(regionSize int64) time.Duration GetCmd(region *core.RegionInfo, useConfChangeV2 bool) *pdpb.RegionHeartbeatResponse } @@ -111,9 +119,9 @@ func (tl TransferLeader) Influence(opInfluence OpInfluence, region *core.RegionI to.LeaderCount++ } -// Timeout returns true if the step is timeout. -func (tl TransferLeader) Timeout(start time.Time, regionSize int64) bool { - return time.Since(start) > fastStepWaitDuration(regionSize) +// Timeout returns duration that current step may take. 
+func (tl TransferLeader) Timeout(regionSize int64) time.Duration { + return fastStepWaitDuration(regionSize) } // GetCmd returns the schedule command for heartbeat response. @@ -182,14 +190,14 @@ func (ap AddPeer) CheckInProgress(ci ClusterInformer, region *core.RegionInfo) e } peer := region.GetStorePeer(ap.ToStore) if peer != nil && peer.GetId() != ap.PeerID { - return errors.Errorf("peer %d has already existed in store %d, the operator is trying to add peer %d on the same store", peer.GetId(), ap.ToStore, ap.PeerID) + return errors.Errorf("peer %d has already existed in store %d, the timeout is trying to add peer %d on the same store", peer.GetId(), ap.ToStore, ap.PeerID) } return nil } -// Timeout returns true if the step is timeout. -func (ap AddPeer) Timeout(start time.Time, regionSize int64) bool { - return time.Since(start) > slowStepWaitDuration(regionSize) +// Timeout returns duration that current step may take. +func (ap AddPeer) Timeout(regionSize int64) time.Duration { + return slowStepWaitDuration(regionSize) } // GetCmd returns the schedule command for heartbeat response. @@ -250,9 +258,9 @@ func (bw BecomeWitness) Influence(opInfluence OpInfluence, region *core.RegionIn to.AdjustStepCost(storelimit.RemovePeer, regionSize) } -// Timeout returns true if the step is timeout. -func (bw BecomeWitness) Timeout(start time.Time, regionSize int64) bool { - return time.Since(start) > fastStepWaitDuration(regionSize) +// Timeout returns duration that current step may take. +func (bw BecomeWitness) Timeout(regionSize int64) time.Duration { + return fastStepWaitDuration(regionSize) } // GetCmd returns the schedule command for heartbeat response. @@ -310,9 +318,9 @@ func (bn BecomeNonWitness) Influence(opInfluence OpInfluence, region *core.Regio to.AdjustStepCost(storelimit.AddPeer, regionSize) } -// Timeout returns true if the step is timeout -func (bn BecomeNonWitness) Timeout(start time.Time, regionSize int64) bool { - return time.Since(start) > slowStepWaitDuration(regionSize) +// Timeout returns duration that current step may take. +func (bn BecomeNonWitness) Timeout(regionSize int64) time.Duration { + return slowStepWaitDuration(regionSize) } // GetCmd returns the schedule command for heartbeat response. @@ -362,7 +370,7 @@ func (al AddLearner) CheckInProgress(ci ClusterInformer, region *core.RegionInfo return nil } if peer.GetId() != al.PeerID { - return errors.Errorf("peer %d has already existed in store %d, the operator is trying to add peer %d on the same store", peer.GetId(), al.ToStore, al.PeerID) + return errors.Errorf("peer %d has already existed in store %d, the timeout is trying to add peer %d on the same store", peer.GetId(), al.ToStore, al.PeerID) } if !core.IsLearner(peer) { return errors.New("peer already is a voter") @@ -383,9 +391,9 @@ func (al AddLearner) Influence(opInfluence OpInfluence, region *core.RegionInfo) to.AdjustStepCost(storelimit.AddPeer, regionSize) } -// Timeout returns true if the step is timeout. -func (al AddLearner) Timeout(start time.Time, regionSize int64) bool { - return time.Since(start) > slowStepWaitDuration(regionSize) +// Timeout returns duration that current step may take. +func (al AddLearner) Timeout(regionSize int64) time.Duration { + return slowStepWaitDuration(regionSize) } // GetCmd returns the schedule command for heartbeat response. @@ -438,9 +446,9 @@ func (pl PromoteLearner) CheckInProgress(_ ClusterInformer, region *core.RegionI // Influence calculates the store difference that current step makes. 
func (pl PromoteLearner) Influence(_ OpInfluence, _ *core.RegionInfo) {} -// Timeout returns true if the step is timeout. -func (pl PromoteLearner) Timeout(start time.Time, regionSize int64) bool { - return time.Since(start) > fastStepWaitDuration(regionSize) +// Timeout returns duration that current step may take. +func (pl PromoteLearner) Timeout(regionSize int64) time.Duration { + return fastStepWaitDuration(regionSize) } // GetCmd returns the schedule command for heartbeat response. @@ -499,9 +507,9 @@ func (rp RemovePeer) Influence(opInfluence OpInfluence, region *core.RegionInfo) from.AdjustStepCost(storelimit.RemovePeer, regionSize) } -// Timeout returns true if the step is timeout. -func (rp RemovePeer) Timeout(start time.Time, regionSize int64) bool { - return time.Since(start) > fastStepWaitDuration(regionSize) +// Timeout returns duration that current step may take. +func (rp RemovePeer) Timeout(regionSize int64) time.Duration { + return fastStepWaitDuration(regionSize) } // GetCmd returns the schedule command for heartbeat response. @@ -560,9 +568,10 @@ func (mr MergeRegion) Influence(opInfluence OpInfluence, region *core.RegionInfo } } -// Timeout returns true if the step is timeout. -func (mr MergeRegion) Timeout(start time.Time, regionSize int64) bool { - return time.Since(start) > fastStepWaitDuration(regionSize)*10 +// Timeout returns duration that current step may take. +// The merge step need more time to finish but less than slow step. +func (mr MergeRegion) Timeout(regionSize int64) time.Duration { + return fastStepWaitDuration(regionSize) * 10 } // GetCmd returns the schedule command for heartbeat response. @@ -614,9 +623,9 @@ func (sr SplitRegion) CheckInProgress(_ ClusterInformer, _ *core.RegionInfo) err return nil } -// Timeout returns true if the step is timeout. -func (sr SplitRegion) Timeout(start time.Time, regionSize int64) bool { - return time.Since(start) > fastStepWaitDuration(regionSize) +// Timeout returns duration that current step may take. +func (sr SplitRegion) Timeout(regionSize int64) time.Duration { + return fastStepWaitDuration(regionSize) } // GetCmd returns the schedule command for heartbeat response. @@ -666,9 +675,9 @@ func (dv DemoteVoter) IsFinish(region *core.RegionInfo) bool { return false } -// Timeout returns true if the step is timeout. -func (dv DemoteVoter) Timeout(start time.Time, regionSize int64) bool { - return time.Since(start) > fastStepWaitDuration(regionSize) +// Timeout returns duration that current step may take. +func (dv DemoteVoter) Timeout(regionSize int64) time.Duration { + return fastStepWaitDuration(regionSize) } // GetCmd returns the schedule command for heartbeat response. @@ -786,10 +795,10 @@ func (cpe ChangePeerV2Enter) CheckInProgress(_ ClusterInformer, region *core.Reg // Influence calculates the store difference that current step makes. func (cpe ChangePeerV2Enter) Influence(_ OpInfluence, _ *core.RegionInfo) {} -// Timeout returns true if the step is timeout. -func (cpe ChangePeerV2Enter) Timeout(start time.Time, regionSize int64) bool { +// Timeout returns duration that current step may take. +func (cpe ChangePeerV2Enter) Timeout(regionSize int64) time.Duration { count := uint64(len(cpe.PromoteLearners)+len(cpe.DemoteVoters)) + 1 - return time.Since(start) > fastStepWaitDuration(regionSize)*time.Duration(count) + return fastStepWaitDuration(regionSize) * time.Duration(count) } // GetCmd returns the schedule command for heartbeat response. 
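The Timeout methods in this hunk all reduce to two helpers: slow steps such as AddPeer, AddLearner and BecomeNonWitness scale at DefaultSlowExecutorRate with a SlowStepWaitTime floor of 10 minutes, fast steps such as TransferLeader, RemovePeer, SplitRegion and PromoteLearner scale at DefaultFastExecutorRate with a FastStepWaitTime floor of 60 seconds, MergeRegion takes ten fast units, and the ChangePeerV2 steps take one fast unit per promoted or demoted peer plus one. The following standalone sketch is not part of the patch; it only copies the constants from the hunks above to reproduce the arithmetic behind the expected values in TestOpStepTimeout:

```go
package main

import (
	"fmt"
	"time"
)

// Constants copied from the step.go hunks above; this is an illustrative
// sketch, not the PD implementation itself.
const (
	defaultSlowExecutorRate = 6   // seconds per MB of region size for slow steps
	defaultFastExecutorRate = 0.6 // seconds per MB of region size for fast steps
	fastStepWaitTime        = 60 * time.Second
	slowStepWaitTime        = 10 * time.Minute
)

// slowStepWait mirrors slowStepWaitDuration: rate*size, floored at 10 minutes.
func slowStepWait(regionSizeMB int64) time.Duration {
	wait := time.Duration(defaultSlowExecutorRate*regionSizeMB) * time.Second
	if wait < slowStepWaitTime {
		wait = slowStepWaitTime
	}
	return wait
}

// fastStepWait mirrors fastStepWaitDuration: rate*size, floored at 60 seconds.
func fastStepWait(regionSizeMB int64) time.Duration {
	wait := time.Duration(defaultFastExecutorRate*float64(regionSizeMB)) * time.Second
	if wait < fastStepWaitTime {
		wait = fastStepWaitTime
	}
	return wait
}

func main() {
	for _, sizeMB := range []int64{10, 10 * 1000} { // a 10MB and a 10GB region
		fmt.Printf("size=%5dMB  addPeer=%v  transferLeader=%v  merge=%v  changePeerV2(2 learners)=%v\n",
			sizeMB,
			slowStepWait(sizeMB),    // AddPeer / AddLearner
			fastStepWait(sizeMB),    // TransferLeader / RemovePeer / SplitRegion
			fastStepWait(sizeMB)*10, // MergeRegion
			fastStepWait(sizeMB)*3,  // ChangePeerV2Enter with 2 promoted learners: 2 + 1
		)
	}
	// A 10GB region therefore gets 6000s per fast step and 60000s per slow step,
	// while a 10MB region falls back to the 60s and 10min floors, matching the
	// expectations in TestOpStepTimeout.
}
```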
@@ -926,10 +935,10 @@ func (cpl ChangePeerV2Leave) CheckInProgress(_ ClusterInformer, region *core.Reg // Influence calculates the store difference that current step makes. func (cpl ChangePeerV2Leave) Influence(_ OpInfluence, _ *core.RegionInfo) {} -// Timeout returns true if the step is timeout. -func (cpl ChangePeerV2Leave) Timeout(start time.Time, regionSize int64) bool { +// Timeout returns duration that current step may take. +func (cpl ChangePeerV2Leave) Timeout(regionSize int64) time.Duration { count := uint64(len(cpl.PromoteLearners)+len(cpl.DemoteVoters)) + 1 - return time.Since(start) > fastStepWaitDuration(regionSize)*time.Duration(count) + return fastStepWaitDuration(regionSize) * time.Duration(count) } // GetCmd returns the schedule command for heartbeat response. @@ -957,8 +966,8 @@ func validateStore(ci ClusterInformer, id uint64) error { func slowStepWaitDuration(regionSize int64) time.Duration { seconds := DefaultSlowExecutorRate * regionSize wait := time.Duration(seconds) * time.Second - if wait < SlowOperatorWaitTime { - wait = SlowOperatorWaitTime + if wait < SlowStepWaitTime { + wait = SlowStepWaitTime } return wait } @@ -966,8 +975,8 @@ func slowStepWaitDuration(regionSize int64) time.Duration { func fastStepWaitDuration(regionSize int64) time.Duration { seconds := int64(DefaultFastExecutorRate * float64(regionSize)) wait := time.Duration(seconds) * time.Second - if wait < FastOperatorWaitTime { - wait = FastOperatorWaitTime + if wait < FastStepWaitTime { + wait = FastStepWaitTime } return wait } diff --git a/server/schedule/operator_controller_test.go b/server/schedule/operator_controller_test.go index 7ab638e3e06b..afbfdc22e1b2 100644 --- a/server/schedule/operator_controller_test.go +++ b/server/schedule/operator_controller_test.go @@ -122,7 +122,7 @@ func (suite *operatorControllerTestSuite) TestOperatorStatus() { oc.SetOperator(op2) suite.Equal(pdpb.OperatorStatus_RUNNING, oc.GetOperatorStatus(1).Status) suite.Equal(pdpb.OperatorStatus_RUNNING, oc.GetOperatorStatus(2).Status) - operator.SetOperatorStatusReachTime(op1, operator.STARTED, time.Now().Add(-10*time.Minute)) + operator.SetOperatorStatusReachTime(op1, operator.STARTED, time.Now().Add(-operator.SlowStepWaitTime-operator.FastStepWaitTime)) region2 = ApplyOperatorStep(region2, op2) tc.PutRegion(region2) oc.Dispatch(region1, "test") @@ -246,7 +246,7 @@ func (suite *operatorControllerTestSuite) TestCheckAddUnexpectedStatus() { op := operator.NewTestOperator(1, &metapb.RegionEpoch{}, operator.OpRegion, steps...) 
suite.True(oc.checkAddOperator(false, op)) op.Start() - operator.SetOperatorStatusReachTime(op, operator.STARTED, time.Now().Add(-operator.SlowOperatorWaitTime)) + operator.SetOperatorStatusReachTime(op, operator.STARTED, time.Now().Add(-operator.SlowStepWaitTime-operator.FastStepWaitTime)) suite.True(op.CheckTimeout()) suite.False(oc.checkAddOperator(false, op)) } From 5b7c29e8da0574ce552f2e34bb5f1f8cdd52f757 Mon Sep 17 00:00:00 2001 From: Yongbo Jiang Date: Fri, 4 Nov 2022 12:22:01 +0800 Subject: [PATCH 36/67] scheduler: modify baseSchedulePlan to fit new balance region impl (#5614) ref tikv/pd#5257, ref tikv/pd#5544 modify baseSchedulePlan to fit new balance region impl Signed-off-by: Cabinfever_B Co-authored-by: Ti Chi Robot --- server/schedule/filter/filters.go | 1 - server/schedulers/balance_plan.go | 27 +++++++++++++------------- server/schedulers/balance_plan_test.go | 6 ++---- server/schedulers/balance_region.go | 4 ++++ server/schedulers/balance_test.go | 6 +++--- 5 files changed, 23 insertions(+), 21 deletions(-) diff --git a/server/schedule/filter/filters.go b/server/schedule/filter/filters.go index b663730d50d8..0b1563a0348e 100644 --- a/server/schedule/filter/filters.go +++ b/server/schedule/filter/filters.go @@ -109,7 +109,6 @@ func SelectTargetStores(stores []*core.StoreInfo, filters []Filter, opt *config. if collector != nil { collector.Collect(plan.SetResource(s), plan.SetStatus(status)) } - return false } return true diff --git a/server/schedulers/balance_plan.go b/server/schedulers/balance_plan.go index 421b24ab9acf..04dd1a494c19 100644 --- a/server/schedulers/balance_plan.go +++ b/server/schedulers/balance_plan.go @@ -72,6 +72,7 @@ func (p *balanceSchedulerPlan) GetResource(step int) uint64 { if p.step < step { return 0 } + // Please use with care. Add a nil check if need in the future switch step { case pickSource: return p.source.GetID() @@ -114,10 +115,10 @@ func (p *balanceSchedulerPlan) Clone(opts ...plan.Option) plan.Plan { // BalancePlanSummary is used to summarize for BalancePlan func BalancePlanSummary(plans []plan.Plan) (map[uint64]plan.Status, bool, error) { // storeStatusCounter is used to count the number of various statuses of each store - var storeStatusCounter map[uint64]map[plan.Status]int + storeStatusCounter := make(map[uint64]map[plan.Status]int) // statusCounter is used to count the number of status which is regarded as best status of each store statusCounter := make(map[uint64]plan.Status) - maxStep := -1 + storeMaxStep := make(map[uint64]int) normal := true for _, pi := range plans { p, ok := pi.(*balanceSchedulerPlan) @@ -129,16 +130,6 @@ func BalancePlanSummary(plans []plan.Plan) (map[uint64]plan.Status, bool, error) if step > pickTarget { step = pickTarget } - if step > maxStep { - storeStatusCounter = make(map[uint64]map[plan.Status]int) - maxStep = step - normal = true - } else if step < maxStep { - continue - } - if !p.status.IsNormal() { - normal = false - } var store uint64 // `step == pickRegion` is a special processing in summary, because we want to exclude the factor of region // and consider the failure as the status of source store. 
@@ -147,8 +138,18 @@ func BalancePlanSummary(plans []plan.Plan) (map[uint64]plan.Status, bool, error) } else { store = p.GetResource(step) } - if _, ok := storeStatusCounter[store]; !ok { + maxStep, ok := storeMaxStep[store] + if !ok { + maxStep = -1 + } + if step > maxStep { storeStatusCounter[store] = make(map[plan.Status]int) + storeMaxStep[store] = step + } else if step < maxStep { + continue + } + if !p.status.IsNormal() { + normal = false } storeStatusCounter[store][*p.status]++ } diff --git a/server/schedulers/balance_plan_test.go b/server/schedulers/balance_plan_test.go index 266dc22b60c8..842fc984b3d7 100644 --- a/server/schedulers/balance_plan_test.go +++ b/server/schedulers/balance_plan_test.go @@ -223,19 +223,17 @@ func (suite *balanceSchedulerPlanAnalyzeTestSuite) TestAnalyzerResult4() { func (suite *balanceSchedulerPlanAnalyzeTestSuite) TestAnalyzerResult5() { plans := make([]plan.Plan, 0) - plans = append(plans, &balanceSchedulerPlan{source: suite.stores[4], step: 0, status: plan.NewStatus(plan.StatusStoreDown)}) + plans = append(plans, &balanceSchedulerPlan{source: suite.stores[4], step: 0, status: plan.NewStatus(plan.StatusStoreRemoveLimitThrottled)}) plans = append(plans, &balanceSchedulerPlan{source: suite.stores[3], region: suite.regions[0], step: 1, status: plan.NewStatus(plan.StatusRegionNotMatchRule)}) plans = append(plans, &balanceSchedulerPlan{source: suite.stores[2], region: suite.regions[0], step: 1, status: plan.NewStatus(plan.StatusRegionNotMatchRule)}) plans = append(plans, &balanceSchedulerPlan{source: suite.stores[1], target: suite.stores[0], step: 2, status: plan.NewStatus(plan.StatusStoreScoreDisallowed)}) plans = append(plans, &balanceSchedulerPlan{source: suite.stores[1], target: suite.stores[1], step: 2, status: plan.NewStatus(plan.StatusStoreAlreadyHasPeer)}) plans = append(plans, &balanceSchedulerPlan{source: suite.stores[1], target: suite.stores[2], step: 2, status: plan.NewStatus(plan.StatusStoreNotMatchRule)}) plans = append(plans, &balanceSchedulerPlan{source: suite.stores[1], target: suite.stores[3], step: 2, status: plan.NewStatus(plan.StatusStoreNotMatchRule)}) - plans = append(plans, &balanceSchedulerPlan{source: suite.stores[1], target: suite.stores[4], step: 2, status: plan.NewStatus(plan.StatusStoreDown)}) plans = append(plans, &balanceSchedulerPlan{source: suite.stores[0], target: suite.stores[0], step: 2, status: plan.NewStatus(plan.StatusStoreAlreadyHasPeer)}) plans = append(plans, &balanceSchedulerPlan{source: suite.stores[0], target: suite.stores[1], step: 3, status: plan.NewStatus(plan.StatusStoreScoreDisallowed)}) plans = append(plans, &balanceSchedulerPlan{source: suite.stores[0], target: suite.stores[2], step: 2, status: plan.NewStatus(plan.StatusStoreNotMatchRule)}) plans = append(plans, &balanceSchedulerPlan{source: suite.stores[0], target: suite.stores[3], step: 2, status: plan.NewStatus(plan.StatusStoreNotMatchRule)}) - plans = append(plans, &balanceSchedulerPlan{source: suite.stores[0], target: suite.stores[4], step: 4, status: plan.NewStatus(plan.StatusCreateOperatorFailed)}) statuses, isNormal, err := BalancePlanSummary(plans) suite.NoError(err) suite.False(isNormal) @@ -245,7 +243,7 @@ func (suite *balanceSchedulerPlanAnalyzeTestSuite) TestAnalyzerResult5() { 2: plan.NewStatus(plan.StatusStoreAlreadyHasPeer), 3: plan.NewStatus(plan.StatusStoreNotMatchRule), 4: plan.NewStatus(plan.StatusStoreNotMatchRule), - 5: plan.NewStatus(plan.StatusCreateOperatorFailed), + 5: plan.NewStatus(plan.StatusStoreRemoveLimitThrottled), })) } 
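The reworked BalancePlanSummary keys the "deepest step reached" bookkeeping per store instead of using one global maximum, so a store that only ever failed at pickSource (store 5 in the updated TestAnalyzerResult5) still reports its own status rather than being dropped because other stores progressed further. Below is a minimal sketch of that aggregation rule with simplified types; the attribution of pickRegion failures to the source store and the final "best status" selection are omitted:

```go
package main

import "fmt"

// plan is a simplified stand-in for balanceSchedulerPlan: which store a
// failure is attributed to, how far the plan got, and a status label.
type plan struct {
	store  uint64
	step   int
	status string
}

// summarize keeps, for every store, only the statuses recorded at the deepest
// step that store reached, mirroring the per-store storeMaxStep bookkeeping
// introduced in the hunk above.
func summarize(plans []plan) map[uint64]map[string]int {
	counter := make(map[uint64]map[string]int)
	maxStep := make(map[uint64]int)
	for _, p := range plans {
		deepest, ok := maxStep[p.store]
		if !ok {
			deepest = -1
		}
		if p.step > deepest {
			// A deeper step supersedes everything seen so far for this store.
			counter[p.store] = make(map[string]int)
			maxStep[p.store] = p.step
		} else if p.step < deepest {
			continue
		}
		counter[p.store][p.status]++
	}
	return counter
}

func main() {
	plans := []plan{
		{store: 5, step: 0, status: "store-remove-limit-throttled"}, // only ever failed at pickSource
		{store: 1, step: 2, status: "store-score-disallowed"},
		{store: 1, step: 2, status: "store-not-match-rule"},
		{store: 1, step: 1, status: "region-not-match-rule"}, // ignored: shallower than store 1's max step
	}
	fmt.Println(summarize(plans))
	// Store 5 keeps its own step-0 status even though other stores reached deeper steps.
}
```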
diff --git a/server/schedulers/balance_region.go b/server/schedulers/balance_region.go index 8b0c21ab8cfb..b71ccc082f53 100644 --- a/server/schedulers/balance_region.go +++ b/server/schedulers/balance_region.go @@ -173,6 +173,10 @@ func (s *balanceRegionScheduler) Schedule(cluster schedule.Cluster, dryRun bool) baseRegionFilters = append(baseRegionFilters, filter.NewRegionEmptyFilter(cluster)) } + if collector != nil && len(sourceStores) > 0 { + collector.Collect(plan.SetResource(sourceStores[0]), plan.SetStatus(plan.NewStatus(plan.StatusStoreScoreDisallowed))) + } + solver.step++ var sourceIndex int diff --git a/server/schedulers/balance_test.go b/server/schedulers/balance_test.go index 84499fbf3e17..b1addea160e8 100644 --- a/server/schedulers/balance_test.go +++ b/server/schedulers/balance_test.go @@ -787,15 +787,15 @@ func TestBalanceRegionSchedule1(t *testing.T) { // test region replicate not match opt.SetMaxReplicas(3) ops, plans := sb.Schedule(tc, true) - re.Len(plans, 100) + re.Len(plans, 101) re.Empty(ops) - re.Equal(int(plans[0].GetStatus().StatusCode), plan.StatusRegionNotReplicated) + re.Equal(int(plans[1].GetStatus().StatusCode), plan.StatusRegionNotReplicated) tc.SetStoreOffline(1) opt.SetMaxReplicas(1) ops, plans = sb.Schedule(tc, true) re.NotEmpty(ops) - re.Len(plans, 3) + re.Len(plans, 4) re.True(plans[0].GetStatus().IsOK()) } From ea9b1e93b71a89f5a09a535e177b4eff5ffbb162 Mon Sep 17 00:00:00 2001 From: Lucas Date: Fri, 4 Nov 2022 12:34:00 +0800 Subject: [PATCH 37/67] Supply AwakenRegions message in StoreHeartbeatResponse for TiKV cluster failure recovery. (#5625) close tikv/pd#5626 Supply extra AwakenRegions message in StoreHeartbeatResponse for the TiKV cluster when there exists abnormal TiKV node in the cluster, to wake up hibernated regions in time. Signed-off-by: Lucasliang Co-authored-by: Ti Chi Robot --- go.mod | 5 ++++ server/cluster/cluster.go | 47 ++++++++++++++++++++++++++++++++++ server/cluster/cluster_test.go | 29 +++++++++++++++++++++ server/core/store.go | 21 ++++++++++----- server/core/store_option.go | 7 +++++ server/grpc_service.go | 15 +++++++++++ 6 files changed, 118 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index bbaadb64d695..ded557001810 100644 --- a/go.mod +++ b/go.mod @@ -175,3 +175,8 @@ require ( moul.io/zapgorm2 v1.1.0 // indirect sigs.k8s.io/yaml v1.1.0 // indirect ) + +// When you modify PD cooperatively with kvproto, this will be useful to submit the PR to PD and the PR to +// kvproto at the same time. You can run `go mod tidy` to make it replaced with go-mod style specification. +// After the PR to kvproto is merged, remember to comment this out and run `go mod tidy`. +// replace github.com/pingcap/kvproto => github.com/$YourPrivateRepo $YourPrivateBranch diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index 84ffb1652d75..a687e841cfe6 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -1349,6 +1349,53 @@ func (c *RaftCluster) SlowStoreRecovered(storeID uint64) { c.core.SlowStoreRecovered(storeID) } +// NeedAwakenAllRegionsInStore checks whether we should do AwakenRegions operation. +func (c *RaftCluster) NeedAwakenAllRegionsInStore(storeID uint64) (needAwaken bool, slowStoreIDs []uint64) { + store := c.GetStore(storeID) + // We just return AwakenRegions messages to those Serving stores which need to be awaken. 
+ if store.IsSlow() || !store.NeedAwakenStore() { + return false, nil + } + + needAwaken = false + for _, store := range c.GetStores() { + if store.IsRemoved() { + continue + } + + // We will filter out heartbeat requests from slowStores. + if (store.IsUp() || store.IsRemoving()) && store.IsSlow() && + store.GetStoreStats().GetStoreId() != storeID { + needAwaken = true + slowStoreIDs = append(slowStoreIDs, store.GetID()) + } + } + return needAwaken, slowStoreIDs +} + +// UpdateAwakenStoreTime updates the last awaken time for the store. +func (c *RaftCluster) UpdateAwakenStoreTime(storeID uint64, lastAwakenTime time.Time) error { + c.Lock() + defer c.Unlock() + + store := c.GetStore(storeID) + if store == nil { + return errs.ErrStoreNotFound.FastGenByArgs(storeID) + } + + if store.IsRemoved() { + return errs.ErrStoreRemoved.FastGenByArgs(storeID) + } + + if store.IsPhysicallyDestroyed() { + return errs.ErrStoreDestroyed.FastGenByArgs(storeID) + } + + newStore := store.Clone(core.SetLastAwakenTime(lastAwakenTime)) + + return c.putStoreLocked(newStore) +} + // UpStore up a store from offline func (c *RaftCluster) UpStore(storeID uint64) error { c.Lock() diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index dabbe5e2f913..3821fcbeb262 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -1748,6 +1748,35 @@ func TestCheckStaleRegion(t *testing.T) { re.Error(checkStaleRegion(region.GetMeta(), origin.GetMeta())) } +func TestAwakenStore(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + _, opt, err := newTestScheduleConfig() + re.NoError(err) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + n := uint64(3) + stores := newTestStores(n, "6.0.0") + re.False(stores[0].NeedAwakenStore()) + for _, store := range stores { + re.NoError(cluster.PutStore(store.GetMeta())) + } + for i := uint64(1); i <= n; i++ { + needAwaken, _ := cluster.NeedAwakenAllRegionsInStore(i) + re.False(needAwaken) + } + + now := time.Now() + store4 := stores[0].Clone(core.SetLastHeartbeatTS(now), core.SetLastAwakenTime(now.Add(-31*time.Second))) + re.NoError(cluster.putStoreLocked(store4)) + store1 := cluster.GetStore(1) + re.True(store1.NeedAwakenStore()) + re.NoError(cluster.UpdateAwakenStoreTime(1, now)) + store1 = cluster.GetStore(1) + re.False(store1.NeedAwakenStore()) +} + type testCluster struct { *RaftCluster } diff --git a/server/core/store.go b/server/core/store.go index 49752de41d18..01157f2e3d03 100644 --- a/server/core/store.go +++ b/server/core/store.go @@ -33,6 +33,7 @@ const ( storePersistInterval = 5 * time.Minute initialMinSpace = 8 * units.GiB // 2^33=8GB slowStoreThreshold = 80 + awakenStoreInterval = 30 * time.Second // EngineKey is the label key used to indicate engine. EngineKey = "engine" @@ -60,17 +61,19 @@ type StoreInfo struct { regionWeight float64 limiter map[storelimit.Type]*storelimit.StoreLimit minResolvedTS uint64 + lastAwakenTime time.Time } // NewStoreInfo creates StoreInfo with meta data. 
func NewStoreInfo(store *metapb.Store, opts ...StoreCreateOption) *StoreInfo { storeInfo := &StoreInfo{ - meta: store, - storeStats: newStoreStats(), - leaderWeight: 1.0, - regionWeight: 1.0, - limiter: make(map[storelimit.Type]*storelimit.StoreLimit), - minResolvedTS: 0, + meta: store, + storeStats: newStoreStats(), + leaderWeight: 1.0, + regionWeight: 1.0, + limiter: make(map[storelimit.Type]*storelimit.StoreLimit), + minResolvedTS: 0, + lastAwakenTime: time.Now(), } for _, opt := range opts { opt(storeInfo) @@ -469,6 +472,12 @@ func (s *StoreInfo) GetMinResolvedTS() uint64 { return s.minResolvedTS } +// NeedAwakenStore checks whether all hibernated regions in this store should +// be awaken or not. +func (s *StoreInfo) NeedAwakenStore() bool { + return s.GetLastHeartbeatTS().Sub(s.lastAwakenTime) > awakenStoreInterval +} + var ( // If a store's last heartbeat is storeDisconnectDuration ago, the store will // be marked as disconnected state. The value should be greater than tikv's diff --git a/server/core/store_option.go b/server/core/store_option.go index 3d66097bd6bd..e6ee7965afab 100644 --- a/server/core/store_option.go +++ b/server/core/store_option.go @@ -243,3 +243,10 @@ func ResetStoreLimit(limitType storelimit.Type, ratePerSec ...float64) StoreCrea store.limiter[limitType] = storelimit.NewStoreLimit(ratePerSec[0], storelimit.RegionInfluence[limitType]) } } + +// SetLastAwakenTime sets last awaken time for the store. +func SetLastAwakenTime(lastAwaken time.Time) StoreCreateOption { + return func(store *StoreInfo) { + store.lastAwakenTime = lastAwaken + } +} diff --git a/server/grpc_service.go b/server/grpc_service.go index 2165e2e75264..2e111a129237 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -664,6 +664,21 @@ func (s *GrpcServer) StoreHeartbeat(ctx context.Context, request *pdpb.StoreHear ClusterVersion: rc.GetClusterVersion(), } rc.GetUnsafeRecoveryController().HandleStoreHeartbeat(request, resp) + + // If this cluster has slow stores, we should awaken hibernated regions in other stores. + // TODO: waited to be polished. It's recommended to merge following AwakenRegions checking + // and UpdateAwakenStoreTime into HandlStoreHeartbeat. 
+ if needAwaken, slowStoreIDs := rc.NeedAwakenAllRegionsInStore(storeID); needAwaken { + log.Info("forcely awaken hibernated regions", zap.Uint64("store-id", storeID), zap.Uint64s("slow-stores", slowStoreIDs)) + err := rc.UpdateAwakenStoreTime(storeID, time.Now()) + if err != nil { + log.Warn("failed to awaken hibernated regions in store", zap.Uint64("store-id", storeID)) + } else { + resp.AwakenRegions = &pdpb.AwakenRegions{ + AbnormalStores: slowStoreIDs, + } + } + } return resp, nil } From b3bd6da9bbf3a53afae267555a6765fde4dfba87 Mon Sep 17 00:00:00 2001 From: Zwb Date: Mon, 7 Nov 2022 15:01:50 +0800 Subject: [PATCH 38/67] schedulers: add witness transfer leader scheduler (#5639) close tikv/pd#5638 add transfer leader scheduler Signed-off-by: Wenbo Zhang Co-authored-by: Ti Chi Robot --- server/cluster/cluster.go | 1 + server/cluster/coordinator.go | 17 +++ server/core/region.go | 8 + server/schedulers/transfer_witness_leader.go | 140 ++++++++++++++++++ .../transfer_witness_leader_test.go | 99 +++++++++++++ tests/pdctl/scheduler/scheduler_test.go | 95 ++++++------ 6 files changed, 318 insertions(+), 42 deletions(-) create mode 100644 server/schedulers/transfer_witness_leader.go create mode 100644 server/schedulers/transfer_witness_leader_test.go diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index a687e841cfe6..9f6eddc0ed5e 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -793,6 +793,7 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { peerInfo := core.NewPeerInfo(peer, region.GetWriteLoads(), interval) c.hotStat.CheckWriteAsync(statistics.NewCheckPeerTask(peerInfo, region)) } + c.coordinator.CheckTransferWitnessLeader(region) // Save to storage if meta is updated. // Save to cache if meta or leader is updated, or contains any down/pending peer. 
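The CheckTransferWitnessLeader hook added above hands qualifying regions to the new scheduler through a bounded channel: the coordinator side (see the coordinator.go diff that follows) uses a non-blocking send so a slow scheduler can never stall region-heartbeat processing, and the scheduler drains at most a small batch per Schedule call. The sketch below illustrates that producer/consumer pattern with generic names; it is not the PD code, and the drop behavior on overflow is only logged via a print here:

```go
package main

import "fmt"

// regionID stands in for *core.RegionInfo in this sketch.
type regionID uint64

// scheduler owns a bounded queue of regions waiting for a leader transfer.
type scheduler struct {
	regions chan regionID
}

// offer is the producer side: a non-blocking send, so the heartbeat path never
// waits on the scheduler; overflow is simply dropped (and logged in the real code).
func (s *scheduler) offer(r regionID) {
	select {
	case s.regions <- r:
	default:
		fmt.Printf("drop region %d: queue full\n", r)
	}
}

// scheduleBatch is the consumer side: drain at most batchSize queued regions
// per scheduling round, returning as soon as the queue is empty.
func (s *scheduler) scheduleBatch(batchSize int) []regionID {
	var picked []regionID
	for i := 0; i < batchSize; i++ {
		select {
		case r := <-s.regions:
			picked = append(picked, r)
		default:
			return picked
		}
	}
	return picked
}

func main() {
	s := &scheduler{regions: make(chan regionID, 4)}
	for r := regionID(1); r <= 6; r++ {
		s.offer(r) // regions 5 and 6 are dropped once the queue is full
	}
	fmt.Println("batch 1:", s.scheduleBatch(3)) // [1 2 3]
	fmt.Println("batch 2:", s.scheduleBatch(3)) // [4]
}
```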
diff --git a/server/cluster/coordinator.go b/server/cluster/coordinator.go index 581e76557e3c..973821469ce8 100644 --- a/server/cluster/coordinator.go +++ b/server/cluster/coordinator.go @@ -37,6 +37,7 @@ import ( "github.com/tikv/pd/server/schedule/hbstream" "github.com/tikv/pd/server/schedule/operator" "github.com/tikv/pd/server/schedule/plan" + "github.com/tikv/pd/server/schedulers" "github.com/tikv/pd/server/statistics" "github.com/tikv/pd/server/storage" "go.uber.org/zap" @@ -966,3 +967,19 @@ func (c *coordinator) getPausedSchedulerDelayUntil(name string) (int64, error) { } return s.GetDelayUntil(), nil } + +// CheckTransferWitnessLeader determines if transfer leader is required, then sends to the scheduler if needed +func (c *coordinator) CheckTransferWitnessLeader(region *core.RegionInfo) { + if core.NeedTransferWitnessLeader(region) { + c.RLock() + s, ok := c.schedulers[schedulers.TransferWitnessLeaderName] + c.RUnlock() + if ok { + select { + case schedulers.RecvRegionInfo(s.Scheduler) <- region: + default: + log.Warn("drop transfer witness leader due to recv region channel full", zap.Uint64("region-id", region.GetID())) + } + } + } +} diff --git a/server/core/region.go b/server/core/region.go index 524daeb7723a..6be0ece03ae4 100644 --- a/server/core/region.go +++ b/server/core/region.go @@ -1421,3 +1421,11 @@ func (h HexRegionsMeta) String() string { } return strings.TrimSpace(b.String()) } + +// NeedTransferWitnessLeader is used to judge if the region's leader is a witness +func NeedTransferWitnessLeader(region *RegionInfo) bool { + if region == nil || region.GetLeader() == nil { + return false + } + return region.GetLeader().IsWitness +} diff --git a/server/schedulers/transfer_witness_leader.go b/server/schedulers/transfer_witness_leader.go new file mode 100644 index 000000000000..2770de9c3f05 --- /dev/null +++ b/server/schedulers/transfer_witness_leader.go @@ -0,0 +1,140 @@ +// Copyright 2022 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package schedulers + +import ( + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/server/core" + "github.com/tikv/pd/server/schedule" + "github.com/tikv/pd/server/schedule/filter" + "github.com/tikv/pd/server/schedule/operator" + "github.com/tikv/pd/server/schedule/plan" + "github.com/tikv/pd/server/storage/endpoint" +) + +const ( + // TransferWitnessLeaderName is transfer witness leader scheduler name. + TransferWitnessLeaderName = "transfer-witness-leader-scheduler" + // TransferWitnessLeaderType is transfer witness leader scheduler type. 
+ TransferWitnessLeaderType = "transfer-witness-leader" + // TransferWitnessLeaderBatchSize is the number of operators to to transfer + // leaders by one scheduling + transferWitnessLeaderBatchSize = 3 + // TransferWitnessLeaderRecvMaxRegionSize is the max number of region can receive + // TODO: make it a reasonable value + transferWitnessLeaderRecvMaxRegionSize = 1000 +) + +func init() { + schedule.RegisterSliceDecoderBuilder(TransferWitnessLeaderType, func(args []string) schedule.ConfigDecoder { + return func(v interface{}) error { + return nil + } + }) + + schedule.RegisterScheduler(TransferWitnessLeaderType, func(opController *schedule.OperatorController, _ endpoint.ConfigStorage, _ schedule.ConfigDecoder) (schedule.Scheduler, error) { + return newTransferWitnessLeaderScheduler(opController), nil + }) +} + +type trasferWitnessLeaderScheduler struct { + *BaseScheduler + regions chan *core.RegionInfo +} + +// newTransferWitnessLeaderScheduler creates an admin scheduler that transfers witness leader of a region. +func newTransferWitnessLeaderScheduler(opController *schedule.OperatorController) schedule.Scheduler { + return &trasferWitnessLeaderScheduler{ + BaseScheduler: NewBaseScheduler(opController), + regions: make(chan *core.RegionInfo, transferWitnessLeaderRecvMaxRegionSize), + } +} + +func (s *trasferWitnessLeaderScheduler) GetName() string { + return TransferWitnessLeaderName +} + +func (s *trasferWitnessLeaderScheduler) GetType() string { + return TransferWitnessLeaderType +} + +func (s *trasferWitnessLeaderScheduler) IsScheduleAllowed(cluster schedule.Cluster) bool { + // TODO: make sure the restriction is reasonable + allowed := s.OpController.OperatorCount(operator.OpLeader) < cluster.GetOpts().GetLeaderScheduleLimit() + if !allowed { + operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpLeader.String()).Inc() + } + return allowed +} + +func (s *trasferWitnessLeaderScheduler) Schedule(cluster schedule.Cluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { + schedulerCounter.WithLabelValues(s.GetName(), "schedule").Inc() + return s.scheduleTransferWitnessLeaderBatch(s.GetName(), s.GetType(), cluster, transferWitnessLeaderBatchSize), nil +} + +func (s *trasferWitnessLeaderScheduler) scheduleTransferWitnessLeaderBatch(name, typ string, cluster schedule.Cluster, batchSize int) []*operator.Operator { + var ops []*operator.Operator + for i := 0; i < batchSize; i++ { + select { + case region := <-s.regions: + op, err := s.scheduleTransferWitnessLeader(name, typ, cluster, region) + if err != nil { + log.Debug("fail to create transfer leader operator", errs.ZapError(err)) + continue + } + if op != nil { + op.SetPriorityLevel(core.Urgent) + op.Counters = append(op.Counters, schedulerCounter.WithLabelValues(name, "new-operator")) + ops = append(ops, op) + } + default: + break + } + } + return ops +} + +func (s *trasferWitnessLeaderScheduler) scheduleTransferWitnessLeader(name, typ string, cluster schedule.Cluster, region *core.RegionInfo) (*operator.Operator, error) { + var filters []filter.Filter + unhealthyPeerStores := make(map[uint64]struct{}) + for _, peer := range region.GetDownPeers() { + unhealthyPeerStores[peer.GetPeer().GetStoreId()] = struct{}{} + } + for _, peer := range region.GetPendingPeers() { + unhealthyPeerStores[peer.GetStoreId()] = struct{}{} + } + filters = append(filters, filter.NewExcludedFilter(name, nil, unhealthyPeerStores), &filter.StoreStateFilter{ActionScope: name, TransferLeader: true}) + candidates := 
filter.NewCandidates(cluster.GetFollowerStores(region)).FilterTarget(cluster.GetOpts(), nil, nil, filters...) + // Compatible with old TiKV transfer leader logic. + target := candidates.RandomPick() + targets := candidates.PickAll() + // `targets` MUST contains `target`, so only needs to check if `target` is nil here. + if target == nil { + schedulerCounter.WithLabelValues(name, "no-target-store").Inc() + return nil, errors.New("no target store to schedule") + } + targetIDs := make([]uint64, 0, len(targets)) + for _, t := range targets { + targetIDs = append(targetIDs, t.GetID()) + } + return operator.CreateTransferLeaderOperator(typ, cluster, region, region.GetLeader().GetStoreId(), target.GetID(), targetIDs, operator.OpLeader) +} + +// RecvRegionInfo receives a checked region from coordinator +func RecvRegionInfo(s schedule.Scheduler) chan<- *core.RegionInfo { + return s.(*trasferWitnessLeaderScheduler).regions +} diff --git a/server/schedulers/transfer_witness_leader_test.go b/server/schedulers/transfer_witness_leader_test.go new file mode 100644 index 000000000000..1b65515b1ccd --- /dev/null +++ b/server/schedulers/transfer_witness_leader_test.go @@ -0,0 +1,99 @@ +// Copyright 2022 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package schedulers + +import ( + "context" + "testing" + + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/pingcap/kvproto/pkg/pdpb" + "github.com/stretchr/testify/require" + "github.com/tikv/pd/pkg/mock/mockcluster" + "github.com/tikv/pd/pkg/testutil" + "github.com/tikv/pd/server/config" + "github.com/tikv/pd/server/core" + "github.com/tikv/pd/server/schedule" + "github.com/tikv/pd/server/schedule/operator" + "github.com/tikv/pd/server/storage" +) + +func TestTransferWitnessLeader(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + opt := config.NewTestOptions() + tc := mockcluster.NewCluster(ctx, opt) + + // Add stores 1, 2, 3 + tc.AddLeaderStore(1, 0) + tc.AddLeaderStore(2, 0) + tc.AddLeaderStore(3, 0) + // Add regions 1 with leader in stores 1 + tc.AddLeaderRegion(1, 1, 2, 3) + + sl, err := schedule.CreateScheduler(TransferWitnessLeaderType, schedule.NewOperatorController(ctx, nil, nil), storage.NewStorageWithMemoryBackend(), nil) + re.NoError(err) + RecvRegionInfo(sl) <- tc.GetRegion(1) + re.True(sl.IsScheduleAllowed(tc)) + ops, _ := sl.Schedule(tc, false) + testutil.CheckMultiTargetTransferLeader(re, ops[0], operator.OpLeader, 1, []uint64{2, 3}) + re.False(ops[0].Step(0).(operator.TransferLeader).IsFinish(tc.MockRegionInfo(1, 1, []uint64{2, 3}, []uint64{}, &metapb.RegionEpoch{ConfVer: 0, Version: 0}))) + re.True(ops[0].Step(0).(operator.TransferLeader).IsFinish(tc.MockRegionInfo(1, 2, []uint64{1, 3}, []uint64{}, &metapb.RegionEpoch{ConfVer: 0, Version: 0}))) +} + +func TestTransferWitnessLeaderWithUnhealthyPeer(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + opt := config.NewTestOptions() + tc := mockcluster.NewCluster(ctx, opt) + sl, err := schedule.CreateScheduler(TransferWitnessLeaderType, schedule.NewOperatorController(ctx, nil, nil), storage.NewStorageWithMemoryBackend(), nil) + re.NoError(err) + + // Add stores 1, 2, 3 + tc.AddLeaderStore(1, 0) + tc.AddLeaderStore(2, 0) + tc.AddLeaderStore(3, 0) + // Add region 1, which has 3 peers. 1 is leader. 2 is healthy or pending, 3 is healthy or down. 
+ tc.AddLeaderRegion(1, 1, 2, 3) + region := tc.MockRegionInfo(1, 1, []uint64{2, 3}, nil, nil) + withDownPeer := core.WithDownPeers([]*pdpb.PeerStats{{ + Peer: region.GetPeers()[2], + DownSeconds: 1000, + }}) + withPendingPeer := core.WithPendingPeers([]*metapb.Peer{region.GetPeers()[1]}) + + // only pending + tc.PutRegion(region.Clone(withPendingPeer)) + RecvRegionInfo(sl) <- tc.GetRegion(1) + ops, _ := sl.Schedule(tc, false) + testutil.CheckMultiTargetTransferLeader(re, ops[0], operator.OpLeader, 1, []uint64{3}) + ops, _ = sl.Schedule(tc, false) + re.Nil(ops) + // only down + tc.PutRegion(region.Clone(withDownPeer)) + RecvRegionInfo(sl) <- tc.GetRegion(1) + ops, _ = sl.Schedule(tc, false) + testutil.CheckMultiTargetTransferLeader(re, ops[0], operator.OpLeader, 1, []uint64{2}) + // pending + down + tc.PutRegion(region.Clone(withPendingPeer, withDownPeer)) + ops, _ = sl.Schedule(tc, false) + re.Empty(ops) +} + +// TODO: add more tests with witness diff --git a/tests/pdctl/scheduler/scheduler_test.go b/tests/pdctl/scheduler/scheduler_test.go index 953c02934e89..0819500e52d7 100644 --- a/tests/pdctl/scheduler/scheduler_test.go +++ b/tests/pdctl/scheduler/scheduler_test.go @@ -135,10 +135,11 @@ func TestScheduler(t *testing.T) { // scheduler show command expected := map[string]bool{ - "balance-region-scheduler": true, - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "split-bucket-scheduler": true, + "balance-region-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "split-bucket-scheduler": true, + "transfer-witness-leader-scheduler": true, } checkSchedulerCommand(nil, expected) @@ -149,9 +150,10 @@ func TestScheduler(t *testing.T) { // scheduler delete command args := []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"} expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "split-bucket-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "split-bucket-scheduler": true, + "transfer-witness-leader-scheduler": true, } checkSchedulerCommand(args, expected) @@ -163,10 +165,11 @@ func TestScheduler(t *testing.T) { // scheduler add command args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "2"} expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "split-bucket-scheduler": true, - schedulers[idx]: true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "split-bucket-scheduler": true, + schedulers[idx]: true, + "transfer-witness-leader-scheduler": true, } checkSchedulerCommand(args, expected) @@ -178,10 +181,11 @@ func TestScheduler(t *testing.T) { // scheduler config update command args = []string{"-u", pdAddr, "scheduler", "config", schedulers[idx], "add-store", "3"} expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "split-bucket-scheduler": true, - schedulers[idx]: true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "split-bucket-scheduler": true, + schedulers[idx]: true, + "transfer-witness-leader-scheduler": true, } checkSchedulerCommand(args, expected) @@ -192,29 +196,32 @@ func TestScheduler(t *testing.T) { // scheduler delete command args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx]} expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - 
"split-bucket-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "split-bucket-scheduler": true, + "transfer-witness-leader-scheduler": true, } checkSchedulerCommand(args, expected) // scheduler add command args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "2"} expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "split-bucket-scheduler": true, - schedulers[idx]: true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "split-bucket-scheduler": true, + schedulers[idx]: true, + "transfer-witness-leader-scheduler": true, } checkSchedulerCommand(args, expected) // scheduler add command twice args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "4"} expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "split-bucket-scheduler": true, - schedulers[idx]: true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "split-bucket-scheduler": true, + schedulers[idx]: true, + "transfer-witness-leader-scheduler": true, } checkSchedulerCommand(args, expected) @@ -225,10 +232,11 @@ func TestScheduler(t *testing.T) { // scheduler remove command [old] args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx] + "-4"} expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "split-bucket-scheduler": true, - schedulers[idx]: true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "split-bucket-scheduler": true, + schedulers[idx]: true, + "transfer-witness-leader-scheduler": true, } checkSchedulerCommand(args, expected) @@ -239,19 +247,21 @@ func TestScheduler(t *testing.T) { // scheduler remove command, when remove the last store, it should remove whole scheduler args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx] + "-2"} expected = map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "split-bucket-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "split-bucket-scheduler": true, + "transfer-witness-leader-scheduler": true, } checkSchedulerCommand(args, expected) } // test shuffle region config checkSchedulerCommand([]string{"-u", pdAddr, "scheduler", "add", "shuffle-region-scheduler"}, map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "split-bucket-scheduler": true, - "shuffle-region-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "split-bucket-scheduler": true, + "shuffle-region-scheduler": true, + "transfer-witness-leader-scheduler": true, }) var roles []string mustExec([]string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler", "show-roles"}, &roles) @@ -264,11 +274,12 @@ func TestScheduler(t *testing.T) { // test grant hot region scheduler config checkSchedulerCommand([]string{"-u", pdAddr, "scheduler", "add", "grant-hot-region-scheduler", "1", "1,2,3"}, map[string]bool{ - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "split-bucket-scheduler": true, - "shuffle-region-scheduler": true, - "grant-hot-region-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "split-bucket-scheduler": true, + "shuffle-region-scheduler": true, + "grant-hot-region-scheduler": true, + "transfer-witness-leader-scheduler": true, }) var 
conf3 map[string]interface{} expected3 := map[string]interface{}{ From 91f16642cfa3dc53da5aafa481afa63159224aaf Mon Sep 17 00:00:00 2001 From: Lucas Date: Tue, 8 Nov 2022 10:31:50 +0800 Subject: [PATCH 39/67] Lower down the frequency of sending AwakenRegions. (#5681) close tikv/pd#5680 Lower down the frequency of AwakenRegions from 30s to 10min. Signed-off-by: Lucasliang --- server/cluster/cluster_test.go | 10 +++++----- server/core/store.go | 15 +++++++-------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index 3821fcbeb262..a4e4061eb1ef 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -1758,7 +1758,7 @@ func TestAwakenStore(t *testing.T) { cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) n := uint64(3) stores := newTestStores(n, "6.0.0") - re.False(stores[0].NeedAwakenStore()) + re.True(stores[0].NeedAwakenStore()) for _, store := range stores { re.NoError(cluster.PutStore(store.GetMeta())) } @@ -1768,13 +1768,13 @@ func TestAwakenStore(t *testing.T) { } now := time.Now() - store4 := stores[0].Clone(core.SetLastHeartbeatTS(now), core.SetLastAwakenTime(now.Add(-31*time.Second))) + store4 := stores[0].Clone(core.SetLastHeartbeatTS(now), core.SetLastAwakenTime(now.Add(-6*time.Minute))) re.NoError(cluster.putStoreLocked(store4)) store1 := cluster.GetStore(1) - re.True(store1.NeedAwakenStore()) - re.NoError(cluster.UpdateAwakenStoreTime(1, now)) - store1 = cluster.GetStore(1) re.False(store1.NeedAwakenStore()) + re.NoError(cluster.UpdateAwakenStoreTime(1, now.Add(-11*time.Minute))) + store1 = cluster.GetStore(1) + re.True(store1.NeedAwakenStore()) } type testCluster struct { diff --git a/server/core/store.go b/server/core/store.go index 01157f2e3d03..46237c1073a1 100644 --- a/server/core/store.go +++ b/server/core/store.go @@ -33,7 +33,7 @@ const ( storePersistInterval = 5 * time.Minute initialMinSpace = 8 * units.GiB // 2^33=8GB slowStoreThreshold = 80 - awakenStoreInterval = 30 * time.Second + awakenStoreInterval = 10 * time.Minute // 2 * slowScoreRecoveryTime // EngineKey is the label key used to indicate engine. EngineKey = "engine" @@ -67,13 +67,12 @@ type StoreInfo struct { // NewStoreInfo creates StoreInfo with meta data. func NewStoreInfo(store *metapb.Store, opts ...StoreCreateOption) *StoreInfo { storeInfo := &StoreInfo{ - meta: store, - storeStats: newStoreStats(), - leaderWeight: 1.0, - regionWeight: 1.0, - limiter: make(map[storelimit.Type]*storelimit.StoreLimit), - minResolvedTS: 0, - lastAwakenTime: time.Now(), + meta: store, + storeStats: newStoreStats(), + leaderWeight: 1.0, + regionWeight: 1.0, + limiter: make(map[storelimit.Type]*storelimit.StoreLimit), + minResolvedTS: 0, } for _, opt := range opts { opt(storeInfo) From 80801a2949122a51b670f2a3869cfe2d9ce812da Mon Sep 17 00:00:00 2001 From: JmPotato Date: Thu, 10 Nov 2022 12:21:52 +0800 Subject: [PATCH 40/67] cluster: eliminate some duplicated code in processRegionHeartbeat (#5693) ref tikv/pd#5648 Eliminate some duplicated code in `processRegionHeartbeat`. 
Signed-off-by: JmPotato --- server/cluster/cluster.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index 9f6eddc0ed5e..0f3b8f214c6a 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -63,6 +63,8 @@ var ( // DefaultMinResolvedTSPersistenceInterval is the default value of min resolved ts persistence interval. // If interval in config is zero, it means not to persist resolved ts and check config with this DefaultMinResolvedTSPersistenceInterval DefaultMinResolvedTSPersistenceInterval = config.DefaultMinResolvedTSPersistenceInterval + regionUpdateCacheEventCounter = regionEventCounter.WithLabelValues("update_cache") + regionUpdateKVEventCounter = regionEventCounter.WithLabelValues("update_kv") ) // regionLabelGCInterval is the interval to run region-label's GC work. @@ -795,6 +797,7 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { } c.coordinator.CheckTransferWitnessLeader(region) + hasRegionStats := c.regionStats != nil // Save to storage if meta is updated. // Save to cache if meta or leader is updated, or contains any down/pending peer. // Mark isNew if the region in cache does not have leader. @@ -802,7 +805,7 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { if !saveKV && !saveCache && !isNew { // Due to some config changes need to update the region stats as well, // so we do some extra checks here. - if c.regionStats != nil && c.regionStats.RegionStatsNeedUpdate(region) { + if hasRegionStats && c.regionStats.RegionStatsNeedUpdate(region) { c.regionStats.Observe(region, c.getRegionStoresLocked(region)) } return nil @@ -843,10 +846,10 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { c.core.UpdateStoreStatus(key) } - regionEventCounter.WithLabelValues("update_cache").Inc() + regionUpdateCacheEventCounter.Inc() } - if c.regionStats != nil { + if hasRegionStats { c.regionStats.Observe(region, c.getRegionStoresLocked(region)) } @@ -874,7 +877,7 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { logutil.ZapRedactStringer("region-meta", core.RegionToHexMeta(region.GetMeta())), errs.ZapError(err)) } - regionEventCounter.WithLabelValues("update_kv").Inc() + regionUpdateKVEventCounter.Inc() } } From bcc38eaa2af06aecf7f676f1c3f2d53e459bbfd5 Mon Sep 17 00:00:00 2001 From: Lucas Date: Thu, 10 Nov 2022 17:17:54 +0800 Subject: [PATCH 41/67] Polish the mechanism on updating `lastAwakenTime` (#5689) close tikv/pd#5678 Polish and refine the strategy on updating `lastAwakenTime` in stores, to remove an extra and unnecessary `Store.Clone(...)`. Signed-off-by: Lucasliang --- server/cluster/cluster.go | 43 ++++++-------- server/cluster/cluster_test.go | 56 ++++++++++--------- server/grpc_service.go | 24 ++------ tests/pdctl/hot/hot_test.go | 26 +++++---- .../server/storage/hot_region_storage_test.go | 2 +- 5 files changed, 67 insertions(+), 84 deletions(-) diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index 0f3b8f214c6a..091af969af64 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -672,7 +672,8 @@ func (c *RaftCluster) ClearSuspectKeyRanges() { } // HandleStoreHeartbeat updates the store status. 
-func (c *RaftCluster) HandleStoreHeartbeat(stats *pdpb.StoreStats) error { +func (c *RaftCluster) HandleStoreHeartbeat(heartbeat *pdpb.StoreHeartbeatRequest, resp *pdpb.StoreHeartbeatResponse) error { + stats := heartbeat.GetStats() storeID := stats.GetStoreId() c.Lock() defer c.Unlock() @@ -680,7 +681,20 @@ func (c *RaftCluster) HandleStoreHeartbeat(stats *pdpb.StoreStats) error { if store == nil { return errors.Errorf("store %v not found", storeID) } - newStore := store.Clone(core.SetStoreStats(stats), core.SetLastHeartbeatTS(time.Now())) + + nowTime := time.Now() + var newStore *core.StoreInfo + // If this cluster has slow stores, we should awaken hibernated regions in other stores. + if needAwaken, slowStoreIDs := c.NeedAwakenAllRegionsInStore(storeID); needAwaken { + log.Info("forcely awaken hibernated regions", zap.Uint64("store-id", storeID), zap.Uint64s("slow-stores", slowStoreIDs)) + newStore = store.Clone(core.SetStoreStats(stats), core.SetLastHeartbeatTS(nowTime), core.SetLastAwakenTime(nowTime)) + resp.AwakenRegions = &pdpb.AwakenRegions{ + AbnormalStores: slowStoreIDs, + } + } else { + newStore = store.Clone(core.SetStoreStats(stats), core.SetLastHeartbeatTS(nowTime)) + } + if newStore.IsLowSpace(c.opt.GetLowSpaceRatio()) { log.Warn("store does not have enough disk space", zap.Uint64("store-id", storeID), @@ -691,7 +705,7 @@ func (c *RaftCluster) HandleStoreHeartbeat(stats *pdpb.StoreStats) error { if err := c.storage.SaveStore(newStore.GetMeta()); err != nil { log.Error("failed to persist store", zap.Uint64("store-id", storeID), errs.ZapError(err)) } else { - newStore = newStore.Clone(core.SetLastPersistTime(time.Now())) + newStore = newStore.Clone(core.SetLastPersistTime(nowTime)) } } if store := c.core.GetStore(storeID); store != nil { @@ -1377,29 +1391,6 @@ func (c *RaftCluster) NeedAwakenAllRegionsInStore(storeID uint64) (needAwaken bo return needAwaken, slowStoreIDs } -// UpdateAwakenStoreTime updates the last awaken time for the store. 
-func (c *RaftCluster) UpdateAwakenStoreTime(storeID uint64, lastAwakenTime time.Time) error { - c.Lock() - defer c.Unlock() - - store := c.GetStore(storeID) - if store == nil { - return errs.ErrStoreNotFound.FastGenByArgs(storeID) - } - - if store.IsRemoved() { - return errs.ErrStoreRemoved.FastGenByArgs(storeID) - } - - if store.IsPhysicallyDestroyed() { - return errs.ErrStoreDestroyed.FastGenByArgs(storeID) - } - - newStore := store.Clone(core.SetLastAwakenTime(lastAwakenTime)) - - return c.putStoreLocked(newStore) -} - // UpStore up a store from offline func (c *RaftCluster) UpStore(storeID uint64) error { c.Lock() diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index a4e4061eb1ef..fb607d6e4e15 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -64,24 +64,26 @@ func TestStoreHeartbeat(t *testing.T) { re.Equal(int(n), cluster.core.Regions.RegionsInfo.GetRegionCount()) for i, store := range stores { - storeStats := &pdpb.StoreStats{ + req := &pdpb.StoreHeartbeatRequest{} + resp := &pdpb.StoreHeartbeatResponse{} + req.Stats = &pdpb.StoreStats{ StoreId: store.GetID(), Capacity: 100, Available: 50, RegionCount: 1, } - re.Error(cluster.HandleStoreHeartbeat(storeStats)) + re.Error(cluster.HandleStoreHeartbeat(req, resp)) re.NoError(cluster.putStoreLocked(store)) re.Equal(i+1, cluster.GetStoreCount()) re.Equal(int64(0), store.GetLastHeartbeatTS().UnixNano()) - re.NoError(cluster.HandleStoreHeartbeat(storeStats)) + re.NoError(cluster.HandleStoreHeartbeat(req, resp)) s := cluster.GetStore(store.GetID()) re.NotEqual(int64(0), s.GetLastHeartbeatTS().UnixNano()) - re.Equal(storeStats, s.GetStoreStats()) + re.Equal(req.GetStats(), s.GetStoreStats()) storeMetasAfterHeartbeat = append(storeMetasAfterHeartbeat, s.GetMeta()) } @@ -95,7 +97,9 @@ func TestStoreHeartbeat(t *testing.T) { re.NoError(err) re.Equal(storeMetasAfterHeartbeat[i], tmp) } - hotHeartBeat := &pdpb.StoreStats{ + hotReq := &pdpb.StoreHeartbeatRequest{} + hotResp := &pdpb.StoreHeartbeatResponse{} + hotReq.Stats = &pdpb.StoreStats{ StoreId: 1, RegionCount: 1, Interval: &pdpb.TimeInterval{ @@ -113,7 +117,10 @@ func TestStoreHeartbeat(t *testing.T) { }, }, } - coldHeartBeat := &pdpb.StoreStats{ + hotHeartBeat := hotReq.GetStats() + coldReq := &pdpb.StoreHeartbeatRequest{} + coldResp := &pdpb.StoreHeartbeatResponse{} + coldReq.Stats = &pdpb.StoreStats{ StoreId: 1, RegionCount: 1, Interval: &pdpb.TimeInterval{ @@ -122,9 +129,9 @@ func TestStoreHeartbeat(t *testing.T) { }, PeerStats: []*pdpb.PeerStat{}, } - re.NoError(cluster.HandleStoreHeartbeat(hotHeartBeat)) - re.NoError(cluster.HandleStoreHeartbeat(hotHeartBeat)) - re.NoError(cluster.HandleStoreHeartbeat(hotHeartBeat)) + re.NoError(cluster.HandleStoreHeartbeat(hotReq, hotResp)) + re.NoError(cluster.HandleStoreHeartbeat(hotReq, hotResp)) + re.NoError(cluster.HandleStoreHeartbeat(hotReq, hotResp)) time.Sleep(20 * time.Millisecond) storeStats := cluster.hotStat.RegionStats(statistics.Read, 3) re.Len(storeStats[1], 1) @@ -135,32 +142,32 @@ func TestStoreHeartbeat(t *testing.T) { re.Equal(float64(hotHeartBeat.PeerStats[0].ReadKeys)/interval, storeStats[1][0].Loads[statistics.KeyDim]) re.Equal(float64(hotHeartBeat.PeerStats[0].QueryStats.Get)/interval, storeStats[1][0].Loads[statistics.QueryDim]) // After cold heartbeat, we won't find region 1 peer in regionStats - re.NoError(cluster.HandleStoreHeartbeat(coldHeartBeat)) + re.NoError(cluster.HandleStoreHeartbeat(coldReq, coldResp)) time.Sleep(20 * time.Millisecond) storeStats = 
cluster.hotStat.RegionStats(statistics.Read, 1) re.Empty(storeStats[1]) // After hot heartbeat, we can find region 1 peer again - re.NoError(cluster.HandleStoreHeartbeat(hotHeartBeat)) + re.NoError(cluster.HandleStoreHeartbeat(hotReq, hotResp)) time.Sleep(20 * time.Millisecond) storeStats = cluster.hotStat.RegionStats(statistics.Read, 3) re.Len(storeStats[1], 1) re.Equal(uint64(1), storeStats[1][0].RegionID) // after several cold heartbeats, and one hot heartbeat, we also can't find region 1 peer - re.NoError(cluster.HandleStoreHeartbeat(coldHeartBeat)) - re.NoError(cluster.HandleStoreHeartbeat(coldHeartBeat)) - re.NoError(cluster.HandleStoreHeartbeat(coldHeartBeat)) + re.NoError(cluster.HandleStoreHeartbeat(coldReq, coldResp)) + re.NoError(cluster.HandleStoreHeartbeat(coldReq, coldResp)) + re.NoError(cluster.HandleStoreHeartbeat(coldReq, coldResp)) time.Sleep(20 * time.Millisecond) storeStats = cluster.hotStat.RegionStats(statistics.Read, 0) re.Empty(storeStats[1]) - re.Nil(cluster.HandleStoreHeartbeat(hotHeartBeat)) + re.Nil(cluster.HandleStoreHeartbeat(hotReq, hotResp)) time.Sleep(20 * time.Millisecond) storeStats = cluster.hotStat.RegionStats(statistics.Read, 1) re.Len(storeStats[1], 0) storeStats = cluster.hotStat.RegionStats(statistics.Read, 3) re.Empty(storeStats[1]) // after 2 hot heartbeats, wo can find region 1 peer again - re.NoError(cluster.HandleStoreHeartbeat(hotHeartBeat)) - re.NoError(cluster.HandleStoreHeartbeat(hotHeartBeat)) + re.NoError(cluster.HandleStoreHeartbeat(hotReq, hotResp)) + re.NoError(cluster.HandleStoreHeartbeat(hotReq, hotResp)) time.Sleep(20 * time.Millisecond) storeStats = cluster.hotStat.RegionStats(statistics.Read, 3) re.Len(storeStats[1], 1) @@ -177,20 +184,22 @@ func TestFilterUnhealthyStore(t *testing.T) { cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) stores := newTestStores(3, "2.0.0") + req := &pdpb.StoreHeartbeatRequest{} + resp := &pdpb.StoreHeartbeatResponse{} for _, store := range stores { - storeStats := &pdpb.StoreStats{ + req.Stats = &pdpb.StoreStats{ StoreId: store.GetID(), Capacity: 100, Available: 50, RegionCount: 1, } re.NoError(cluster.putStoreLocked(store)) - re.NoError(cluster.HandleStoreHeartbeat(storeStats)) + re.NoError(cluster.HandleStoreHeartbeat(req, resp)) re.NotNil(cluster.hotStat.GetRollingStoreStats(store.GetID())) } for _, store := range stores { - storeStats := &pdpb.StoreStats{ + req.Stats = &pdpb.StoreStats{ StoreId: store.GetID(), Capacity: 100, Available: 50, @@ -198,7 +207,7 @@ func TestFilterUnhealthyStore(t *testing.T) { } newStore := store.Clone(core.TombstoneStore()) re.NoError(cluster.putStoreLocked(newStore)) - re.NoError(cluster.HandleStoreHeartbeat(storeStats)) + re.NoError(cluster.HandleStoreHeartbeat(req, resp)) re.Nil(cluster.hotStat.GetRollingStoreStats(store.GetID())) } } @@ -1768,12 +1777,9 @@ func TestAwakenStore(t *testing.T) { } now := time.Now() - store4 := stores[0].Clone(core.SetLastHeartbeatTS(now), core.SetLastAwakenTime(now.Add(-6*time.Minute))) + store4 := stores[0].Clone(core.SetLastHeartbeatTS(now), core.SetLastAwakenTime(now.Add(-11*time.Minute))) re.NoError(cluster.putStoreLocked(store4)) store1 := cluster.GetStore(1) - re.False(store1.NeedAwakenStore()) - re.NoError(cluster.UpdateAwakenStoreTime(1, now.Add(-11*time.Minute))) - store1 = cluster.GetStore(1) re.True(store1.NeedAwakenStore()) } diff --git a/server/grpc_service.go b/server/grpc_service.go index 2e111a129237..de7aa718870c 100644 --- 
a/server/grpc_service.go +++ b/server/grpc_service.go @@ -635,13 +635,14 @@ func (s *GrpcServer) StoreHeartbeat(ctx context.Context, request *pdpb.StoreHear }, nil } + resp := &pdpb.StoreHeartbeatResponse{Header: s.header()} // Bypass stats handling if the store report for unsafe recover is not empty. if request.GetStoreReport() == nil { storeAddress := store.GetAddress() storeLabel := strconv.FormatUint(storeID, 10) start := time.Now() - err := rc.HandleStoreHeartbeat(request.GetStats()) + err := rc.HandleStoreHeartbeat(request, resp) if err != nil { return &pdpb.StoreHeartbeatResponse{ Header: s.wrapErrorToHeader(pdpb.ErrorType_UNKNOWN, @@ -658,27 +659,10 @@ func (s *GrpcServer) StoreHeartbeat(ctx context.Context, request *pdpb.StoreHear rc.GetReplicationMode().UpdateStoreDRStatus(request.GetStats().GetStoreId(), status) } - resp := &pdpb.StoreHeartbeatResponse{ - Header: s.header(), - ReplicationStatus: rc.GetReplicationMode().GetReplicationStatus(), - ClusterVersion: rc.GetClusterVersion(), - } + resp.ReplicationStatus = rc.GetReplicationMode().GetReplicationStatus() + resp.ClusterVersion = rc.GetClusterVersion() rc.GetUnsafeRecoveryController().HandleStoreHeartbeat(request, resp) - // If this cluster has slow stores, we should awaken hibernated regions in other stores. - // TODO: waited to be polished. It's recommended to merge following AwakenRegions checking - // and UpdateAwakenStoreTime into HandlStoreHeartbeat. - if needAwaken, slowStoreIDs := rc.NeedAwakenAllRegionsInStore(storeID); needAwaken { - log.Info("forcely awaken hibernated regions", zap.Uint64("store-id", storeID), zap.Uint64s("slow-stores", slowStoreIDs)) - err := rc.UpdateAwakenStoreTime(storeID, time.Now()) - if err != nil { - log.Warn("failed to awaken hibernated regions in store", zap.Uint64("store-id", storeID)) - } else { - resp.AwakenRegions = &pdpb.AwakenRegions{ - AbnormalStores: slowStoreIDs, - } - } - } return resp, nil } diff --git a/tests/pdctl/hot/hot_test.go b/tests/pdctl/hot/hot_test.go index ae81d49c95b8..7bab989ccc50 100644 --- a/tests/pdctl/hot/hot_test.go +++ b/tests/pdctl/hot/hot_test.go @@ -394,18 +394,20 @@ func TestHotWithoutHotPeer(t *testing.T) { load := 1024.0 for _, store := range stores { for i := 0; i < 5; i++ { - err := leaderServer.GetServer().GetRaftCluster().HandleStoreHeartbeat(&pdpb.StoreStats{ - StoreId: store.Id, - BytesRead: uint64(load * statistics.StoreHeartBeatReportInterval), - KeysRead: uint64(load * statistics.StoreHeartBeatReportInterval), - BytesWritten: uint64(load * statistics.StoreHeartBeatReportInterval), - KeysWritten: uint64(load * statistics.StoreHeartBeatReportInterval), - Capacity: 1000 * units.MiB, - Available: 1000 * units.MiB, - Interval: &pdpb.TimeInterval{ - StartTimestamp: timestamp + uint64(i*statistics.StoreHeartBeatReportInterval), - EndTimestamp: timestamp + uint64((i+1)*statistics.StoreHeartBeatReportInterval)}, - }) + err := leaderServer.GetServer().GetRaftCluster().HandleStoreHeartbeat(&pdpb.StoreHeartbeatRequest{ + Stats: &pdpb.StoreStats{ + StoreId: store.Id, + BytesRead: uint64(load * statistics.StoreHeartBeatReportInterval), + KeysRead: uint64(load * statistics.StoreHeartBeatReportInterval), + BytesWritten: uint64(load * statistics.StoreHeartBeatReportInterval), + KeysWritten: uint64(load * statistics.StoreHeartBeatReportInterval), + Capacity: 1000 * units.MiB, + Available: 1000 * units.MiB, + Interval: &pdpb.TimeInterval{ + StartTimestamp: timestamp + uint64(i*statistics.StoreHeartBeatReportInterval), + EndTimestamp: timestamp + 
uint64((i+1)*statistics.StoreHeartBeatReportInterval)}, + }, + }, &pdpb.StoreHeartbeatResponse{}) re.NoError(err) } } diff --git a/tests/server/storage/hot_region_storage_test.go b/tests/server/storage/hot_region_storage_test.go index 9432ceb0c77b..39fe345a91b9 100644 --- a/tests/server/storage/hot_region_storage_test.go +++ b/tests/server/storage/hot_region_storage_test.go @@ -93,7 +93,7 @@ func TestHotRegionStorage(t *testing.T) { }, } for _, storeStats := range storeStats { - leaderServer.GetRaftCluster().HandleStoreHeartbeat(storeStats) + leaderServer.GetRaftCluster().HandleStoreHeartbeat(&pdpb.StoreHeartbeatRequest{Stats: storeStats}, &pdpb.StoreHeartbeatResponse{}) } // wait hot scheduler starts time.Sleep(5000 * time.Millisecond) From 818b8657795fa65510bedc169335c14681ce90b3 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Thu, 10 Nov 2022 17:29:53 +0800 Subject: [PATCH 42/67] api, ctl: support get the top CPU usage regions (#5696) close tikv/pd#4042 Since the region CPU metering is on by default now on TiKV side, this PR supports to get the top CPU usage regions with command `pd-ctl region topcpu`. Signed-off-by: JmPotato Co-authored-by: Ti Chi Robot --- server/api/region.go | 15 ++++++++++ server/api/region_test.go | 31 +++++++++++++------- server/api/router.go | 1 + server/core/region.go | 12 ++++++++ server/core/region_option.go | 7 +++++ tools/pd-ctl/pdctl/command/region_command.go | 9 ++++++ 6 files changed, 65 insertions(+), 10 deletions(-) diff --git a/server/api/region.go b/server/api/region.go index 4b3fbfd3d30f..796c3acafa4d 100644 --- a/server/api/region.go +++ b/server/api/region.go @@ -111,6 +111,7 @@ type RegionInfo struct { Leader MetaPeer `json:"leader,omitempty"` DownPeers []PDPeerStats `json:"down_peers,omitempty"` PendingPeers []MetaPeer `json:"pending_peers,omitempty"` + CPUUsage uint64 `json:"cpu_usage"` WrittenBytes uint64 `json:"written_bytes"` ReadBytes uint64 `json:"read_bytes"` WrittenKeys uint64 `json:"written_keys"` @@ -158,6 +159,7 @@ func InitRegion(r *core.RegionInfo, s *RegionInfo) *RegionInfo { s.Leader = fromPeer(r.GetLeader()) s.DownPeers = fromPeerStatsSlice(r.GetDownPeers()) s.PendingPeers = fromPeerSlice(r.GetPendingPeers()) + s.CPUUsage = r.GetCPUUsage() s.WrittenBytes = r.GetBytesWritten() s.WrittenKeys = r.GetKeysWritten() s.ReadBytes = r.GetBytesRead() @@ -770,6 +772,19 @@ func (h *regionsHandler) GetTopKeysRegions(w http.ResponseWriter, r *http.Reques }) } +// @Tags region +// @Summary List regions with the highest CPU usage. +// @Param limit query integer false "Limit count" default(16) +// @Produce json +// @Success 200 {object} RegionsInfo +// @Failure 400 {string} string "The input is invalid." +// @Router /regions/cpu [get] +func (h *regionsHandler) GetTopCPURegions(w http.ResponseWriter, r *http.Request) { + h.GetTopNRegions(w, r, func(a, b *core.RegionInfo) bool { + return a.GetCPUUsage() < b.GetCPUUsage() + }) +} + // @Tags region // @Summary Accelerate regions scheduling a in given range, only receive hex format for keys // @Accept json diff --git a/server/api/region_test.go b/server/api/region_test.go index a5dfc1ffcba3..0796cd49c97b 100644 --- a/server/api/region_test.go +++ b/server/api/region_test.go @@ -291,7 +291,8 @@ func (suite *regionTestSuite) TestStoreRegions() { suite.Len(regionIDs, r6.Count) } -func (suite *regionTestSuite) TestTopFlow() { +func (suite *regionTestSuite) TestTop() { + // Top flow. 
re := suite.Require() r1 := newTestRegionInfo(1, 1, []byte("a"), []byte("b"), core.SetWrittenBytes(1000), core.SetReadBytes(1000), core.SetRegionConfVer(1), core.SetRegionVersion(1)) mustRegionHeartbeat(re, suite.svr, r1) @@ -306,22 +307,32 @@ func (suite *regionTestSuite) TestTopFlow() { suite.checkTopRegions(fmt.Sprintf("%s/regions/confver?limit=2", suite.urlPrefix), []uint64{3, 2}) suite.checkTopRegions(fmt.Sprintf("%s/regions/version", suite.urlPrefix), []uint64{2, 3, 1}) suite.checkTopRegions(fmt.Sprintf("%s/regions/version?limit=2", suite.urlPrefix), []uint64{2, 3}) -} - -func (suite *regionTestSuite) TestTopSize() { - re := suite.Require() + // Top size. baseOpt := []core.RegionCreateOption{core.SetRegionConfVer(3), core.SetRegionVersion(3)} opt := core.SetApproximateSize(1000) - r1 := newTestRegionInfo(7, 1, []byte("a"), []byte("b"), append(baseOpt, opt)...) + r1 = newTestRegionInfo(1, 1, []byte("a"), []byte("b"), append(baseOpt, opt)...) mustRegionHeartbeat(re, suite.svr, r1) opt = core.SetApproximateSize(900) - r2 := newTestRegionInfo(8, 1, []byte("b"), []byte("c"), append(baseOpt, opt)...) + r2 = newTestRegionInfo(2, 1, []byte("b"), []byte("c"), append(baseOpt, opt)...) mustRegionHeartbeat(re, suite.svr, r2) opt = core.SetApproximateSize(800) - r3 := newTestRegionInfo(9, 1, []byte("c"), []byte("d"), append(baseOpt, opt)...) + r3 = newTestRegionInfo(3, 1, []byte("c"), []byte("d"), append(baseOpt, opt)...) + mustRegionHeartbeat(re, suite.svr, r3) + suite.checkTopRegions(fmt.Sprintf("%s/regions/size?limit=2", suite.urlPrefix), []uint64{1, 2}) + suite.checkTopRegions(fmt.Sprintf("%s/regions/size", suite.urlPrefix), []uint64{1, 2, 3}) + // Top CPU usage. + baseOpt = []core.RegionCreateOption{core.SetRegionConfVer(4), core.SetRegionVersion(4)} + opt = core.SetCPUUsage(100) + r1 = newTestRegionInfo(1, 1, []byte("a"), []byte("b"), append(baseOpt, opt)...) + mustRegionHeartbeat(re, suite.svr, r1) + opt = core.SetCPUUsage(300) + r2 = newTestRegionInfo(2, 1, []byte("b"), []byte("c"), append(baseOpt, opt)...) + mustRegionHeartbeat(re, suite.svr, r2) + opt = core.SetCPUUsage(500) + r3 = newTestRegionInfo(3, 1, []byte("c"), []byte("d"), append(baseOpt, opt)...) 
mustRegionHeartbeat(re, suite.svr, r3) - // query with limit - suite.checkTopRegions(fmt.Sprintf("%s/regions/size?limit=%d", suite.urlPrefix, 2), []uint64{7, 8}) + suite.checkTopRegions(fmt.Sprintf("%s/regions/cpu?limit=2", suite.urlPrefix), []uint64{3, 2}) + suite.checkTopRegions(fmt.Sprintf("%s/regions/cpu", suite.urlPrefix), []uint64{3, 2, 1}) } func (suite *regionTestSuite) TestAccelerateRegionsScheduleInRange() { diff --git a/server/api/router.go b/server/api/router.go index e98bd86f4b3d..2c750b12eb76 100644 --- a/server/api/router.go +++ b/server/api/router.go @@ -251,6 +251,7 @@ func createRouter(prefix string, svr *server.Server) *mux.Router { registerFunc(clusterRouter, "/regions/version", regionsHandler.GetTopVersionRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(clusterRouter, "/regions/size", regionsHandler.GetTopSizeRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(clusterRouter, "/regions/keys", regionsHandler.GetTopKeysRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/regions/cpu", regionsHandler.GetTopCPURegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(clusterRouter, "/regions/check/miss-peer", regionsHandler.GetMissPeerRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(clusterRouter, "/regions/check/extra-peer", regionsHandler.GetExtraPeerRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(clusterRouter, "/regions/check/pending-peer", regionsHandler.GetPendingPeerRegions, setMethods(http.MethodGet), setAuditBackend(prometheus)) diff --git a/server/core/region.go b/server/core/region.go index 6be0ece03ae4..14792ab3d46c 100644 --- a/server/core/region.go +++ b/server/core/region.go @@ -54,6 +54,7 @@ type RegionInfo struct { leader *metapb.Peer downPeers []*pdpb.PeerStats pendingPeers []*metapb.Peer + cpuUsage uint64 writtenBytes uint64 writtenKeys uint64 readBytes uint64 @@ -153,6 +154,7 @@ func RegionFromHeartbeat(heartbeat *pdpb.RegionHeartbeatRequest, opts ...RegionC leader: heartbeat.GetLeader(), downPeers: heartbeat.GetDownPeers(), pendingPeers: heartbeat.GetPendingPeers(), + cpuUsage: heartbeat.GetCpuUsage(), writtenBytes: heartbeat.GetBytesWritten(), writtenKeys: heartbeat.GetKeysWritten(), readBytes: heartbeat.GetBytesRead(), @@ -218,6 +220,7 @@ func (r *RegionInfo) Clone(opts ...RegionCreateOption) *RegionInfo { leader: typeutil.DeepClone(r.leader, RegionPeerFactory), downPeers: downPeers, pendingPeers: pendingPeers, + cpuUsage: r.cpuUsage, writtenBytes: r.writtenBytes, writtenKeys: r.writtenKeys, readBytes: r.readBytes, @@ -493,6 +496,15 @@ func (r *RegionInfo) GetPendingPeers() []*metapb.Peer { return r.pendingPeers } +// GetCPUUsage returns the CPU usage of the region since the last heartbeat. +// The number range is [0, N * 100], where N is the number of CPU cores. +// However, since the TiKV basically only meters the CPU usage inside the +// Unified Read Pool, it should be considered as an indicator of Region read +// CPU overhead for now. +func (r *RegionInfo) GetCPUUsage() uint64 { + return r.cpuUsage +} + // GetBytesRead returns the read bytes of the region. 
func (r *RegionInfo) GetBytesRead() uint64 { return r.readBytes diff --git a/server/core/region_option.go b/server/core/region_option.go index 64e329fd3b33..b405dabab337 100644 --- a/server/core/region_option.go +++ b/server/core/region_option.go @@ -170,6 +170,13 @@ func WithDecConfVer() RegionCreateOption { } } +// SetCPUUsage sets the CPU usage of the region. +func SetCPUUsage(v uint64) RegionCreateOption { + return func(region *RegionInfo) { + region.cpuUsage = v + } +} + // SetWrittenBytes sets the written bytes for the region. func SetWrittenBytes(v uint64) RegionCreateOption { return func(region *RegionInfo) { diff --git a/tools/pd-ctl/pdctl/command/region_command.go b/tools/pd-ctl/pdctl/command/region_command.go index 9e07abff2fc7..fcebb30e6d88 100644 --- a/tools/pd-ctl/pdctl/command/region_command.go +++ b/tools/pd-ctl/pdctl/command/region_command.go @@ -41,6 +41,7 @@ var ( regionsVersionPrefix = "pd/api/v1/regions/version" regionsSizePrefix = "pd/api/v1/regions/size" regionTopKeysPrefix = "pd/api/v1/regions/keys" + regionTopCPUPrefix = "pd/api/v1/regions/cpu" regionsKeyPrefix = "pd/api/v1/regions/key" regionsSiblingPrefix = "pd/api/v1/regions/sibling" regionsRangeHolesPrefix = "pd/api/v1/regions/range-holes" @@ -110,6 +111,14 @@ func NewRegionCommand() *cobra.Command { topKeys.Flags().String("jq", "", "jq query") r.AddCommand(topKeys) + topCPU := &cobra.Command{ + Use: `topcpu [--jq=""]`, + Short: "show regions with top CPU usage", + Run: showRegionsTopCommand(regionTopCPUPrefix), + } + topCPU.Flags().String("jq", "", "jq query") + r.AddCommand(topCPU) + scanRegion := &cobra.Command{ Use: `scan [--jq=""]`, Short: "scan all regions", From 53cb66d13b39822c6ef71d470930a6f1b59072d8 Mon Sep 17 00:00:00 2001 From: Mike <842725815@qq.com> Date: Thu, 10 Nov 2022 19:59:53 +0800 Subject: [PATCH 43/67] Add curl command into docker image (#5695) ref pingcap/tidb-operator#4764 Add curl command into docker image. 
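For illustration only, one kind of use this enables (a sketch, not part of this change): with curl present in the image, a container health probe can hit the PD HTTP API directly, e.g. a Dockerfile HEALTHCHECK along these lines:

    # sketch only: 2379 is the exposed PD client port; the health endpoint path is an assumption here
    HEALTHCHECK --interval=30s --timeout=3s \
      CMD curl -f http://127.0.0.1:2379/pd/api/v1/health || exit 1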
Signed-off-by: mikechengwei <842725815@qq.com> --- Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile b/Dockerfile index 11d67f7e7fda..db4004a36095 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,6 +33,9 @@ COPY --from=builder /go/src/github.com/tikv/pd/bin/pd-ctl /pd-ctl COPY --from=builder /go/src/github.com/tikv/pd/bin/pd-recover /pd-recover COPY --from=builder /jq /usr/local/bin/jq +RUN apk add --no-cache \ + curl + EXPOSE 2379 2380 ENTRYPOINT ["/pd-server"] From 179dbe14e73ec3a6aed65b062665b4086572fd32 Mon Sep 17 00:00:00 2001 From: buffer <1045931706@qq.com> Date: Thu, 10 Nov 2022 21:51:53 +0800 Subject: [PATCH 44/67] mod: upgrade log module (#5694) ref pingcap/log#30, ref tikv/pd#5604 Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: Ti Chi Robot --- client/go.mod | 2 +- client/go.sum | 4 ++-- go.mod | 2 +- go.sum | 4 ++-- tests/client/go.mod | 2 +- tests/client/go.sum | 4 ++-- tools/pd-tso-bench/go.mod | 2 +- tools/pd-tso-bench/go.sum | 4 ++-- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/client/go.mod b/client/go.mod index 5978c3372c7b..09277d77a4d9 100644 --- a/client/go.mod +++ b/client/go.mod @@ -7,7 +7,7 @@ require ( github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 - github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 + github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 github.com/prometheus/client_golang v1.11.0 github.com/stretchr/testify v1.7.0 go.uber.org/goleak v1.1.11 diff --git a/client/go.sum b/client/go.sum index a87ac7297a13..32758cfe55e6 100644 --- a/client/go.sum +++ b/client/go.sum @@ -106,8 +106,8 @@ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 h1:FYgKV9znRQmzVrrJDZ0gUfMIvKLAMU1tu1UKJib8bEQ= github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= -github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY= -github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= +github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8IDP+SZrdhV1Kibl9KrHxJ9eciw= +github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= diff --git a/go.mod b/go.mod index ded557001810..0ea251607306 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 - github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 + github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d github.com/pingcap/tidb-dashboard v0.0.0-20221103012625-46facc4b7f6d github.com/prometheus/client_golang v1.1.0 diff --git a/go.sum b/go.sum index 
92abde26b910..526c737c18e3 100644 --- a/go.sum +++ b/go.sum @@ -423,8 +423,8 @@ github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIf github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= github.com/pingcap/log v0.0.0-20210906054005-afc726e70354/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= -github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY= -github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= +github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8IDP+SZrdhV1Kibl9KrHxJ9eciw= +github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d h1:k3/APKZjXOyJrFy8VyYwRlZhMelpD3qBLJNsw3bPl/g= github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d/go.mod h1:7j18ezaWTao2LHOyMlsc2Dg1vW+mDY9dEbPzVyOlaeM= github.com/pingcap/tidb-dashboard v0.0.0-20221103012625-46facc4b7f6d h1:qH0xCmmBSRgWV71c753o/1FTtBOWJSk78dBsqPQ4oC4= diff --git a/tests/client/go.mod b/tests/client/go.mod index fb7bd3779e17..70644bda3caf 100644 --- a/tests/client/go.mod +++ b/tests/client/go.mod @@ -92,7 +92,7 @@ require ( github.com/pingcap/check v0.0.0-20211026125417-57bd13f7b5f0 // indirect github.com/pingcap/errcode v0.3.0 // indirect github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c // indirect - github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 // indirect + github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 // indirect github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d // indirect github.com/pingcap/tidb-dashboard v0.0.0-20221103012625-46facc4b7f6d // indirect github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e // indirect diff --git a/tests/client/go.sum b/tests/client/go.sum index f72acf175291..914456539682 100644 --- a/tests/client/go.sum +++ b/tests/client/go.sum @@ -396,8 +396,8 @@ github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIf github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= github.com/pingcap/log v0.0.0-20210906054005-afc726e70354/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= -github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY= -github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= +github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8IDP+SZrdhV1Kibl9KrHxJ9eciw= +github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d h1:k3/APKZjXOyJrFy8VyYwRlZhMelpD3qBLJNsw3bPl/g= github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d/go.mod h1:7j18ezaWTao2LHOyMlsc2Dg1vW+mDY9dEbPzVyOlaeM= github.com/pingcap/tidb-dashboard v0.0.0-20221103012625-46facc4b7f6d h1:qH0xCmmBSRgWV71c753o/1FTtBOWJSk78dBsqPQ4oC4= diff --git a/tools/pd-tso-bench/go.mod b/tools/pd-tso-bench/go.mod index 5864ea5d9dee..ee4fa4576996 100644 --- 
a/tools/pd-tso-bench/go.mod +++ b/tools/pd-tso-bench/go.mod @@ -5,7 +5,7 @@ go 1.16 require ( github.com/influxdata/tdigest v0.0.1 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c - github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 + github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 github.com/prometheus/client_golang v1.11.0 github.com/tikv/pd/client v0.0.0-00010101000000-000000000000 go.uber.org/zap v1.20.0 diff --git a/tools/pd-tso-bench/go.sum b/tools/pd-tso-bench/go.sum index 53b4231cdc6b..a7b5f5a29814 100644 --- a/tools/pd-tso-bench/go.sum +++ b/tools/pd-tso-bench/go.sum @@ -108,8 +108,8 @@ github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad h1:lGKxsEwdE0pVXzH github.com/pingcap/kvproto v0.0.0-20220818063303-5c20f55db5ad/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 h1:FYgKV9znRQmzVrrJDZ0gUfMIvKLAMU1tu1UKJib8bEQ= github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= -github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY= -github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= +github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8IDP+SZrdhV1Kibl9KrHxJ9eciw= +github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= From fcfcda3461c6f4c11b7bca9be751916d652d4d98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B7=B7=E6=B2=8CDM?= Date: Fri, 11 Nov 2022 15:13:53 +0800 Subject: [PATCH 45/67] scheduler: fix the bug that balance-hot-region-scheduler does not store config when it starts (#5702) close tikv/pd#5701 Signed-off-by: HunDunDM --- server/cluster/coordinator_test.go | 9 +++++++++ server/schedulers/hot_region.go | 4 ++++ server/schedulers/hot_region_test.go | 14 ++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/server/cluster/coordinator_test.go b/server/cluster/coordinator_test.go index 84c43b7e60bf..1da1df9535a7 100644 --- a/server/cluster/coordinator_test.go +++ b/server/cluster/coordinator_test.go @@ -709,6 +709,15 @@ func TestAddScheduler(t *testing.T) { re.NoError(err) re.NoError(co.addScheduler(gls)) + hb, err := schedule.CreateScheduler(schedulers.HotRegionType, oc, storage.NewStorageWithMemoryBackend(), schedule.ConfigJSONDecoder([]byte("{}"))) + re.NoError(err) + conf, err = hb.EncodeConfig() + re.NoError(err) + data = make(map[string]interface{}) + re.NoError(json.Unmarshal(conf, &data)) + re.Contains(data, "enable-for-tiflash") + re.Equal("true", data["enable-for-tiflash"].(string)) + // Transfer all leaders to store 1. 
waitOperator(re, co, 2) region2 := tc.GetRegion(2) diff --git a/server/schedulers/hot_region.go b/server/schedulers/hot_region.go index 4386d5daa267..0916f7d6cb12 100644 --- a/server/schedulers/hot_region.go +++ b/server/schedulers/hot_region.go @@ -134,6 +134,10 @@ func (h *hotScheduler) GetType() string { return HotRegionType } +func (h *hotScheduler) EncodeConfig() ([]byte, error) { + return h.conf.EncodeConfig() +} + func (h *hotScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { h.conf.ServeHTTP(w, r) } diff --git a/server/schedulers/hot_region_test.go b/server/schedulers/hot_region_test.go index a4f11ac67d53..b4e148aaac45 100644 --- a/server/schedulers/hot_region_test.go +++ b/server/schedulers/hot_region_test.go @@ -2618,3 +2618,17 @@ func TestExpect(t *testing.T) { re.Equal(testCase.allow, bs.checkDstByPriorityAndTolerance(srcToDst(testCase.load), srcToDst(testCase.expect), toleranceRatio)) } } + +// ref https://github.com/tikv/pd/issues/5701 +func TestEncodeConfig(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + opt := config.NewTestOptions() + tc := mockcluster.NewCluster(ctx, opt) + sche, err := schedule.CreateScheduler(HotRegionType, schedule.NewOperatorController(ctx, tc, nil), storage.NewStorageWithMemoryBackend(), schedule.ConfigJSONDecoder([]byte("null"))) + re.NoError(err) + data, err := sche.EncodeConfig() + re.NoError(err) + re.NotEqual("null", string(data)) +} From 674375a99f83f530af28007f58ac5ffd15b129d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B7=B7=E6=B2=8CDM?= Date: Fri, 11 Nov 2022 15:25:53 +0800 Subject: [PATCH 46/67] scheduler: keep rank-formula-version at v1 when upgrading from a lower version (#5699) close tikv/pd#5698 Signed-off-by: HunDunDM Co-authored-by: Ti Chi Robot --- server/schedulers/hot_region.go | 4 +++ server/schedulers/hot_region_test.go | 42 ++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/server/schedulers/hot_region.go b/server/schedulers/hot_region.go index 0916f7d6cb12..a504a29f4fa5 100644 --- a/server/schedulers/hot_region.go +++ b/server/schedulers/hot_region.go @@ -53,8 +53,12 @@ func init() { } if len(data) != 0 { // After upgrading, use compatible config. + // For clusters with the initial version >= v5.2, it will be overwritten by the default config. conf.applyPrioritiesConfig(compatiblePrioritiesConfig) + // For clusters with the initial version >= v6.4, it will be overwritten by the default config. 
+ conf.SetRankFormulaVersion("") + if err := decoder(conf); err != nil { return nil, err } diff --git a/server/schedulers/hot_region_test.go b/server/schedulers/hot_region_test.go index b4e148aaac45..4e937c073027 100644 --- a/server/schedulers/hot_region_test.go +++ b/server/schedulers/hot_region_test.go @@ -67,6 +67,48 @@ func clearPendingInfluence(h *hotScheduler) { h.regionPendings = make(map[uint64]*pendingInfluence) } +func TestUpgrade(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + opt := config.NewTestOptions() + tc := mockcluster.NewCluster(ctx, opt) + // new + sche, err := schedule.CreateScheduler(HotRegionType, schedule.NewOperatorController(ctx, tc, nil), storage.NewStorageWithMemoryBackend(), schedule.ConfigSliceDecoder(HotRegionType, nil)) + re.NoError(err) + hb := sche.(*hotScheduler) + re.Equal([]string{statistics.QueryPriority, statistics.BytePriority}, hb.conf.GetReadPriorities()) + re.Equal([]string{statistics.QueryPriority, statistics.BytePriority}, hb.conf.GetWriteLeaderPriorities()) + re.Equal([]string{statistics.BytePriority, statistics.KeyPriority}, hb.conf.GetWritePeerPriorities()) + re.Equal("v2", hb.conf.GetRankFormulaVersion()) + // upgrade from json(null) + sche, err = schedule.CreateScheduler(HotRegionType, schedule.NewOperatorController(ctx, tc, nil), storage.NewStorageWithMemoryBackend(), schedule.ConfigJSONDecoder([]byte("null"))) + re.NoError(err) + hb = sche.(*hotScheduler) + re.Equal([]string{statistics.QueryPriority, statistics.BytePriority}, hb.conf.GetReadPriorities()) + re.Equal([]string{statistics.QueryPriority, statistics.BytePriority}, hb.conf.GetWriteLeaderPriorities()) + re.Equal([]string{statistics.BytePriority, statistics.KeyPriority}, hb.conf.GetWritePeerPriorities()) + re.Equal("v2", hb.conf.GetRankFormulaVersion()) + // upgrade from < 5.2 + config51 := `{"min-hot-byte-rate":100,"min-hot-key-rate":10,"min-hot-query-rate":10,"max-zombie-rounds":5,"max-peer-number":1000,"byte-rate-rank-step-ratio":0.05,"key-rate-rank-step-ratio":0.05,"query-rate-rank-step-ratio":0.05,"count-rank-step-ratio":0.01,"great-dec-ratio":0.95,"minor-dec-ratio":0.99,"src-tolerance-ratio":1.05,"dst-tolerance-ratio":1.05,"strict-picking-store":"true","enable-for-tiflash":"true"}` + sche, err = schedule.CreateScheduler(HotRegionType, schedule.NewOperatorController(ctx, tc, nil), storage.NewStorageWithMemoryBackend(), schedule.ConfigJSONDecoder([]byte(config51))) + re.NoError(err) + hb = sche.(*hotScheduler) + re.Equal([]string{statistics.BytePriority, statistics.KeyPriority}, hb.conf.GetReadPriorities()) + re.Equal([]string{statistics.KeyPriority, statistics.BytePriority}, hb.conf.GetWriteLeaderPriorities()) + re.Equal([]string{statistics.BytePriority, statistics.KeyPriority}, hb.conf.GetWritePeerPriorities()) + re.Equal("v1", hb.conf.GetRankFormulaVersion()) + // upgrade from < 6.4 + config54 := `{"min-hot-byte-rate":100,"min-hot-key-rate":10,"min-hot-query-rate":10,"max-zombie-rounds":5,"max-peer-number":1000,"byte-rate-rank-step-ratio":0.05,"key-rate-rank-step-ratio":0.05,"query-rate-rank-step-ratio":0.05,"count-rank-step-ratio":0.01,"great-dec-ratio":0.95,"minor-dec-ratio":0.99,"src-tolerance-ratio":1.05,"dst-tolerance-ratio":1.05,"read-priorities":["query","byte"],"write-leader-priorities":["query","byte"],"write-peer-priorities":["byte","key"],"strict-picking-store":"true","enable-for-tiflash":"true","forbid-rw-type":"none"}` + sche, err = schedule.CreateScheduler(HotRegionType, 
schedule.NewOperatorController(ctx, tc, nil), storage.NewStorageWithMemoryBackend(), schedule.ConfigJSONDecoder([]byte(config54))) + re.NoError(err) + hb = sche.(*hotScheduler) + re.Equal([]string{statistics.QueryPriority, statistics.BytePriority}, hb.conf.GetReadPriorities()) + re.Equal([]string{statistics.QueryPriority, statistics.BytePriority}, hb.conf.GetWriteLeaderPriorities()) + re.Equal([]string{statistics.BytePriority, statistics.KeyPriority}, hb.conf.GetWritePeerPriorities()) + re.Equal("v1", hb.conf.GetRankFormulaVersion()) +} + func TestGCPendingOpInfos(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) From e6eef7cecd59afa6425a39a85f92d4742b212168 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Mon, 14 Nov 2022 11:51:54 +0800 Subject: [PATCH 47/67] tools: fix the inconsistency of available space (#5670) ref tikv/pd#5468 Signed-off-by: Ryan Leung --- .../pd-simulator/simulator/cases/add_nodes.go | 7 +--- .../simulator/cases/add_nodes_dynamic.go | 7 +--- .../simulator/cases/balance_leader.go | 7 +--- ...nt_balance_region.go => balance_region.go} | 23 ++++------- tools/pd-simulator/simulator/cases/cases.go | 16 ++++---- .../simulator/cases/delete_nodes.go | 7 +--- .../cases/diagnose_label_isolation.go | 41 ++++++------------- .../simulator/cases/diagnose_rule.go | 14 ++----- .../pd-simulator/simulator/cases/hot_read.go | 7 +--- .../pd-simulator/simulator/cases/hot_write.go | 7 +--- .../simulator/cases/import_data.go | 7 +--- .../simulator/cases/makeup_down_replica.go | 7 +--- .../simulator/cases/region_merge.go | 7 +--- .../simulator/cases/region_split.go | 7 +--- tools/pd-simulator/simulator/config.go | 10 +++-- tools/pd-simulator/simulator/drive.go | 12 ++++++ tools/pd-simulator/simulator/event.go | 9 ++-- tools/pd-simulator/simulator/node.go | 5 ++- 18 files changed, 77 insertions(+), 123 deletions(-) rename tools/pd-simulator/simulator/cases/{redundant_balance_region.go => balance_region.go} (82%) diff --git a/tools/pd-simulator/simulator/cases/add_nodes.go b/tools/pd-simulator/simulator/cases/add_nodes.go index 71e4cec0da96..22aa935ab71c 100644 --- a/tools/pd-simulator/simulator/cases/add_nodes.go +++ b/tools/pd-simulator/simulator/cases/add_nodes.go @@ -34,11 +34,8 @@ func newAddNodes() *Case { for i := 1; i <= storeNum; i++ { simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), - Status: metapb.StoreState_Up, - Capacity: 1 * units.TiB, - Available: 900 * units.GiB, - Version: "2.1.0", + ID: IDAllocator.nextID(), + Status: metapb.StoreState_Up, }) } diff --git a/tools/pd-simulator/simulator/cases/add_nodes_dynamic.go b/tools/pd-simulator/simulator/cases/add_nodes_dynamic.go index a1dd8394d6c6..76b6e0c8339d 100644 --- a/tools/pd-simulator/simulator/cases/add_nodes_dynamic.go +++ b/tools/pd-simulator/simulator/cases/add_nodes_dynamic.go @@ -34,11 +34,8 @@ func newAddNodesDynamic() *Case { for i := 1; i <= int(noEmptyStoreNum); i++ { simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), - Status: metapb.StoreState_Up, - Capacity: 1 * units.TiB, - Available: 900 * units.GiB, - Version: "2.1.0", + ID: IDAllocator.nextID(), + Status: metapb.StoreState_Up, }) } diff --git a/tools/pd-simulator/simulator/cases/balance_leader.go b/tools/pd-simulator/simulator/cases/balance_leader.go index 5c78ccec2aa9..4de326bfa52b 100644 --- a/tools/pd-simulator/simulator/cases/balance_leader.go +++ b/tools/pd-simulator/simulator/cases/balance_leader.go @@ -30,11 +30,8 @@ func newBalanceLeader() *Case { for i := 1; 
i <= storeNum; i++ { simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), - Status: metapb.StoreState_Up, - Capacity: 1 * units.TiB, - Available: 900 * units.GiB, - Version: "2.1.0", + ID: IDAllocator.nextID(), + Status: metapb.StoreState_Up, }) } diff --git a/tools/pd-simulator/simulator/cases/redundant_balance_region.go b/tools/pd-simulator/simulator/cases/balance_region.go similarity index 82% rename from tools/pd-simulator/simulator/cases/redundant_balance_region.go rename to tools/pd-simulator/simulator/cases/balance_region.go index 9037f28e073f..15cc545f5558 100644 --- a/tools/pd-simulator/simulator/cases/redundant_balance_region.go +++ b/tools/pd-simulator/simulator/cases/balance_region.go @@ -29,29 +29,20 @@ func newRedundantBalanceRegion() *Case { var simCase Case storeNum := simutil.CaseConfigure.StoreNum - regionNum := simutil.CaseConfigure.RegionNum * storeNum / 3 + regionNum := simutil.CaseConfigure.RegionNum if storeNum == 0 || regionNum == 0 { storeNum, regionNum = 6, 4000 } for i := 0; i < storeNum; i++ { + s := &Store{ + ID: IDAllocator.nextID(), + Status: metapb.StoreState_Up, + } if i%2 == 1 { - simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), - Status: metapb.StoreState_Up, - Capacity: 1 * units.TiB, - Available: 980 * units.GiB, - Version: "2.1.0", - }) - } else { - simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), - Status: metapb.StoreState_Up, - Capacity: 1 * units.TiB, - Available: 1 * units.TiB, - Version: "2.1.0", - }) + s.HasExtraUsedSpace = true } + simCase.Stores = append(simCase.Stores, s) } for i := 0; i < regionNum; i++ { diff --git a/tools/pd-simulator/simulator/cases/cases.go b/tools/pd-simulator/simulator/cases/cases.go index c944358f4ebc..dd1c92e8a2a3 100644 --- a/tools/pd-simulator/simulator/cases/cases.go +++ b/tools/pd-simulator/simulator/cases/cases.go @@ -25,14 +25,14 @@ import ( // Store is used to simulate tikv. type Store struct { - ID uint64 - Status metapb.StoreState - Labels []*metapb.StoreLabel - Capacity uint64 - Available uint64 - LeaderWeight float32 - RegionWeight float32 - Version string + ID uint64 + Status metapb.StoreState + Labels []*metapb.StoreLabel + Capacity uint64 + LeaderWeight float32 + RegionWeight float32 + Version string + HasExtraUsedSpace bool } // Region is used to simulate a region. 
diff --git a/tools/pd-simulator/simulator/cases/delete_nodes.go b/tools/pd-simulator/simulator/cases/delete_nodes.go index b94e55844f1a..bc2b7ba85dd1 100644 --- a/tools/pd-simulator/simulator/cases/delete_nodes.go +++ b/tools/pd-simulator/simulator/cases/delete_nodes.go @@ -32,11 +32,8 @@ func newDeleteNodes() *Case { noEmptyStoreNum := storeNum - 1 for i := 1; i <= storeNum; i++ { simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), - Status: metapb.StoreState_Up, - Capacity: 1 * units.TiB, - Available: 900 * units.GiB, - Version: "2.1.0", + ID: IDAllocator.nextID(), + Status: metapb.StoreState_Up, }) } diff --git a/tools/pd-simulator/simulator/cases/diagnose_label_isolation.go b/tools/pd-simulator/simulator/cases/diagnose_label_isolation.go index 7fde1984b04f..e237fb27d097 100644 --- a/tools/pd-simulator/simulator/cases/diagnose_label_isolation.go +++ b/tools/pd-simulator/simulator/cases/diagnose_label_isolation.go @@ -35,20 +35,14 @@ func newLabelNotMatch1() *Case { for i := 0; i < num1; i++ { id := IDAllocator.nextID() simCase.Stores = append(simCase.Stores, &Store{ - ID: id, - Status: metapb.StoreState_Up, - Capacity: 100 * units.GiB, - Available: 50 * units.GiB, - Version: "2.1.0", - Labels: []*metapb.StoreLabel{{Key: "host", Value: fmt.Sprintf("host%d", id)}}, + ID: id, + Status: metapb.StoreState_Up, + Labels: []*metapb.StoreLabel{{Key: "host", Value: fmt.Sprintf("host%d", id)}}, }) } simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), - Status: metapb.StoreState_Up, - Capacity: 100 * units.GiB, - Available: 50 * units.GiB, - Version: "2.1.0", + ID: IDAllocator.nextID(), + Status: metapb.StoreState_Up, }) for i := 0; i < regionNum; i++ { @@ -103,23 +97,17 @@ func newLabelIsolation1() *Case { for i := 0; i < num1; i++ { id := IDAllocator.nextID() simCase.Stores = append(simCase.Stores, &Store{ - ID: id, - Status: metapb.StoreState_Up, - Capacity: 1000 * units.GiB, - Available: 500 * units.GiB, - Version: "2.1.0", - Labels: []*metapb.StoreLabel{{Key: "host", Value: fmt.Sprintf("host%d", id)}}, + ID: id, + Status: metapb.StoreState_Up, + Labels: []*metapb.StoreLabel{{Key: "host", Value: fmt.Sprintf("host%d", id)}}, }) } id := IDAllocator.GetID() + 1 for i := 0; i < num2; i++ { simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), - Status: metapb.StoreState_Up, - Capacity: 1000 * units.GiB, - Available: 500 * units.GiB, - Version: "2.1.0", - Labels: []*metapb.StoreLabel{{Key: "host", Value: fmt.Sprintf("host%d", id)}}, + ID: IDAllocator.nextID(), + Status: metapb.StoreState_Up, + Labels: []*metapb.StoreLabel{{Key: "host", Value: fmt.Sprintf("host%d", id)}}, }) } @@ -174,11 +162,8 @@ func newLabelIsolation2() *Case { for i := 0; i < storeNum; i++ { id := IDAllocator.nextID() simCase.Stores = append(simCase.Stores, &Store{ - ID: id, - Status: metapb.StoreState_Up, - Capacity: 100 * units.GiB, - Available: 50 * units.GiB, - Version: "2.1.0", + ID: id, + Status: metapb.StoreState_Up, }) } simCase.Stores[0].Labels = []*metapb.StoreLabel{{Key: "dc", Value: "dc1"}, {Key: "zone", Value: "zone1"}, {Key: "host", Value: "host1"}} diff --git a/tools/pd-simulator/simulator/cases/diagnose_rule.go b/tools/pd-simulator/simulator/cases/diagnose_rule.go index f5fd9f21e417..63831b56fd7c 100644 --- a/tools/pd-simulator/simulator/cases/diagnose_rule.go +++ b/tools/pd-simulator/simulator/cases/diagnose_rule.go @@ -66,11 +66,8 @@ func newRule1() *Case { for i := 0; i < storeNum; i++ { id := IDAllocator.nextID() simCase.Stores = 
append(simCase.Stores, &Store{ - ID: id, - Status: metapb.StoreState_Up, - Capacity: 1000 * units.GiB, - Available: 500 * units.GiB, - Version: "2.1.0", + ID: id, + Status: metapb.StoreState_Up, }) } simCase.Stores[0].Labels = []*metapb.StoreLabel{{Key: "region", Value: "region2"}, {Key: "idc", Value: "idc1"}} @@ -154,11 +151,8 @@ func newRule2() *Case { for i := 0; i < storeNum; i++ { id := IDAllocator.nextID() simCase.Stores = append(simCase.Stores, &Store{ - ID: id, - Status: metapb.StoreState_Up, - Capacity: 1000 * units.GiB, - Available: 500 * units.GiB, - Version: "2.1.0", + ID: id, + Status: metapb.StoreState_Up, }) } simCase.Stores[0].Labels = []*metapb.StoreLabel{{Key: "region", Value: "region1"}} diff --git a/tools/pd-simulator/simulator/cases/hot_read.go b/tools/pd-simulator/simulator/cases/hot_read.go index e226aaf13edf..659aaaded4d0 100644 --- a/tools/pd-simulator/simulator/cases/hot_read.go +++ b/tools/pd-simulator/simulator/cases/hot_read.go @@ -33,11 +33,8 @@ func newHotRead() *Case { // Initialize the cluster for i := 1; i <= storeNum; i++ { simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), - Status: metapb.StoreState_Up, - Capacity: 1 * units.TiB, - Available: 900 * units.GiB, - Version: "2.1.0", + ID: IDAllocator.nextID(), + Status: metapb.StoreState_Up, }) } diff --git a/tools/pd-simulator/simulator/cases/hot_write.go b/tools/pd-simulator/simulator/cases/hot_write.go index b74113a98c2c..645f647e26c6 100644 --- a/tools/pd-simulator/simulator/cases/hot_write.go +++ b/tools/pd-simulator/simulator/cases/hot_write.go @@ -32,11 +32,8 @@ func newHotWrite() *Case { // Initialize the cluster for i := 1; i <= storeNum; i++ { simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), - Status: metapb.StoreState_Up, - Capacity: 1 * units.TiB, - Available: 900 * units.GiB, - Version: "2.1.0", + ID: IDAllocator.nextID(), + Status: metapb.StoreState_Up, }) } diff --git a/tools/pd-simulator/simulator/cases/import_data.go b/tools/pd-simulator/simulator/cases/import_data.go index ce6ada832b3a..7b02e1e01230 100644 --- a/tools/pd-simulator/simulator/cases/import_data.go +++ b/tools/pd-simulator/simulator/cases/import_data.go @@ -36,11 +36,8 @@ func newImportData() *Case { // Initialize the cluster for i := 1; i <= 10; i++ { simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), - Status: metapb.StoreState_Up, - Capacity: 1 * units.TiB, - Available: 900 * units.GiB, - Version: "2.1.0", + ID: IDAllocator.nextID(), + Status: metapb.StoreState_Up, }) } diff --git a/tools/pd-simulator/simulator/cases/makeup_down_replica.go b/tools/pd-simulator/simulator/cases/makeup_down_replica.go index c952d9cc20a6..97796755e4db 100644 --- a/tools/pd-simulator/simulator/cases/makeup_down_replica.go +++ b/tools/pd-simulator/simulator/cases/makeup_down_replica.go @@ -29,11 +29,8 @@ func newMakeupDownReplicas() *Case { noEmptyStoreNum := storeNum - 1 for i := 1; i <= storeNum; i++ { simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), - Status: metapb.StoreState_Up, - Capacity: 1 * units.TiB, - Available: 900 * units.GiB, - Version: "2.1.0", + ID: IDAllocator.nextID(), + Status: metapb.StoreState_Up, }) } diff --git a/tools/pd-simulator/simulator/cases/region_merge.go b/tools/pd-simulator/simulator/cases/region_merge.go index d77a6fea18b5..09790cf25f9e 100644 --- a/tools/pd-simulator/simulator/cases/region_merge.go +++ b/tools/pd-simulator/simulator/cases/region_merge.go @@ -31,11 +31,8 @@ func newRegionMerge() *Case { 
storeNum, regionNum := getStoreNum(), getRegionNum() for i := 1; i <= storeNum; i++ { simCase.Stores = append(simCase.Stores, &Store{ - ID: IDAllocator.nextID(), - Status: metapb.StoreState_Up, - Capacity: 1 * units.TiB, - Available: 900 * units.GiB, - Version: "2.1.0", + ID: IDAllocator.nextID(), + Status: metapb.StoreState_Up, }) } diff --git a/tools/pd-simulator/simulator/cases/region_split.go b/tools/pd-simulator/simulator/cases/region_split.go index 8c241c647cf0..c365a4a73c53 100644 --- a/tools/pd-simulator/simulator/cases/region_split.go +++ b/tools/pd-simulator/simulator/cases/region_split.go @@ -29,11 +29,8 @@ func newRegionSplit() *Case { storeNum := getStoreNum() for i := 1; i <= storeNum; i++ { simCase.Stores = append(simCase.Stores, &Store{ - ID: uint64(i), - Status: metapb.StoreState_Up, - Capacity: 1 * units.TiB, - Available: 900 * units.GiB, - Version: "2.1.0", + ID: uint64(i), + Status: metapb.StoreState_Up, }) } peers := []*metapb.Peer{ diff --git a/tools/pd-simulator/simulator/config.go b/tools/pd-simulator/simulator/config.go index 0d6f38c1c5c0..2f39df00f53f 100644 --- a/tools/pd-simulator/simulator/config.go +++ b/tools/pd-simulator/simulator/config.go @@ -25,6 +25,7 @@ import ( "github.com/tikv/pd/pkg/typeutil" "github.com/tikv/pd/server/config" "github.com/tikv/pd/server/schedule/placement" + "github.com/tikv/pd/server/versioninfo" ) const ( @@ -32,12 +33,12 @@ const ( defaultSimTickInterval = 100 * time.Millisecond // store defaultStoreIOMBPerSecond = 40 - defaultStoreVersion = "2.1.0" defaultStoreHeartbeat = 10 * time.Second defaultRegionHeartbeat = 1 * time.Minute defaultRegionSplitKeys = 960000 defaultRegionSplitSize = 96 * units.MiB - defaultCapacity = 3000 * units.GiB + defaultCapacity = 1 * units.TiB + defaultExtraUsedSpace = 0 // server defaultLeaderLease = 3 defaultTSOSaveInterval = 200 * time.Millisecond @@ -63,7 +64,7 @@ type SimConfig struct { // RaftStore the configuration for raft store. 
type RaftStore struct { Capacity typeutil.ByteSize `toml:"capacity" json:"capacity"` - Available typeutil.ByteSize `toml:"available" json:"available"` + ExtraUsedSpace typeutil.ByteSize `toml:"extra-used-space" json:"extra-used-space"` RegionHeartBeatInterval typeutil.Duration `toml:"pd-heartbeat-tick-interval" json:"pd-heartbeat-tick-interval"` StoreHeartBeatInterval typeutil.Duration `toml:"pd-store-heartbeat-tick-interval" json:"pd-store-heartbeat-tick-interval"` } @@ -125,10 +126,11 @@ func adjustByteSize(v *typeutil.ByteSize, defValue typeutil.ByteSize) { func (sc *SimConfig) Adjust(meta *toml.MetaData) error { adjustDuration(&sc.SimTickInterval, defaultSimTickInterval) adjustInt64(&sc.StoreIOMBPerSecond, defaultStoreIOMBPerSecond) - adjustString(&sc.StoreVersion, defaultStoreVersion) + adjustString(&sc.StoreVersion, versioninfo.PDReleaseVersion) adjustDuration(&sc.RaftStore.RegionHeartBeatInterval, defaultRegionHeartbeat) adjustDuration(&sc.RaftStore.StoreHeartBeatInterval, defaultStoreHeartbeat) adjustByteSize(&sc.RaftStore.Capacity, defaultCapacity) + adjustByteSize(&sc.RaftStore.ExtraUsedSpace, defaultExtraUsedSpace) adjustUint64(&sc.Coprocessor.RegionSplitKey, defaultRegionSplitKeys) adjustByteSize(&sc.Coprocessor.RegionSplitSize, defaultRegionSplitSize) diff --git a/tools/pd-simulator/simulator/drive.go b/tools/pd-simulator/simulator/drive.go index 312b4f6a3d4b..6968520c1bd9 100644 --- a/tools/pd-simulator/simulator/drive.go +++ b/tools/pd-simulator/simulator/drive.go @@ -74,6 +74,8 @@ func (d *Driver) Prepare() error { d.raftEngine = NewRaftEngine(d.simCase, d.conn, d.simConfig) d.eventRunner = NewEventRunner(d.simCase.Events, d.raftEngine) + d.updateNodeAvailable() + // Bootstrap. store, region, err := d.GetBootstrapInfo(d.raftEngine) if err != nil { @@ -218,3 +220,13 @@ func (d *Driver) GetBootstrapInfo(r *RaftEngine) (*metapb.Store, *metapb.Region, } return store.Store, region.GetMeta(), nil } + +func (d *Driver) updateNodeAvailable() { + for storeID, n := range d.conn.Nodes { + if n.hasExtraUsedSpace { + n.stats.StoreStats.Available = n.stats.StoreStats.Capacity - uint64(d.raftEngine.regionsInfo.GetStoreRegionSize(storeID)) - uint64(d.simConfig.RaftStore.ExtraUsedSpace) + } else { + n.stats.StoreStats.Available = n.stats.StoreStats.Capacity - uint64(d.raftEngine.regionsInfo.GetStoreRegionSize(storeID)) + } + } +} diff --git a/tools/pd-simulator/simulator/event.go b/tools/pd-simulator/simulator/event.go index 96e71d647767..3433a57fa649 100644 --- a/tools/pd-simulator/simulator/event.go +++ b/tools/pd-simulator/simulator/event.go @@ -145,11 +145,10 @@ func (e *AddNodes) Run(raft *RaftEngine, tickCount int64) bool { config := raft.storeConfig s := &cases.Store{ - ID: id, - Status: metapb.StoreState_Up, - Capacity: uint64(config.RaftStore.Capacity), - Available: uint64(config.RaftStore.Available), - Version: config.StoreVersion, + ID: id, + Status: metapb.StoreState_Up, + Capacity: uint64(config.RaftStore.Capacity), + Version: config.StoreVersion, } n, err := NewNode(s, raft.conn.pdAddr, config) if err != nil { diff --git a/tools/pd-simulator/simulator/node.go b/tools/pd-simulator/simulator/node.go index 7cf84c4e9417..cd76d80b3c47 100644 --- a/tools/pd-simulator/simulator/node.go +++ b/tools/pd-simulator/simulator/node.go @@ -51,6 +51,7 @@ type Node struct { raftEngine *RaftEngine limiter *ratelimit.RateLimiter sizeMutex sync.Mutex + hasExtraUsedSpace bool } // NewNode returns a Node. 
@@ -59,7 +60,7 @@ func NewNode(s *cases.Store, pdAddr string, config *SimConfig) (*Node, error) { store := &metapb.Store{ Id: s.ID, Address: fmt.Sprintf("mock:://tikv-%d", s.ID), - Version: s.Version, + Version: config.StoreVersion, Labels: s.Labels, State: s.Status, } @@ -67,7 +68,6 @@ func NewNode(s *cases.Store, pdAddr string, config *SimConfig) (*Node, error) { StoreStats: pdpb.StoreStats{ StoreId: s.ID, Capacity: uint64(config.RaftStore.Capacity), - Available: uint64(config.RaftStore.Available), StartTime: uint32(time.Now().Unix()), }, } @@ -103,6 +103,7 @@ func NewNode(s *cases.Store, pdAddr string, config *SimConfig) (*Node, error) { receiveRegionHeartbeatCh: receiveRegionHeartbeatCh, limiter: ratelimit.NewRateLimiter(float64(speed), int(speed)), tick: uint64(rand.Intn(storeHeartBeatPeriod)), + hasExtraUsedSpace: s.HasExtraUsedSpace, }, nil } From 8690b96e5e6ac4a137b567e8c8990bbf6386f142 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B7=B7=E6=B2=8CDM?= Date: Tue, 15 Nov 2022 10:57:55 +0800 Subject: [PATCH 48/67] simulator: fix some logic bugs (#5703) ref tikv/pd#5323, ref tikv/pd#5468, ref tikv/pd#5609 Signed-off-by: HunDunDM --- tools/pd-simulator/simulator/task.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/pd-simulator/simulator/task.go b/tools/pd-simulator/simulator/task.go index 3ad9c0af6003..14919d2768c4 100644 --- a/tools/pd-simulator/simulator/task.go +++ b/tools/pd-simulator/simulator/task.go @@ -138,7 +138,7 @@ func changePeerToOperator(region *core.RegionInfo, cp *pdpb.ChangePeer) (operato peer := cp.GetPeer() switch cp.GetChangeType() { case eraftpb.ConfChangeType_AddNode: - if region.GetPeer(peer.GetId()) != nil { + if region.GetStoreLearner(peer.GetStoreId()) != nil { return &promoteLearner{peer: peer}, fmt.Sprintf("promote learner %+v for region %d", peer, regionID) } return &addPeer{ @@ -149,7 +149,7 @@ func changePeerToOperator(region *core.RegionInfo, cp *pdpb.ChangePeer) (operato receivingStat: newSnapshotState(region.GetApproximateSize(), receive), }, fmt.Sprintf("add voter %+v for region %d", peer, regionID) case eraftpb.ConfChangeType_AddLearnerNode: - if region.GetPeer(peer.GetId()) != nil { + if region.GetStoreVoter(peer.GetStoreId()) != nil { return &demoteVoter{peer: peer}, fmt.Sprintf("demote voter %+v for region %d", peer, regionID) } return &addPeer{ @@ -211,6 +211,7 @@ func (t *Task) Step(engine *RaftEngine) (isFinished bool) { newRegion, t.isFinished = t.tick(engine, region) if newRegion != nil { + t.epoch = newRegion.GetRegionEpoch() engine.SetRegion(newRegion) engine.recordRegionChange(newRegion) } @@ -374,7 +375,7 @@ func (cl *changePeerV2Leave) tick(engine *RaftEngine, region *core.RegionInfo) ( case metapb.PeerRole_IncomingVoter: opts = append(opts, checkAndCreateChangePeerOption(engine, region, peer, metapb.PeerRole_IncomingVoter, metapb.PeerRole_Voter)...) case metapb.PeerRole_DemotingVoter: - opts = append(opts, checkAndCreateChangePeerOption(engine, region, peer, metapb.PeerRole_IncomingVoter, metapb.PeerRole_Voter)...) + opts = append(opts, checkAndCreateChangePeerOption(engine, region, peer, metapb.PeerRole_DemotingVoter, metapb.PeerRole_Learner)...) } } if len(opts) < 4 { @@ -529,7 +530,7 @@ func processSnapshot(n *Node, stat *snapshotStat) bool { // store should Generate/Receive snapshot by chunk size. // todo: the process of snapshot is single thread, the later snapshot task must wait the first one. 
- for n.limiter.AllowN(int(chunkSize)) { + for stat.remainSize > 0 && n.limiter.AllowN(chunkSize) { stat.remainSize -= chunkSize } From 98037f6f2531cec08e24d668c9248f02ca03df3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B7=B7=E6=B2=8CDM?= Date: Tue, 15 Nov 2022 11:09:54 +0800 Subject: [PATCH 49/67] statistics: fix wrong is-learner (#5704) ref tikv/pd#4399 Signed-off-by: HunDunDM Co-authored-by: Ti Chi Robot --- server/statistics/hot_peer_cache.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/statistics/hot_peer_cache.go b/server/statistics/hot_peer_cache.go index f0412c431b4b..9671935f1f7f 100644 --- a/server/statistics/hot_peer_cache.go +++ b/server/statistics/hot_peer_cache.go @@ -185,7 +185,7 @@ func (f *hotPeerCache) checkPeerFlow(peer *core.PeerInfo, region *core.RegionInf Loads: f.kind.GetLoadRatesFromPeer(peer), LastUpdateTime: time.Now(), isLeader: region.GetLeader().GetStoreId() == storeID, - isLearner: core.IsLearner(region.GetPeer(storeID)), + isLearner: region.GetStoreLearner(storeID) != nil, interval: interval, peers: region.GetPeers(), actionType: Update, From 0e0f544a48bf79e75cd32c325f9e66aa11f8a625 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Tue, 15 Nov 2022 17:43:55 +0800 Subject: [PATCH 50/67] *: move update store to another place (#5652) ref tikv/pd#5648 Signed-off-by: Ryan Leung --- server/api/trend_test.go | 1 + server/cluster/cluster.go | 46 ++++++++++++++++++---------- server/cluster/cluster_test.go | 21 ++++++++----- server/cluster/metrics.go | 9 ++++++ tests/server/cluster/cluster_test.go | 2 ++ 5 files changed, 55 insertions(+), 24 deletions(-) diff --git a/server/api/trend_test.go b/server/api/trend_test.go index cf74a187f87b..6f32c1584420 100644 --- a/server/api/trend_test.go +++ b/server/api/trend_test.go @@ -73,6 +73,7 @@ func TestTrend(t *testing.T) { mustRegionHeartbeat(re, svr, region6) region6 = region6.Clone(core.WithRole(newPeerID, metapb.PeerRole_Voter), core.WithLeader(region6.GetStorePeer(2)), core.WithRemoveStorePeer(1), core.WithIncConfVer()) mustRegionHeartbeat(re, svr, region6) + time.Sleep(50 * time.Millisecond) var trend Trend err = tu.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s%s/api/v1/trend", svr.GetAddr(), apiPrefix), &trend) diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index 091af969af64..21926540f24d 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -75,6 +75,7 @@ const ( nodeStateCheckJobInterval = 10 * time.Second // metricsCollectionJobInterval is the interval to run metrics collection job. 
metricsCollectionJobInterval = 10 * time.Second + updateStoreStatsInterval = 9 * time.Millisecond clientTimeout = 3 * time.Second defaultChangedRegionsLimit = 10000 // persistLimitRetryTimes is used to reduce the probability of the persistent error @@ -279,7 +280,7 @@ func (c *RaftCluster) Start(s Server) error { log.Error("load external timestamp meets error", zap.Error(err)) } - c.wg.Add(8) + c.wg.Add(9) go c.runCoordinator() go c.runMetricsCollectionJob() go c.runNodeStateCheckJob() @@ -288,6 +289,7 @@ func (c *RaftCluster) Start(s Server) error { go c.runReplicationMode() go c.runMinResolvedTSJob() go c.runSyncConfig() + go c.runUpdateStoreStats() c.running = true return nil @@ -443,6 +445,33 @@ func (c *RaftCluster) runStatsBackgroundJobs() { } } +func (c *RaftCluster) runUpdateStoreStats() { + defer logutil.LogPanic() + defer c.wg.Done() + + ticker := time.NewTicker(updateStoreStatsInterval) + defer ticker.Stop() + + for { + select { + case <-c.ctx.Done(): + log.Info("update store stats background jobs has been stopped") + return + case <-ticker.C: + // Update related stores. + start := time.Now() + stores := c.GetStores() + for _, store := range stores { + if store.IsRemoved() { + continue + } + c.core.UpdateStoreStatus(store.GetID()) + } + updateStoreStatsGauge.Set(time.Since(start).Seconds()) + } + } +} + func (c *RaftCluster) runCoordinator() { defer logutil.LogPanic() defer c.wg.Done() @@ -845,21 +874,6 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { } c.labelLevelStats.ClearDefunctRegion(item.GetID()) } - - // Update related stores. - storeMap := make(map[uint64]struct{}) - for _, p := range region.GetPeers() { - storeMap[p.GetStoreId()] = struct{}{} - } - if origin != nil { - for _, p := range origin.GetPeers() { - storeMap[p.GetStoreId()] = struct{}{} - } - } - for key := range storeMap { - c.core.UpdateStoreStatus(key) - } - regionUpdateCacheEventCounter.Inc() } diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index fb607d6e4e15..fe922a75236f 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -696,9 +696,9 @@ func TestRegionHeartbeat(t *testing.T) { re.NoError(err) cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) cluster.coordinator = newCoordinator(ctx, cluster, nil) - n, np := uint64(3), uint64(3) - + cluster.wg.Add(1) + go cluster.runUpdateStoreStats() stores := newTestStores(3, "2.0.0") regions := newTestRegions(n, n, np) @@ -856,11 +856,12 @@ func TestRegionHeartbeat(t *testing.T) { } } - for _, store := range cluster.core.Stores.GetStores() { - re.Equal(cluster.core.Regions.RegionsInfo.GetStoreLeaderCount(store.GetID()), store.GetLeaderCount()) - re.Equal(cluster.core.Regions.RegionsInfo.GetStoreRegionCount(store.GetID()), store.GetRegionCount()) - re.Equal(cluster.core.Regions.RegionsInfo.GetStoreLeaderRegionSize(store.GetID()), store.GetLeaderSize()) - re.Equal(cluster.core.Regions.RegionsInfo.GetStoreRegionSize(store.GetID()), store.GetRegionSize()) + time.Sleep(50 * time.Millisecond) + for _, store := range cluster.GetStores() { + re.Equal(cluster.core.GetStoreLeaderCount(store.GetID()), store.GetLeaderCount()) + re.Equal(cluster.core.GetStoreRegionCount(store.GetID()), store.GetRegionCount()) + re.Equal(cluster.core.GetStoreLeaderRegionSize(store.GetID()), store.GetLeaderSize()) + re.Equal(cluster.core.GetStoreRegionSize(store.GetID()), store.GetRegionSize()) } // Test with storage. 
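
The assertions above now depend on the background `runUpdateStoreStats` goroutine introduced by this patch: store leader/region counters are refreshed on a ticker (`updateStoreStatsInterval`) instead of synchronously inside `processRegionHeartbeat`, which is why the test starts the goroutine and sleeps 50ms before checking the counters. A polling helper is a common alternative to a fixed sleep. The sketch below is a minimal, self-contained illustration only — the `waitFor` name and signature are invented for this example and are not part of the patch; the repository's `testutil.Eventually`, used later in this series, plays the same role.

package example

import "time"

// waitFor polls cond at the given interval until it returns true or the
// timeout elapses, and returns the last result of cond.
func waitFor(timeout, interval time.Duration, cond func() bool) bool {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if cond() {
			return true
		}
		time.Sleep(interval)
	}
	return cond()
}

A test could then replace the fixed `time.Sleep(50 * time.Millisecond)` with something like `waitFor(time.Second, 10*time.Millisecond, func() bool { return cluster.GetStore(1).GetLeaderCount() == expected })`, tolerating scheduling jitter without always paying the full wait.
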
@@ -1329,6 +1330,8 @@ func TestUpdateStorePendingPeerCount(t *testing.T) { for _, s := range stores { re.NoError(tc.putStoreLocked(s)) } + tc.RaftCluster.wg.Add(1) + go tc.RaftCluster.runUpdateStoreStats() peers := []*metapb.Peer{ { Id: 2, @@ -1349,9 +1352,11 @@ func TestUpdateStorePendingPeerCount(t *testing.T) { } origin := core.NewRegionInfo(&metapb.Region{Id: 1, Peers: peers[:3]}, peers[0], core.WithPendingPeers(peers[1:3])) re.NoError(tc.processRegionHeartbeat(origin)) + time.Sleep(50 * time.Millisecond) checkPendingPeerCount([]int{0, 1, 1, 0}, tc.RaftCluster, re) newRegion := core.NewRegionInfo(&metapb.Region{Id: 1, Peers: peers[1:]}, peers[1], core.WithPendingPeers(peers[3:4])) re.NoError(tc.processRegionHeartbeat(newRegion)) + time.Sleep(50 * time.Millisecond) checkPendingPeerCount([]int{0, 0, 0, 1}, tc.RaftCluster, re) } @@ -1947,7 +1952,7 @@ func checkRegions(re *require.Assertions, cache *core.RegionsInfo, regions []*co func checkPendingPeerCount(expect []int, cluster *RaftCluster, re *require.Assertions) { for i, e := range expect { - s := cluster.core.Stores.GetStore(uint64(i + 1)) + s := cluster.GetStore(uint64(i + 1)) re.Equal(e, s.GetPendingPeerCount()) } } diff --git a/server/cluster/metrics.go b/server/cluster/metrics.go index 8ebafcab46f4..8c0bceb94caa 100644 --- a/server/cluster/metrics.go +++ b/server/cluster/metrics.go @@ -73,6 +73,14 @@ var ( Help: "Time spent of patrol checks region.", }) + updateStoreStatsGauge = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: "pd", + Subsystem: "cluster", + Name: "update_stores_stats_time", + Help: "Time spent of updating store stats.", + }) + clusterStateCPUGauge = prometheus.NewGauge( prometheus.GaugeOpts{ Namespace: "pd", @@ -143,4 +151,5 @@ func init() { prometheus.MustRegister(storesSpeedGauge) prometheus.MustRegister(storesETAGauge) prometheus.MustRegister(storeSyncConfigEvent) + prometheus.MustRegister(updateStoreStatsGauge) } diff --git a/tests/server/cluster/cluster_test.go b/tests/server/cluster/cluster_test.go index f92eb716f484..a2b20fdd790f 100644 --- a/tests/server/cluster/cluster_test.go +++ b/tests/server/cluster/cluster_test.go @@ -1249,6 +1249,8 @@ func putRegionWithLeader(re *require.Assertions, rc *cluster.RaftCluster, id id. 
} rc.HandleRegionHeartbeat(core.NewRegionInfo(region, region.Peers[0])) } + + time.Sleep(50 * time.Millisecond) re.Equal(3, rc.GetStore(storeID).GetLeaderCount()) } From 7f2e8ad1c5022f5cd1ac47fa040ea4f157271741 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Wed, 16 Nov 2022 18:01:56 +0800 Subject: [PATCH 51/67] tools: support using tiup playground to bench region heartbeat (#5688) close tikv/pd#5690 Signed-off-by: Ryan Leung Co-authored-by: Ti Chi Robot --- tools/pd-heartbeat-bench/config-template.toml | 14 + tools/pd-heartbeat-bench/config/config.go | 172 ++++++++ tools/pd-heartbeat-bench/main.go | 388 +++++++++++------- 3 files changed, 432 insertions(+), 142 deletions(-) create mode 100644 tools/pd-heartbeat-bench/config-template.toml create mode 100644 tools/pd-heartbeat-bench/config/config.go diff --git a/tools/pd-heartbeat-bench/config-template.toml b/tools/pd-heartbeat-bench/config-template.toml new file mode 100644 index 000000000000..3838cb9612b1 --- /dev/null +++ b/tools/pd-heartbeat-bench/config-template.toml @@ -0,0 +1,14 @@ +round = 5 + +store-count = 100 +region-count = 2000000 + +key-length = 56 +replica = 3 + +leader-update-ratio = 0.06 +epoch-update-ratio = 0.04 +space-update-ratio = 0.15 +flow-update-ratio = 0.35 + +sample = false diff --git a/tools/pd-heartbeat-bench/config/config.go b/tools/pd-heartbeat-bench/config/config.go new file mode 100644 index 000000000000..5d59bb529745 --- /dev/null +++ b/tools/pd-heartbeat-bench/config/config.go @@ -0,0 +1,172 @@ +package config + +import ( + "github.com/BurntSushi/toml" + "github.com/pingcap/errors" + "github.com/pingcap/log" + flag "github.com/spf13/pflag" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" +) + +const ( + defaultStoreCount = 50 + defaultRegionCount = 1000000 + defaultKeyLength = 56 + defaultReplica = 3 + defaultLeaderUpdateRatio = 0.06 + defaultEpochUpdateRatio = 0.04 + defaultSpaceUpdateRatio = 0.15 + defaultFlowUpdateRatio = 0.35 + defaultRound = 0 + defaultSample = false + + defaultLogFormat = "text" +) + +// Config is the heartbeat-bench configuration. +type Config struct { + flagSet *flag.FlagSet + configFile string + PDAddr string + StatusAddr string + + Log log.Config `toml:"log" json:"log"` + logger *zap.Logger + logProps *log.ZapProperties + + StoreCount int `toml:"store-count" json:"store-count"` + RegionCount int `toml:"region-count" json:"region-count"` + KeyLength int `toml:"key-length" json:"key-length"` + Replica int `toml:"replica" json:"replica"` + LeaderUpdateRatio float64 `toml:"leader-update-ratio" json:"leader-update-ratio"` + EpochUpdateRatio float64 `toml:"epoch-update-ratio" json:"epoch-update-ratio"` + SpaceUpdateRatio float64 `toml:"space-update-ratio" json:"space-update-ratio"` + FlowUpdateRatio float64 `toml:"flow-update-ratio" json:"flow-update-ratio"` + Sample bool `toml:"sample" json:"sample"` + Round int `toml:"round" json:"round"` +} + +// NewConfig return a set of settings. +func NewConfig() *Config { + cfg := &Config{} + cfg.flagSet = flag.NewFlagSet("heartbeat-bench", flag.ContinueOnError) + fs := cfg.flagSet + fs.ParseErrorsWhitelist.UnknownFlags = true + fs.StringVar(&cfg.configFile, "config", "", "config file") + fs.StringVar(&cfg.PDAddr, "pd", "http://127.0.0.1:2379", "pd address") + fs.StringVar(&cfg.StatusAddr, "status-addr", "http://127.0.0.1:20180", "status address") + + return cfg +} + +// Parse parses flag definitions from the argument list. +func (c *Config) Parse(arguments []string) error { + // Parse first to get config file. 
+ err := c.flagSet.Parse(arguments) + if err != nil { + return errors.WithStack(err) + } + + // Load config file if specified. + var meta *toml.MetaData + if c.configFile != "" { + meta, err = c.configFromFile(c.configFile) + if err != nil { + return err + } + } + + // Parse again to replace with command line options. + err = c.flagSet.Parse(arguments) + if err != nil { + return errors.WithStack(err) + } + + if len(c.flagSet.Args()) != 0 { + return errors.Errorf("'%s' is an invalid flag", c.flagSet.Arg(0)) + } + + c.Adjust(meta) + return nil +} + +// Adjust is used to adjust configurations +func (c *Config) Adjust(meta *toml.MetaData) { + if len(c.Log.Format) == 0 { + c.Log.Format = defaultLogFormat + } + if !meta.IsDefined("round") { + adjustInt(&c.Round, defaultRound) + } + + if !meta.IsDefined("store-count") { + adjustInt(&c.StoreCount, defaultStoreCount) + } + if !meta.IsDefined("region-count") { + adjustInt(&c.RegionCount, defaultRegionCount) + } + + if !meta.IsDefined("key-length") { + adjustInt(&c.KeyLength, defaultKeyLength) + } + + if !meta.IsDefined("replica") { + adjustInt(&c.Replica, defaultReplica) + } + + if !meta.IsDefined("leader-update-ratio") { + adjustFloat64(&c.LeaderUpdateRatio, defaultLeaderUpdateRatio) + } + if !meta.IsDefined("epoch-update-ratio") { + adjustFloat64(&c.EpochUpdateRatio, defaultEpochUpdateRatio) + } + if !meta.IsDefined("space-update-ratio") { + adjustFloat64(&c.SpaceUpdateRatio, defaultSpaceUpdateRatio) + } + if !meta.IsDefined("flow-update-ratio") { + adjustFloat64(&c.FlowUpdateRatio, defaultFlowUpdateRatio) + } + if !meta.IsDefined("sample") { + c.Sample = defaultSample + } +} + +// configFromFile loads config from file. +func (c *Config) configFromFile(path string) (*toml.MetaData, error) { + meta, err := toml.DecodeFile(path, c) + return &meta, err +} + +// SetupLogger setup the logger. +func (c *Config) SetupLogger() error { + lg, p, err := log.InitLogger(&c.Log, zap.AddStacktrace(zapcore.FatalLevel)) + if err != nil { + return err + } + c.logger = lg + c.logProps = p + return nil +} + +// GetZapLogger gets the created zap logger. +func (c *Config) GetZapLogger() *zap.Logger { + return c.logger +} + +// GetZapLogProperties gets properties of the zap logger. 
+func (c *Config) GetZapLogProperties() *log.ZapProperties { + return c.logProps +} + +func adjustFloat64(v *float64, defValue float64) { + if *v == 0 { + *v = defValue + } +} + +func adjustInt(v *int, defValue int) { + if *v == 0 { + *v = defValue + } +} diff --git a/tools/pd-heartbeat-bench/main.go b/tools/pd-heartbeat-bench/main.go index 1ae8505e864c..1fd6572e596f 100644 --- a/tools/pd-heartbeat-bench/main.go +++ b/tools/pd-heartbeat-bench/main.go @@ -16,32 +16,29 @@ package main import ( "context" - "flag" "fmt" - "log" + "io" "math/rand" + "os" + "os/signal" + "strings" + "sync" + "syscall" "time" + "github.com/docker/go-units" + "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" + "github.com/pingcap/log" + "github.com/spf13/pflag" + "github.com/tikv/pd/pkg/logutil" + "github.com/tikv/pd/tools/pd-heartbeat-bench/config" "go.etcd.io/etcd/pkg/report" + "go.uber.org/zap" "google.golang.org/grpc" ) -var ( - pdAddr = flag.String("pd", "127.0.0.1:2379", "pd address") - storeCount = flag.Int("store", 40, "store count") - regionCount = flag.Int("region", 1000000, "region count") - keyLen = flag.Int("key-len", 56, "key length") - replica = flag.Int("replica", 3, "replica count") - leaderUpdateRatio = flag.Float64("leader", 0.06, "ratio of the region leader need to update, they need save-tree") - epochUpdateRatio = flag.Float64("epoch", 0.04, "ratio of the region epoch need to update, they need save-kv") - spaceUpdateRatio = flag.Float64("space", 0.15, "ratio of the region space need to update") - flowUpdateRatio = flag.Float64("flow", 0.35, "ratio of the region flow need to update") - sample = flag.Bool("sample", false, "sample per second") - heartbeatRounds = flag.Int("heartbeat-rounds", 4, "total rounds of heartbeat") -) - const ( bytesUnit = 1 << 23 // 8MB keysUint = 1 << 13 // 8K @@ -50,32 +47,33 @@ const ( var clusterID uint64 -func newClient() pdpb.PDClient { - cc, err := grpc.Dial(*pdAddr, grpc.WithInsecure()) - if err != nil { - log.Fatal(err) - } - return pdpb.NewPDClient(cc) +func trimHTTPPrefix(str string) string { + str = strings.TrimPrefix(str, "http://") + str = strings.TrimPrefix(str, "https://") + return str } -func newReport() report.Report { - p := "%4.4f" - if *sample { - return report.NewReportSample(p) +func newClient(cfg *config.Config) pdpb.PDClient { + addr := trimHTTPPrefix(cfg.PDAddr) + cc, err := grpc.Dial(addr, grpc.WithInsecure()) + if err != nil { + log.Fatal("failed to create gRPC connection", zap.Error(err)) } - return report.NewReport(p) + return pdpb.NewPDClient(cc) } -func initClusterID(cli pdpb.PDClient) { - res, err := cli.GetMembers(context.TODO(), &pdpb.GetMembersRequest{}) +func initClusterID(ctx context.Context, cli pdpb.PDClient) { + cctx, cancel := context.WithCancel(ctx) + res, err := cli.GetMembers(cctx, &pdpb.GetMembersRequest{}) + cancel() if err != nil { - log.Fatal(err) + log.Fatal("failed to get members", zap.Error(err)) } if res.GetHeader().GetError() != nil { - log.Fatal(res.GetHeader().GetError()) + log.Fatal("failed to get members", zap.String("err", res.GetHeader().GetError().String())) } clusterID = res.GetHeader().GetClusterId() - log.Println("ClusterID:", clusterID) + log.Info("init cluster ID successfully", zap.Uint64("cluster-id", clusterID)) } func header() *pdpb.RequestHeader { @@ -84,19 +82,22 @@ func header() *pdpb.RequestHeader { } } -func bootstrap(cli pdpb.PDClient) { - isBootstrapped, err := cli.IsBootstrapped(context.TODO(), &pdpb.IsBootstrappedRequest{Header: header()}) 
+func bootstrap(ctx context.Context, cli pdpb.PDClient) { + cctx, cancel := context.WithCancel(ctx) + isBootstrapped, err := cli.IsBootstrapped(cctx, &pdpb.IsBootstrappedRequest{Header: header()}) + cancel() if err != nil { - log.Fatal(err) + log.Fatal("check if cluster has already bootstrapped failed", zap.Error(err)) } if isBootstrapped.GetBootstrapped() { - log.Println("already bootstrapped") + log.Info("already bootstrapped") return } store := &metapb.Store{ Id: 1, - Address: fmt.Sprintf("localhost:%d", 1), + Address: fmt.Sprintf("localhost:%d", 2), + Version: "6.4.0-alpha", } region := &metapb.Region{ Id: 1, @@ -108,40 +109,63 @@ func bootstrap(cli pdpb.PDClient) { Store: store, Region: region, } - resp, err := cli.Bootstrap(context.TODO(), req) + cctx, cancel = context.WithCancel(ctx) + resp, err := cli.Bootstrap(cctx, req) + cancel() if err != nil { - log.Fatal(err) + log.Fatal("failed to bootstrap the cluster", zap.Error(err)) } if resp.GetHeader().GetError() != nil { - log.Fatalf("bootstrap failed: %s", resp.GetHeader().GetError().String()) + log.Fatal("failed to bootstrap the cluster", zap.String("err", resp.GetHeader().GetError().String())) } - log.Println("bootstrapped") + log.Info("bootstrapped") } -func putStores(cli pdpb.PDClient) { - for i := uint64(1); i <= uint64(*storeCount); i++ { +func putStores(ctx context.Context, cfg *config.Config, cli pdpb.PDClient) { + for i := uint64(1); i <= uint64(cfg.StoreCount); i++ { store := &metapb.Store{ Id: i, Address: fmt.Sprintf("localhost:%d", i), + Version: "6.4.0-alpha", } - resp, err := cli.PutStore(context.TODO(), &pdpb.PutStoreRequest{Header: header(), Store: store}) + cctx, cancel := context.WithCancel(ctx) + resp, err := cli.PutStore(cctx, &pdpb.PutStoreRequest{Header: header(), Store: store}) + cancel() if err != nil { - log.Fatal(err) + log.Fatal("failed to put store", zap.Uint64("store-id", i), zap.Error(err)) } if resp.GetHeader().GetError() != nil { - log.Fatalf("put store failed: %s", resp.GetHeader().GetError().String()) + log.Fatal("failed to put store", zap.Uint64("store-id", i), zap.String("err", resp.GetHeader().GetError().String())) } + go func(ctx context.Context, storeID uint64) { + var heartbeatTicker = time.NewTicker(10 * time.Second) + defer heartbeatTicker.Stop() + for { + select { + case <-heartbeatTicker.C: + cctx, cancel := context.WithCancel(ctx) + cli.StoreHeartbeat(cctx, &pdpb.StoreHeartbeatRequest{Header: header(), Stats: &pdpb.StoreStats{ + StoreId: storeID, + Capacity: 2 * units.TiB, + Available: 1.5 * units.TiB, + }}) + cancel() + case <-ctx.Done(): + return + } + } + }(ctx, i) } } -func newStartKey(id uint64) []byte { - k := make([]byte, *keyLen) +func newStartKey(id uint64, keyLen int) []byte { + k := make([]byte, keyLen) copy(k, fmt.Sprintf("%010d", id)) return k } -func newEndKey(id uint64) []byte { - k := newStartKey(id) +func newEndKey(id uint64, keyLen int) []byte { + k := newStartKey(id, keyLen) k[len(k)-1]++ return k } @@ -158,21 +182,22 @@ type Regions struct { updateFlow []int } -func (rs *Regions) init() { - rs.regions = make([]*pdpb.RegionHeartbeatRequest, 0, *regionCount) +func (rs *Regions) init(cfg *config.Config) { + rs.regions = make([]*pdpb.RegionHeartbeatRequest, 0, cfg.RegionCount) rs.updateRound = 0 // Generate regions id := uint64(1) now := uint64(time.Now().Unix()) - for i := 0; i < *regionCount; i++ { + keyLen := cfg.KeyLength + for i := 0; i < cfg.RegionCount; i++ { region := &pdpb.RegionHeartbeatRequest{ Header: header(), Region: &metapb.Region{ Id: id, - StartKey: 
newStartKey(id), - EndKey: newEndKey(id), + StartKey: newStartKey(id, keyLen), + EndKey: newEndKey(id, keyLen), RegionEpoch: &metapb.RegionEpoch{ConfVer: 2, Version: 1}, }, ApproximateSize: bytesUnit, @@ -185,9 +210,9 @@ func (rs *Regions) init() { } id += 1 - peers := make([]*metapb.Peer, 0, *replica) - for j := 0; j < *replica; j++ { - peers = append(peers, &metapb.Peer{Id: id, StoreId: uint64((i+j)%*storeCount + 1)}) + peers := make([]*metapb.Peer, 0, cfg.Replica) + for j := 0; j < cfg.Replica; j++ { + peers = append(peers, &metapb.Peer{Id: id, StoreId: uint64((i+j)%cfg.StoreCount + 1)}) id += 1 } @@ -197,32 +222,32 @@ func (rs *Regions) init() { } // Generate sample index - slice := make([]int, *regionCount) + slice := make([]int, cfg.RegionCount) for i := range slice { slice[i] = i } rand.Seed(0) // Ensure consistent behavior multiple times pick := func(ratio float64) []int { - rand.Shuffle(*regionCount, func(i, j int) { + rand.Shuffle(cfg.RegionCount, func(i, j int) { slice[i], slice[j] = slice[j], slice[i] }) - return append(slice[:0:0], slice[0:int(float64(*regionCount)*ratio)]...) + return append(slice[:0:0], slice[0:int(float64(cfg.RegionCount)*ratio)]...) } - rs.updateLeader = pick(*leaderUpdateRatio) - rs.updateEpoch = pick(*epochUpdateRatio) - rs.updateSpace = pick(*spaceUpdateRatio) - rs.updateFlow = pick(*flowUpdateRatio) + rs.updateLeader = pick(cfg.LeaderUpdateRatio) + rs.updateEpoch = pick(cfg.EpochUpdateRatio) + rs.updateSpace = pick(cfg.SpaceUpdateRatio) + rs.updateFlow = pick(cfg.FlowUpdateRatio) } -func (rs *Regions) update() { +func (rs *Regions) update(replica int) { rs.updateRound += 1 // update leader for _, i := range rs.updateLeader { region := rs.regions[i] - region.Leader = region.Region.Peers[rs.updateRound%*replica] + region.Leader = region.Region.Peers[rs.updateRound%replica] } // update epoch for _, i := range rs.updateEpoch { @@ -250,37 +275,163 @@ func (rs *Regions) update() { } } -func (rs *Regions) send(storeID uint64, startNotifier chan report.Report, endNotifier chan struct{}) { - cli := newClient() - stream, err := cli.RegionHeartbeat(context.TODO()) +func createHeartbeatStream(ctx context.Context, cfg *config.Config) pdpb.PD_RegionHeartbeatClient { + cli := newClient(cfg) + stream, err := cli.RegionHeartbeat(ctx) if err != nil { - log.Fatal(err) + log.Fatal("create stream error", zap.Error(err)) } - for r := range startNotifier { - startTime := time.Now() - count := 0 - for _, region := range rs.regions { - if region.Leader.StoreId != storeID { - continue - } - count += 1 - reqStart := time.Now() - err = stream.Send(region) - r.Results() <- report.Result{Start: reqStart, End: time.Now(), Err: err} + go func() { + // do nothing + for { + stream.Recv() + } + }() + return stream +} + +func (rs *Regions) handleRegionHeartbeat(wg *sync.WaitGroup, stream pdpb.PD_RegionHeartbeatClient, storeID uint64, rep report.Report) { + defer wg.Done() + var regions []*pdpb.RegionHeartbeatRequest + for _, region := range rs.regions { + if region.Leader.StoreId != storeID { + continue + } + regions = append(regions, region) + } + + start := time.Now() + var err error + for _, region := range regions { + err = stream.Send(region) + rep.Results() <- report.Result{Start: start, End: time.Now(), Err: err} + if err == io.EOF { + log.Error("receive eof error", zap.Uint64("store-id", storeID), zap.Error(err)) + err := stream.CloseSend() if err != nil { - log.Fatal(err) + log.Error("fail to close stream", zap.Uint64("store-id", storeID), zap.Error(err)) } + return + } + if err 
!= nil { + log.Error("send result error", zap.Uint64("store-id", storeID), zap.Error(err)) + return } - log.Printf("store %v finish heartbeat, count: %v, cost time: %v", storeID, count, time.Since(startTime)) - endNotifier <- struct{}{} } + log.Info("store finish one round region heartbeat", zap.Uint64("store-id", storeID), zap.Duration("cost-time", time.Since(start))) } -func (rs *Regions) result(sec float64) string { +func main() { + cfg := config.NewConfig() + err := cfg.Parse(os.Args[1:]) + defer logutil.LogPanic() + + switch errors.Cause(err) { + case nil: + case pflag.ErrHelp: + exit(0) + default: + log.Fatal("parse cmd flags error", zap.Error(err)) + } + + // New zap logger + err = cfg.SetupLogger() + if err == nil { + log.ReplaceGlobals(cfg.GetZapLogger(), cfg.GetZapLogProperties()) + } else { + log.Fatal("initialize logger error", zap.Error(err)) + } + + // let PD have enough time to start + time.Sleep(5 * time.Second) + ctx, cancel := context.WithCancel(context.Background()) + sc := make(chan os.Signal, 1) + signal.Notify(sc, + syscall.SIGHUP, + syscall.SIGINT, + syscall.SIGTERM, + syscall.SIGQUIT) + + var sig os.Signal + go func() { + sig = <-sc + cancel() + }() + cli := newClient(cfg) + initClusterID(ctx, cli) + bootstrap(ctx, cli) + putStores(ctx, cfg, cli) + log.Info("finish put stores") + regions := new(Regions) + regions.init(cfg) + log.Info("finish init regions") + + streams := make(map[uint64]pdpb.PD_RegionHeartbeatClient, cfg.StoreCount) + for i := 1; i <= cfg.StoreCount; i++ { + streams[uint64(i)] = createHeartbeatStream(ctx, cfg) + } + var heartbeatTicker = time.NewTicker(60 * time.Second) + defer heartbeatTicker.Stop() + for { + select { + case <-heartbeatTicker.C: + if cfg.Round != 0 && regions.updateRound > cfg.Round { + exit(0) + } + rep := newReport(cfg) + r := rep.Stats() + + startTime := time.Now() + wg := &sync.WaitGroup{} + for i := 1; i <= cfg.StoreCount; i++ { + id := uint64(i) + wg.Add(1) + go regions.handleRegionHeartbeat(wg, streams[id], id, rep) + } + wg.Wait() + + since := time.Since(startTime).Seconds() + close(rep.Results()) + regions.result(cfg.RegionCount, since) + stats := <-r + log.Info("region heartbeat stats", zap.String("total", fmt.Sprintf("%.4fs", stats.Total.Seconds())), + zap.String("slowest", fmt.Sprintf("%.4fs", stats.Slowest)), + zap.String("fastest", fmt.Sprintf("%.4fs", stats.Fastest)), + zap.String("average", fmt.Sprintf("%.4fs", stats.Average)), + zap.String("stddev", fmt.Sprintf("%.4fs", stats.Stddev)), + zap.String("rps", fmt.Sprintf("%.4f", stats.RPS)), + ) + log.Info("store heartbeat stats", zap.String("max", fmt.Sprintf("%.4fs", since))) + regions.update(cfg.Replica) + case <-ctx.Done(): + log.Info("Got signal to exit") + switch sig { + case syscall.SIGTERM: + exit(0) + default: + exit(1) + } + } + } +} + +func exit(code int) { + os.Exit(code) +} + +func newReport(cfg *config.Config) report.Report { + p := "%4.4f" + if cfg.Sample { + return report.NewReportSample(p) + } + return report.NewReport(p) +} + +func (rs *Regions) result(regionCount int, sec float64) { if rs.updateRound == 0 { // There was no difference in the first round - return "" + return } updated := make(map[int]struct{}) @@ -296,59 +447,12 @@ func (rs *Regions) result(sec float64) string { for _, i := range rs.updateFlow { updated[i] = struct{}{} } - inactiveCount := *regionCount - len(updated) - - ret := "Update speed of each category:\n" - ret += fmt.Sprintf(" Requests/sec: %12.4f\n", float64(*regionCount)/sec) - ret += fmt.Sprintf(" Save-Tree/sec: %12.4f\n", 
float64(len(rs.updateLeader))/sec) - ret += fmt.Sprintf(" Save-KV/sec: %12.4f\n", float64(len(rs.updateEpoch))/sec) - ret += fmt.Sprintf(" Save-Space/sec: %12.4f\n", float64(len(rs.updateSpace))/sec) - ret += fmt.Sprintf(" Save-Flow/sec: %12.4f\n", float64(len(rs.updateFlow))/sec) - ret += fmt.Sprintf(" Skip/sec: %12.4f\n", float64(inactiveCount)/sec) - return ret -} - -func main() { - log.SetFlags(0) - flag.Parse() - - cli := newClient() - initClusterID(cli) - bootstrap(cli) - putStores(cli) - - log.Println("finish put stores") - groupStartNotify := make([]chan report.Report, *storeCount+1) - groupEndNotify := make([]chan struct{}, *storeCount+1) - regions := new(Regions) - regions.init() - - for i := 1; i <= *storeCount; i++ { - startNotifier := make(chan report.Report) - endNotifier := make(chan struct{}) - groupStartNotify[i] = startNotifier - groupEndNotify[i] = endNotifier - go regions.send(uint64(i), startNotifier, endNotifier) - } - - for i := 0; i < *heartbeatRounds; i++ { - log.Printf("\n--------- Bench heartbeat (Round %d) ----------\n", i+1) - repo := newReport() - rs := repo.Run() - // All stores start heartbeat. - startTime := time.Now() - for storeID := 1; storeID <= *storeCount; storeID++ { - startNotifier := groupStartNotify[storeID] - startNotifier <- repo - } - // All stores finished heartbeat once. - for storeID := 1; storeID <= *storeCount; storeID++ { - <-groupEndNotify[storeID] - } - since := time.Since(startTime).Seconds() - close(repo.Results()) - log.Println(<-rs) - log.Println(regions.result(since)) - regions.update() - } + inactiveCount := regionCount - len(updated) + + log.Info("update speed of each category", zap.String("rps", fmt.Sprintf("%.4f", float64(regionCount)/sec)), + zap.String("save-tree", fmt.Sprintf("%.4f", float64(len(rs.updateLeader))/sec)), + zap.String("save-kv", fmt.Sprintf("%.4f", float64(len(rs.updateEpoch))/sec)), + zap.String("save-space", fmt.Sprintf("%.4f", float64(len(rs.updateSpace))/sec)), + zap.String("save-flow", fmt.Sprintf("%.4f", float64(len(rs.updateFlow))/sec)), + zap.String("skip", fmt.Sprintf("%.4f", float64(inactiveCount)/sec))) } From 6ca67e3fb024c8c113125757fed53dafc98354dc Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Fri, 18 Nov 2022 23:45:56 +0800 Subject: [PATCH 52/67] *: make `TestMember` stable (#5715) close tikv/pd#5714 Signed-off-by: Ryan Leung --- tests/pdctl/member/member_test.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/pdctl/member/member_test.go b/tests/pdctl/member/member_test.go index 2c93a9c6c533..7d80eca6a4a1 100644 --- a/tests/pdctl/member/member_test.go +++ b/tests/pdctl/member/member_test.go @@ -93,15 +93,19 @@ func TestMember(t *testing.T) { args = []string{"-u", pdAddr, "member", "delete", "name", name} _, err = pdctl.ExecuteCommand(cmd, args...) re.NoError(err) - members, err = etcdutil.ListEtcdMembers(client) - re.NoError(err) - re.Len(members.Members, 2) + testutil.Eventually(re, func() bool { + members, err = etcdutil.ListEtcdMembers(client) + re.NoError(err) + return len(members.Members) == 2 + }) // member delete id args = []string{"-u", pdAddr, "member", "delete", "id", fmt.Sprint(id)} _, err = pdctl.ExecuteCommand(cmd, args...) 
re.NoError(err) - members, err = etcdutil.ListEtcdMembers(client) - re.NoError(err) - re.Len(members.Members, 2) + testutil.Eventually(re, func() bool { + members, err = etcdutil.ListEtcdMembers(client) + re.NoError(err) + return len(members.Members) == 2 + }) } From 3b0681834fbfa52749ff1cc475a84e1f6b12af87 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Mon, 21 Nov 2022 11:39:57 +0800 Subject: [PATCH 53/67] statistics: reduce unnecessary clone and init in hot cache (#5716) ref tikv/pd#5692 Signed-off-by: lhy1024 Co-authored-by: Ti Chi Robot --- pkg/movingaverage/avg_over_time.go | 4 +- pkg/movingaverage/queue.go | 9 +-- server/statistics/hot_peer_cache.go | 52 ++++++++++-------- server/statistics/hot_peer_cache_test.go | 70 ++++++++++++++++-------- 4 files changed, 82 insertions(+), 53 deletions(-) diff --git a/pkg/movingaverage/avg_over_time.go b/pkg/movingaverage/avg_over_time.go index 35f85ab82538..f7df1004eba1 100644 --- a/pkg/movingaverage/avg_over_time.go +++ b/pkg/movingaverage/avg_over_time.go @@ -61,7 +61,9 @@ func (aot *AvgOverTime) Get() float64 { // Clear clears the AvgOverTime. func (aot *AvgOverTime) Clear() { - aot.que.Init() + for aot.que.que.Len() > 0 { + aot.que.PopFront() + } aot.margin = deltaWithInterval{ delta: 0, interval: 0, diff --git a/pkg/movingaverage/queue.go b/pkg/movingaverage/queue.go index 0b5e26337e4b..6ac19996d0d7 100644 --- a/pkg/movingaverage/queue.go +++ b/pkg/movingaverage/queue.go @@ -32,13 +32,6 @@ func NewSafeQueue() *SafeQueue { return sq } -// Init implement init -func (sq *SafeQueue) Init() { - sq.mu.Lock() - defer sq.mu.Unlock() - sq.que.Init() -} - // PushBack implement PushBack func (sq *SafeQueue) PushBack(v interface{}) { sq.mu.Lock() @@ -57,7 +50,7 @@ func (sq *SafeQueue) PopFront() interface{} { func (sq *SafeQueue) Clone() *SafeQueue { sq.mu.Lock() defer sq.mu.Unlock() - q := queue.New().Init() + q := queue.New() for i := 0; i < sq.que.Len(); i++ { v := sq.que.PopFront() sq.que.PushBack(v) diff --git a/server/statistics/hot_peer_cache.go b/server/statistics/hot_peer_cache.go index 9671935f1f7f..2d296146c4c8 100644 --- a/server/statistics/hot_peer_cache.go +++ b/server/statistics/hot_peer_cache.go @@ -167,17 +167,39 @@ func (f *hotPeerCache) collectExpiredItems(region *core.RegionInfo) []*HotPeerSt // checkPeerFlow will update oldItem's rollingLoads into newItem, thus we should use write lock here. 
func (f *hotPeerCache) checkPeerFlow(peer *core.PeerInfo, region *core.RegionInfo) *HotPeerStat { interval := peer.GetInterval() - // for test or simulator purpose - if Denoising && interval < HotRegionReportMinInterval { + if Denoising && interval < HotRegionReportMinInterval { // for test or simulator purpose return nil } storeID := peer.GetStoreID() deltaLoads := peer.GetLoads() - // update metrics - f.collectPeerMetrics(deltaLoads, interval) + f.collectPeerMetrics(deltaLoads, interval) // update metrics regionID := region.GetID() oldItem := f.getOldHotPeerStat(regionID, storeID) thresholds := f.calcHotThresholds(storeID) + + // check whether the peer is allowed to be inherited + source := direct + if oldItem == nil { + for _, storeID := range f.getAllStoreIDs(region) { + oldItem = f.getOldHotPeerStat(regionID, storeID) + if oldItem != nil && oldItem.allowInherited { + source = inherit + break + } + } + } + + // check new item whether is hot + if oldItem == nil { + regionStats := f.kind.RegionStats() + isHot := slice.AnyOf(regionStats, func(i int) bool { + return deltaLoads[regionStats[i]]/float64(interval) >= thresholds[i] + }) + if !isHot { + return nil + } + } + newItem := &HotPeerStat{ StoreID: storeID, RegionID: regionID, @@ -190,17 +212,11 @@ func (f *hotPeerCache) checkPeerFlow(peer *core.PeerInfo, region *core.RegionInf peers: region.GetPeers(), actionType: Update, thresholds: thresholds, - source: direct, + source: source, } if oldItem == nil { - for _, storeID := range f.getAllStoreIDs(region) { - oldItem = f.getOldHotPeerStat(regionID, storeID) - if oldItem != nil && oldItem.allowInherited { - newItem.source = inherit - break - } - } + return f.updateNewHotPeerStat(newItem, deltaLoads, time.Duration(interval)*time.Second) } return f.updateHotPeerStat(region, newItem, oldItem, deltaLoads, time.Duration(interval)*time.Second) } @@ -389,9 +405,6 @@ func (f *hotPeerCache) getHotPeerStat(regionID, storeID uint64) *HotPeerStat { func (f *hotPeerCache) updateHotPeerStat(region *core.RegionInfo, newItem, oldItem *HotPeerStat, deltaLoads []float64, interval time.Duration) *HotPeerStat { regionStats := f.kind.RegionStats() - if oldItem == nil { - return f.updateNewHotPeerStat(regionStats, newItem, deltaLoads, interval) - } if newItem.source == inherit { for _, dim := range oldItem.rollingLoads { @@ -450,14 +463,9 @@ func (f *hotPeerCache) updateHotPeerStat(region *core.RegionInfo, newItem, oldIt return newItem } -func (f *hotPeerCache) updateNewHotPeerStat(regionStats []RegionStatKind, newItem *HotPeerStat, deltaLoads []float64, interval time.Duration) *HotPeerStat { +func (f *hotPeerCache) updateNewHotPeerStat(newItem *HotPeerStat, deltaLoads []float64, interval time.Duration) *HotPeerStat { + regionStats := f.kind.RegionStats() // interval is not 0 which is guaranteed by the caller. 
- isHot := slice.AnyOf(regionStats, func(i int) bool { - return deltaLoads[regionStats[i]]/interval.Seconds() >= newItem.thresholds[i] - }) - if !isHot { - return nil - } if interval.Seconds() >= float64(f.reportIntervalSecs) { initItem(newItem) } diff --git a/server/statistics/hot_peer_cache_test.go b/server/statistics/hot_peer_cache_test.go index 412c006c40ab..9baf8c9a3005 100644 --- a/server/statistics/hot_peer_cache_test.go +++ b/server/statistics/hot_peer_cache_test.go @@ -307,56 +307,77 @@ func newPeers(n int, pid genID, sid genID) []*metapb.Peer { func TestUpdateHotPeerStat(t *testing.T) { re := require.New(t) cache := NewHotPeerCache(Read) + storeID, regionID := uint64(1), uint64(2) + peer := &metapb.Peer{StoreId: storeID} + region := core.NewRegionInfo(&metapb.Region{Id: regionID, Peers: []*metapb.Peer{peer}}, peer) // we statistic read peer info from store heartbeat rather than region heartbeat m := RegionHeartBeatReportInterval / StoreHeartBeatReportInterval // skip interval=0 - newItem := &HotPeerStat{actionType: Update, thresholds: []float64{0.0, 0.0, 0.0}, Kind: Read} - newItem = cache.updateHotPeerStat(nil, newItem, nil, []float64{0.0, 0.0, 0.0}, 0) + interval := 0 + deltaLoads := []float64{0.0, 0.0, 0.0} + MinHotThresholds[RegionReadBytes] = 0.0 + MinHotThresholds[RegionReadKeys] = 0.0 + MinHotThresholds[RegionReadQueryNum] = 0.0 + + newItem := cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) re.Nil(newItem) // new peer, interval is larger than report interval, but no hot - newItem = &HotPeerStat{actionType: Update, thresholds: []float64{1.0, 1.0, 1.0}, Kind: Read} - newItem = cache.updateHotPeerStat(nil, newItem, nil, []float64{0.0, 0.0, 0.0}, 10*time.Second) + interval = 10 + deltaLoads = []float64{0.0, 0.0, 0.0} + MinHotThresholds[RegionReadBytes] = 1.0 + MinHotThresholds[RegionReadKeys] = 1.0 + MinHotThresholds[RegionReadQueryNum] = 1.0 + newItem = cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) re.Nil(newItem) // new peer, interval is less than report interval - newItem = &HotPeerStat{actionType: Update, thresholds: []float64{0.0, 0.0, 0.0}, Kind: Read} - newItem = cache.updateHotPeerStat(nil, newItem, nil, []float64{60.0, 60.0, 60.0}, 4*time.Second) + interval = 4 + deltaLoads = []float64{60.0, 60.0, 60.0} + MinHotThresholds[RegionReadBytes] = 0.0 + MinHotThresholds[RegionReadKeys] = 0.0 + MinHotThresholds[RegionReadQueryNum] = 0.0 + newItem = cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) re.NotNil(newItem) re.Equal(0, newItem.HotDegree) re.Equal(0, newItem.AntiCount) // sum of interval is less than report interval - oldItem := newItem - newItem = cache.updateHotPeerStat(nil, newItem, oldItem, []float64{60.0, 60.0, 60.0}, 4*time.Second) + interval = 4 + deltaLoads = []float64{60.0, 60.0, 60.0} + cache.updateStat(newItem) + newItem = cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) re.Equal(0, newItem.HotDegree) re.Equal(0, newItem.AntiCount) // sum of interval is larger than report interval, and hot - oldItem = newItem - oldItem.AntiCount = oldItem.defaultAntiCount() - newItem = cache.updateHotPeerStat(nil, newItem, oldItem, []float64{60.0, 60.0, 60.0}, 4*time.Second) + newItem.AntiCount = newItem.defaultAntiCount() + cache.updateStat(newItem) + newItem = cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) re.Equal(1, newItem.HotDegree) re.Equal(2*m, newItem.AntiCount) // sum of interval is less than 
report interval - oldItem = newItem - newItem = cache.updateHotPeerStat(nil, newItem, oldItem, []float64{60.0, 60.0, 60.0}, 4*time.Second) + cache.updateStat(newItem) + newItem = cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) re.Equal(1, newItem.HotDegree) re.Equal(2*m, newItem.AntiCount) // sum of interval is larger than report interval, and hot - oldItem = newItem - newItem = cache.updateHotPeerStat(nil, newItem, oldItem, []float64{60.0, 60.0, 60.0}, 10*time.Second) + interval = 10 + cache.updateStat(newItem) + newItem = cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) re.Equal(2, newItem.HotDegree) re.Equal(2*m, newItem.AntiCount) // sum of interval is larger than report interval, and cold - oldItem = newItem - newItem.thresholds = []float64{10.0, 10.0, 10.0} - newItem = cache.updateHotPeerStat(nil, newItem, oldItem, []float64{60.0, 60.0, 60.0}, 10*time.Second) + MinHotThresholds[RegionReadBytes] = 10.0 + MinHotThresholds[RegionReadKeys] = 10.0 + MinHotThresholds[RegionReadQueryNum] = 10.0 + cache.updateStat(newItem) + newItem = cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) re.Equal(1, newItem.HotDegree) re.Equal(2*m-1, newItem.AntiCount) // sum of interval is larger than report interval, and cold for i := 0; i < 2*m-1; i++ { - oldItem = newItem - newItem = cache.updateHotPeerStat(nil, newItem, oldItem, []float64{60.0, 60.0, 60.0}, 10*time.Second) + cache.updateStat(newItem) + newItem = cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) } re.Less(newItem.HotDegree, 0) re.Equal(0, newItem.AntiCount) @@ -380,6 +401,7 @@ func testMetrics(re *require.Assertions, interval, byteRate, expectThreshold flo re.GreaterOrEqual(byteRate, MinHotThresholds[RegionReadBytes]) for i := uint64(1); i < TopNN+10; i++ { var oldItem *HotPeerStat + var item *HotPeerStat for { thresholds := cache.calcHotThresholds(storeID) newItem := &HotPeerStat{ @@ -396,7 +418,12 @@ func testMetrics(re *require.Assertions, interval, byteRate, expectThreshold flo if oldItem != nil && oldItem.rollingLoads[ByteDim].isHot(thresholds[ByteDim]) == true { break } - item := cache.updateHotPeerStat(nil, newItem, oldItem, []float64{byteRate * interval, 0.0, 0.0}, time.Duration(interval)*time.Second) + loads := []float64{byteRate * interval, 0.0, 0.0} + if oldItem == nil { + item = cache.updateNewHotPeerStat(newItem, loads, time.Duration(interval)*time.Second) + } else { + item = cache.updateHotPeerStat(nil, newItem, oldItem, loads, time.Duration(interval)*time.Second) + } cache.updateStat(item) } thresholds := cache.calcHotThresholds(storeID) @@ -663,7 +690,6 @@ func TestHotPeerCacheTopN(t *testing.T) { } re.Contains(cache.peersOfStore, uint64(1)) - println(cache.peersOfStore[1].GetTopNMin(ByteDim).(*HotPeerStat).GetLoad(ByteDim)) re.True(typeutil.Float64Equal(4000, cache.peersOfStore[1].GetTopNMin(ByteDim).(*HotPeerStat).GetLoad(ByteDim))) } From 7a040fd3d64868d1b05d156cdcf334ce19b6acf2 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Mon, 21 Nov 2022 16:01:58 +0800 Subject: [PATCH 54/67] core: use map directly (#5719) ref tikv/pd#4399 Signed-off-by: Ryan Leung Co-authored-by: Ti Chi Robot --- server/core/region.go | 52 ++++++++------------------------------ server/core/region_test.go | 24 +++++++++--------- 2 files changed, 22 insertions(+), 54 deletions(-) diff --git a/server/core/region.go b/server/core/region.go index 14792ab3d46c..2c7c46c2b60c 100644 --- a/server/core/region.go +++ 
b/server/core/region.go @@ -696,38 +696,10 @@ func GenerateRegionGuideFunc(enableLog bool) RegionGuideFunc { } } -// regionMap wraps a map[uint64]*regionItem and supports randomly pick a region. They are the leaves of regionTree. -type regionMap map[uint64]*regionItem - -func newRegionMap() regionMap { - return make(map[uint64]*regionItem) -} - -func (rm regionMap) Len() int { - return len(rm) -} - -func (rm regionMap) Get(id uint64) *regionItem { - return rm[id] -} - -// AddNew uses RegionInfo to generate a new regionItem. -// If the regionItem already exists, it will be overwritten. -// Note: Do not use this function when you only need to update the RegionInfo and do not need a new regionItem. -func (rm regionMap) AddNew(region *RegionInfo) *regionItem { - item := ®ionItem{RegionInfo: region} - rm[region.GetID()] = item - return item -} - -func (rm regionMap) Delete(id uint64) { - delete(rm, id) -} - // RegionsInfo for export type RegionsInfo struct { tree *regionTree - regions regionMap // regionID -> regionInfo + regions map[uint64]*regionItem // regionID -> regionInfo leaders map[uint64]*regionTree // storeID -> sub regionTree followers map[uint64]*regionTree // storeID -> sub regionTree learners map[uint64]*regionTree // storeID -> sub regionTree @@ -739,7 +711,7 @@ type RegionsInfo struct { func NewRegionsInfo() *RegionsInfo { return &RegionsInfo{ tree: newRegionTree(), - regions: newRegionMap(), + regions: make(map[uint64]*regionItem), leaders: make(map[uint64]*regionTree), followers: make(map[uint64]*regionTree), learners: make(map[uint64]*regionTree), @@ -750,7 +722,7 @@ func NewRegionsInfo() *RegionsInfo { // GetRegion returns the RegionInfo with regionID func (r *RegionsInfo) GetRegion(regionID uint64) *RegionInfo { - if item := r.regions.Get(regionID); item != nil { + if item := r.regions[regionID]; item != nil { return item.RegionInfo } return nil @@ -761,7 +733,7 @@ func (r *RegionsInfo) GetRegion(regionID uint64) *RegionInfo { func (r *RegionsInfo) SetRegion(region *RegionInfo) (overlaps []*RegionInfo) { var item *regionItem // Pointer to the *RegionInfo of this ID. rangeChanged := true // This Region is new, or its range has changed. - if item = r.regions.Get(region.GetID()); item != nil { + if item = r.regions[region.GetID()]; item != nil { // If this ID already exists, use the existing regionItem and pick out the origin. origin := item.RegionInfo rangeChanged = !origin.rangeEqualsTo(region) @@ -788,7 +760,8 @@ func (r *RegionsInfo) SetRegion(region *RegionInfo) (overlaps []*RegionInfo) { item.RegionInfo = region } else { // If this ID does not exist, generate a new regionItem and save it in the regionMap. - item = r.regions.AddNew(region) + item = ®ionItem{RegionInfo: region} + r.regions[region.GetID()] = item } if rangeChanged { @@ -839,11 +812,6 @@ func (r *RegionsInfo) SetRegion(region *RegionInfo) (overlaps []*RegionInfo) { return } -// Len returns the RegionsInfo length -func (r *RegionsInfo) Len() int { - return r.regions.Len() -} - // TreeLen returns the RegionsInfo tree length(now only used in test) func (r *RegionsInfo) TreeLen() int { return r.tree.length() @@ -883,7 +851,7 @@ func (r *RegionsInfo) GetOverlaps(region *RegionInfo) []*RegionInfo { func (r *RegionsInfo) RemoveRegion(region *RegionInfo) { // Remove from tree and regions. r.tree.remove(region) - r.regions.Delete(region.GetID()) + delete(r.regions, region.GetID()) // Remove from leaders and followers. 
r.removeRegionFromSubTree(region) } @@ -974,7 +942,7 @@ func (r *RegionsInfo) GetPrevRegionByKey(regionKey []byte) *RegionInfo { // GetRegions gets all RegionInfo from regionMap func (r *RegionsInfo) GetRegions() []*RegionInfo { - regions := make([]*RegionInfo, 0, r.regions.Len()) + regions := make([]*RegionInfo, 0, len(r.regions)) for _, item := range r.regions { regions = append(regions, item.RegionInfo) } @@ -1038,7 +1006,7 @@ func (r *RegionsInfo) GetStoreWriteRate(storeID uint64) (bytesRate, keysRate flo // GetMetaRegions gets a set of metapb.Region from regionMap func (r *RegionsInfo) GetMetaRegions() []*metapb.Region { - regions := make([]*metapb.Region, 0, r.regions.Len()) + regions := make([]*metapb.Region, 0, len(r.regions)) for _, item := range r.regions { regions = append(regions, typeutil.DeepClone(item.meta, RegionFactory)) } @@ -1047,7 +1015,7 @@ func (r *RegionsInfo) GetMetaRegions() []*metapb.Region { // GetRegionCount gets the total count of RegionInfo of regionMap func (r *RegionsInfo) GetRegionCount() int { - return r.regions.Len() + return len(r.regions) } // GetStoreRegionCount gets the total count of a store's leader, follower and learner RegionInfo by storeID diff --git a/server/core/region_test.go b/server/core/region_test.go index 93367f8dbbc9..6db10a6bd04c 100644 --- a/server/core/region_test.go +++ b/server/core/region_test.go @@ -367,24 +367,24 @@ func TestNeedSync(t *testing.T) { func TestRegionMap(t *testing.T) { re := require.New(t) - rm := newRegionMap() + rm := make(map[uint64]*regionItem) check(re, rm) - rm.AddNew(regionInfo(1)) + rm[1] = ®ionItem{RegionInfo: regionInfo(1)} check(re, rm, 1) - rm.AddNew(regionInfo(2)) - rm.AddNew(regionInfo(3)) + rm[2] = ®ionItem{RegionInfo: regionInfo(2)} + rm[3] = ®ionItem{RegionInfo: regionInfo(3)} check(re, rm, 1, 2, 3) - rm.AddNew(regionInfo(3)) - rm.Delete(4) + rm[3] = ®ionItem{RegionInfo: regionInfo(3)} + delete(rm, 4) check(re, rm, 1, 2, 3) - rm.Delete(3) - rm.Delete(1) + delete(rm, 3) + delete(rm, 1) check(re, rm, 2) - rm.AddNew(regionInfo(3)) + rm[3] = ®ionItem{RegionInfo: regionInfo(3)} check(re, rm, 2, 3) } @@ -398,13 +398,13 @@ func regionInfo(id uint64) *RegionInfo { } } -func check(re *require.Assertions, rm regionMap, ids ...uint64) { +func check(re *require.Assertions, rm map[uint64]*regionItem, ids ...uint64) { // Check Get. for _, id := range ids { - re.Equal(id, rm.Get(id).GetID()) + re.Equal(id, rm[id].GetID()) } // Check Len. - re.Equal(len(ids), rm.Len()) + re.Equal(len(ids), len(rm)) // Check id set. 
expect := make(map[uint64]struct{}) for _, id := range ids { From c703be363ebd8bcec39851cd7847cad4ce74eb63 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Mon, 21 Nov 2022 18:47:58 +0800 Subject: [PATCH 55/67] statistics: reduce unnecessary copy when collect hot metrics (#5709) ref tikv/pd#5692 Signed-off-by: lhy1024 Co-authored-by: Ti Chi Robot --- server/cluster/coordinator.go | 2 +- server/cluster/coordinator_test.go | 44 +++++++++++++++++++++- server/statistics/hot_peer.go | 4 +- server/statistics/hot_peer_cache.go | 3 +- server/statistics/store_hot_peers_infos.go | 37 +++++++++++++++++- server/statistics/store_load.go | 12 +++--- 6 files changed, 90 insertions(+), 12 deletions(-) diff --git a/server/cluster/coordinator.go b/server/cluster/coordinator.go index 973821469ce8..b7fe2f4e0fdb 100644 --- a/server/cluster/coordinator.go +++ b/server/cluster/coordinator.go @@ -556,7 +556,7 @@ func collectHotMetrics(cluster *RaftCluster, stores []*core.StoreInfo, typ stati regionStats = cluster.RegionWriteStats() kind = statistics.Write.String() } - status := statistics.GetHotStatus(stores, cluster.GetStoresLoads(), regionStats, typ, cluster.GetOpts().IsTraceRegionFlow()) + status := statistics.CollectHotPeerInfos(stores, regionStats) // only returns TotalBytesRate,TotalKeysRate,TotalQueryRate,Count for _, s := range stores { storeAddress := s.GetAddress() diff --git a/server/cluster/coordinator_test.go b/server/cluster/coordinator_test.go index 1da1df9535a7..df4bac0f8c5d 100644 --- a/server/cluster/coordinator_test.go +++ b/server/cluster/coordinator_test.go @@ -238,7 +238,7 @@ func dispatchHeartbeat(co *coordinator, region *core.RegionInfo, stream hbstream return nil } -func TestCollectMetrics(t *testing.T) { +func TestCollectMetricsConcurrent(t *testing.T) { re := require.New(t) tc, co, cleanup := prepare(nil, func(tc *testCluster) { @@ -269,6 +269,48 @@ func TestCollectMetrics(t *testing.T) { wg.Wait() } +func TestCollectMetrics(t *testing.T) { + re := require.New(t) + + tc, co, cleanup := prepare(nil, func(tc *testCluster) { + tc.regionStats = statistics.NewRegionStatistics(tc.GetOpts(), nil, tc.storeConfigManager) + }, func(co *coordinator) { co.run() }, re) + defer cleanup() + count := 10 + for i := 0; i <= count; i++ { + for k := 0; k < 200; k++ { + item := &statistics.HotPeerStat{ + StoreID: uint64(i % 5), + RegionID: uint64(i*1000 + k), + Kind: statistics.Write, + Loads: []float64{10, 20, 30}, + HotDegree: 10, + AntiCount: statistics.HotRegionAntiCount, // for write + } + tc.hotStat.HotCache.Update(item) + } + } + for i := 0; i < 1000; i++ { + co.collectHotSpotMetrics() + co.collectSchedulerMetrics() + co.cluster.collectClusterMetrics() + } + stores := co.cluster.GetStores() + regionStats := co.cluster.RegionWriteStats() + status1 := statistics.CollectHotPeerInfos(stores, regionStats) + status2 := statistics.GetHotStatus(stores, co.cluster.GetStoresLoads(), regionStats, statistics.Write, co.cluster.GetOpts().IsTraceRegionFlow()) + for _, s := range status2.AsLeader { + s.Stats = nil + } + for _, s := range status2.AsPeer { + s.Stats = nil + } + re.Equal(status1, status2) + co.resetHotSpotMetrics() + co.resetSchedulerMetrics() + co.cluster.resetClusterMetrics() +} + func prepare(setCfg func(*config.ScheduleConfig), setTc func(*testCluster), run func(*coordinator), re *require.Assertions) (*testCluster, *coordinator, func()) { ctx, cancel := context.WithCancel(context.Background()) cfg, opt, err := newTestScheduleConfig() diff --git a/server/statistics/hot_peer.go 
b/server/statistics/hot_peer.go index 6a028c0758f6..3de9dbd48304 100644 --- a/server/statistics/hot_peer.go +++ b/server/statistics/hot_peer.go @@ -233,7 +233,7 @@ func (stat *HotPeerStat) IsLearner() bool { func (stat *HotPeerStat) defaultAntiCount() int { if stat.Kind == Read { - return hotRegionAntiCount * (RegionHeartBeatReportInterval / StoreHeartBeatReportInterval) + return HotRegionAntiCount * (RegionHeartBeatReportInterval / StoreHeartBeatReportInterval) } - return hotRegionAntiCount + return HotRegionAntiCount } diff --git a/server/statistics/hot_peer_cache.go b/server/statistics/hot_peer_cache.go index 2d296146c4c8..2df29a3d7908 100644 --- a/server/statistics/hot_peer_cache.go +++ b/server/statistics/hot_peer_cache.go @@ -40,7 +40,8 @@ const ( // HotRegionReportMinInterval is used for the simulator and test HotRegionReportMinInterval = 3 - hotRegionAntiCount = 2 + // HotRegionAntiCount is default value for antiCount + HotRegionAntiCount = 2 queueCap = 20000 ) diff --git a/server/statistics/store_hot_peers_infos.go b/server/statistics/store_hot_peers_infos.go index 9c5c51e9dfb1..b3273eee83ef 100644 --- a/server/statistics/store_hot_peers_infos.go +++ b/server/statistics/store_hot_peers_infos.go @@ -32,6 +32,41 @@ type StoreHotPeersInfos struct { // NOTE: This type is exported by HTTP API. Please pay more attention when modifying it. type StoreHotPeersStat map[uint64]*HotPeersStat +// CollectHotPeerInfos only returns TotalBytesRate,TotalKeysRate,TotalQueryRate,Count +func CollectHotPeerInfos(stores []*core.StoreInfo, regionStats map[uint64][]*HotPeerStat) *StoreHotPeersInfos { + peerLoadSum := make([]float64, DimLen) + collect := func(kind core.ResourceKind) StoreHotPeersStat { + ret := make(StoreHotPeersStat, len(stores)) + for _, store := range stores { + id := store.GetID() + hotPeers, ok := regionStats[id] + if !ok { + continue + } + for i := range peerLoadSum { + peerLoadSum[i] = 0 + } + peers := filterHotPeers(kind, hotPeers) + for _, peer := range peers { + for j := range peerLoadSum { + peerLoadSum[j] += peer.GetLoad(j) + } + } + ret[id] = &HotPeersStat{ + TotalBytesRate: peerLoadSum[ByteDim], + TotalKeysRate: peerLoadSum[KeyDim], + TotalQueryRate: peerLoadSum[QueryDim], + Count: len(peers), + } + } + return ret + } + return &StoreHotPeersInfos{ + AsPeer: collect(core.RegionKind), + AsLeader: collect(core.LeaderKind), + } +} + // GetHotStatus returns the hot status for a given type. func GetHotStatus(stores []*core.StoreInfo, storesLoads map[uint64][]float64, regionStats map[uint64][]*HotPeerStat, typ RWType, isTraceRegionFlow bool) *StoreHotPeersInfos { stInfos := SummaryStoreInfos(stores) @@ -125,7 +160,7 @@ func summaryStoresLoadByEngine( // HotLeaders consider `Write{Bytes,Keys}`, so when we schedule `writeLeader`, all peers are leader. 
for _, peer := range filterHotPeers(kind, storeHotPeers[id]) { for i := range peerLoadSum { - peerLoadSum[i] += peer.Loads[i] + peerLoadSum[i] += peer.GetLoad(i) } hotPeers = append(hotPeers, peer.Clone()) } diff --git a/server/statistics/store_load.go b/server/statistics/store_load.go index 12c9e92910a9..23f093a3b031 100644 --- a/server/statistics/store_load.go +++ b/server/statistics/store_load.go @@ -48,9 +48,9 @@ func (li *StoreLoadDetail) ToHotPeersStat() *HotPeersStat { for _, peer := range li.HotPeers { if peer.HotDegree > 0 { peers = append(peers, toHotPeerStatShow(peer)) - byteRate += peer.Loads[ByteDim] - keyRate += peer.Loads[KeyDim] - queryRate += peer.Loads[QueryDim] + byteRate += peer.GetLoad(ByteDim) + keyRate += peer.GetLoad(KeyDim) + queryRate += peer.GetLoad(QueryDim) } } @@ -72,9 +72,9 @@ func (li *StoreLoadDetail) IsUniform(dim int, threshold float64) bool { } func toHotPeerStatShow(p *HotPeerStat) HotPeerStatShow { - byteRate := p.Loads[ByteDim] - keyRate := p.Loads[KeyDim] - queryRate := p.Loads[QueryDim] + byteRate := p.GetLoad(ByteDim) + keyRate := p.GetLoad(KeyDim) + queryRate := p.GetLoad(QueryDim) return HotPeerStatShow{ StoreID: p.StoreID, Stores: p.GetStores(), From 36c15320da41dee2d43305f4e2c62cf4ee7a3551 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Tue, 22 Nov 2022 17:39:57 +0800 Subject: [PATCH 56/67] *: make TestScheduler stable (#5723) close tikv/pd#4933 Signed-off-by: Ryan Leung --- tests/client/client_test.go | 1 - tests/pdctl/cluster/cluster_test.go | 2 +- tests/pdctl/scheduler/scheduler_test.go | 23 +++++++++++++---------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/client/client_test.go b/tests/client/client_test.go index b2d7c23adace..cde7ee3d7fcd 100644 --- a/tests/client/client_test.go +++ b/tests/client/client_test.go @@ -1404,7 +1404,6 @@ func (suite *clientTestSuite) TestScatterRegion() { testutil.Eventually(re, func() bool { err := suite.client.ScatterRegion(context.Background(), regionID) if err != nil { - fmt.Println(err) return false } resp, err := suite.client.GetOperator(context.Background(), regionID) diff --git a/tests/pdctl/cluster/cluster_test.go b/tests/pdctl/cluster/cluster_test.go index 9d69f89dceff..2b8b8bc8f590 100644 --- a/tests/pdctl/cluster/cluster_test.go +++ b/tests/pdctl/cluster/cluster_test.go @@ -35,6 +35,7 @@ func TestClusterAndPing(t *testing.T) { defer cancel() cluster, err := tests.NewTestCluster(ctx, 1) re.NoError(err) + defer cluster.Destroy() err = cluster.RunInitialServers() re.NoError(err) cluster.WaitLeader() @@ -44,7 +45,6 @@ func TestClusterAndPing(t *testing.T) { i := strings.Index(pdAddr, "//") pdAddr = pdAddr[i+2:] cmd := pdctlCmd.GetRootCmd() - defer cluster.Destroy() // cluster args := []string{"-u", pdAddr, "cluster"} diff --git a/tests/pdctl/scheduler/scheduler_test.go b/tests/pdctl/scheduler/scheduler_test.go index 0819500e52d7..384261f484a3 100644 --- a/tests/pdctl/scheduler/scheduler_test.go +++ b/tests/pdctl/scheduler/scheduler_test.go @@ -17,7 +17,6 @@ package scheduler_test import ( "context" "encoding/json" - "reflect" "testing" "time" @@ -37,6 +36,7 @@ func TestScheduler(t *testing.T) { defer cancel() cluster, err := tests.NewTestCluster(ctx, 1) re.NoError(err) + defer cluster.Destroy() err = cluster.RunInitialServers() re.NoError(err) cluster.WaitLeader() @@ -129,8 +129,6 @@ func TestScheduler(t *testing.T) { } pdctl.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b")) - defer cluster.Destroy() - time.Sleep(3 * time.Second) // scheduler show command @@ -392,20 
+390,25 @@ func TestScheduler(t *testing.T) { mustExec([]string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) re.Equal(expected1, conf1) // test compatibility + re.Equal("2.0.0", leaderServer.GetClusterVersion().String()) for _, store := range stores { version := versioninfo.HotScheduleWithQuery store.Version = versioninfo.MinSupportedVersion(version).String() pdctl.MustPutStore(re, leaderServer.GetServer(), store) - mustExec([]string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) } - conf["read-priorities"] = []interface{}{"query", "byte"} + re.Equal("5.2.0", leaderServer.GetClusterVersion().String()) mustExec([]string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + // After upgrading, we should not use query. + expected1["read-priorities"] = []interface{}{"query", "byte"} + re.NotEqual(expected1, conf1) + expected1["read-priorities"] = []interface{}{"key", "byte"} + re.Equal(expected1, conf1) // cannot set qps as write-peer-priorities - mustExec([]string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "write-peer-priorities", "query,byte"}, nil) - re.Eventually(func() bool { - mustExec([]string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - return reflect.DeepEqual(expected1, conf1) - }, time.Second*10, time.Millisecond*50) + echo = mustExec([]string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "write-peer-priorities", "query,byte"}, nil) + re.Contains(echo, "query is not allowed to be set in priorities for write-peer-priorities") + mustExec([]string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + // test remove and add mustExec([]string{"-u", pdAddr, "scheduler", "remove", "balance-hot-region-scheduler"}, nil) mustExec([]string{"-u", pdAddr, "scheduler", "add", "balance-hot-region-scheduler"}, nil) From ff29b53f746a6b51b747fbc3519ed103e167c7c7 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Tue, 22 Nov 2022 18:21:58 +0800 Subject: [PATCH 57/67] scheduler: introduce base class of hot region scheduler (#5708) ref tikv/pd#5691 Signed-off-by: lhy1024 Co-authored-by: Ti Chi Robot --- server/schedulers/grant_hot_region.go | 51 ++--- server/schedulers/hot_region.go | 236 +++++++++++++----------- server/schedulers/shuffle_hot_region.go | 52 +----- 3 files changed, 148 insertions(+), 191 deletions(-) diff --git a/server/schedulers/grant_hot_region.go b/server/schedulers/grant_hot_region.go index f89ec0ebe7bc..20584fc922eb 100644 --- a/server/schedulers/grant_hot_region.go +++ b/server/schedulers/grant_hot_region.go @@ -15,12 +15,10 @@ package schedulers import ( - "math/rand" "net/http" "sort" "strconv" "strings" - "time" "github.com/gorilla/mux" "github.com/pingcap/kvproto/pkg/metapb" @@ -157,27 +155,19 @@ func (conf *grantHotRegionSchedulerConfig) has(storeID uint64) bool { // grantLeaderScheduler transfers all hot peers to peers and transfer leader to the fixed store type grantHotRegionScheduler struct { - *BaseScheduler - r *rand.Rand - conf *grantHotRegionSchedulerConfig - handler http.Handler - types []statistics.RWType - stLoadInfos [resourceTypeLen]map[uint64]*statistics.StoreLoadDetail + *baseHotScheduler + conf *grantHotRegionSchedulerConfig + handler http.Handler } // newGrantHotRegionScheduler creates an admin scheduler that transfers hot region peer to fixed store and hot region leader to one store. 
func newGrantHotRegionScheduler(opController *schedule.OperatorController, conf *grantHotRegionSchedulerConfig) *grantHotRegionScheduler { - base := NewBaseScheduler(opController) + base := newBaseHotScheduler(opController) handler := newGrantHotRegionHandler(conf) ret := &grantHotRegionScheduler{ - BaseScheduler: base, - conf: conf, - handler: handler, - r: rand.New(rand.NewSource(time.Now().UnixNano())), - types: []statistics.RWType{statistics.Read, statistics.Write}, - } - for ty := resourceType(0); ty < resourceTypeLen; ty++ { - ret.stLoadInfos[ty] = map[uint64]*statistics.StoreLoadDetail{} + baseHotScheduler: base, + conf: conf, + handler: handler, } return ret } @@ -272,32 +262,13 @@ func newGrantHotRegionHandler(config *grantHotRegionSchedulerConfig) http.Handle func (s *grantHotRegionScheduler) Schedule(cluster schedule.Cluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { schedulerCounter.WithLabelValues(s.GetName(), "schedule").Inc() - i := s.r.Int() % len(s.types) - return s.dispatch(s.types[i], cluster), nil + rw := s.randomRWType() + s.prepareForBalance(rw, cluster) + return s.dispatch(rw, cluster), nil } func (s *grantHotRegionScheduler) dispatch(typ statistics.RWType, cluster schedule.Cluster) []*operator.Operator { - storeInfos := statistics.SummaryStoreInfos(cluster.GetStores()) - storesLoads := cluster.GetStoresLoads() - isTraceRegionFlow := cluster.GetOpts().IsTraceRegionFlow() - - var stLoadInfos map[uint64]*statistics.StoreLoadDetail - switch typ { - case statistics.Read: - stLoadInfos = statistics.SummaryStoresLoad( - storeInfos, - storesLoads, - cluster.RegionReadStats(), - isTraceRegionFlow, - statistics.Read, core.RegionKind) - case statistics.Write: - stLoadInfos = statistics.SummaryStoresLoad( - storeInfos, - storesLoads, - cluster.RegionWriteStats(), - isTraceRegionFlow, - statistics.Write, core.RegionKind) - } + stLoadInfos := s.stLoadInfos[buildResourceType(typ, core.RegionKind)] infos := make([]*statistics.StoreLoadDetail, len(stLoadInfos)) index := 0 for _, info := range stLoadInfos { diff --git a/server/schedulers/hot_region.go b/server/schedulers/hot_region.go index a504a29f4fa5..2b5d99db6ee4 100644 --- a/server/schedulers/hot_region.go +++ b/server/schedulers/hot_region.go @@ -38,6 +38,106 @@ import ( "go.uber.org/zap" ) +type baseHotScheduler struct { + *BaseScheduler + // store information, including pending Influence by resource type + // Every time `Schedule()` will recalculate it. + stInfos map[uint64]*statistics.StoreSummaryInfo + // temporary states but exported to API or metrics + // Every time `Schedule()` will recalculate it. + stLoadInfos [resourceTypeLen]map[uint64]*statistics.StoreLoadDetail + // temporary states + // Every time `Schedule()` will recalculate it. + storesLoads map[uint64][]float64 + // regionPendings stores regionID -> pendingInfluence + // this records regionID which have pending Operator by operation type. During filterHotPeers, the hot peers won't + // be selected if its owner region is tracked in this attribute. 
+ regionPendings map[uint64]*pendingInfluence + types []statistics.RWType + r *rand.Rand +} + +func newBaseHotScheduler(opController *schedule.OperatorController) *baseHotScheduler { + base := NewBaseScheduler(opController) + ret := &baseHotScheduler{ + BaseScheduler: base, + types: []statistics.RWType{statistics.Write, statistics.Read}, + regionPendings: make(map[uint64]*pendingInfluence), + r: rand.New(rand.NewSource(time.Now().UnixNano())), + } + for ty := resourceType(0); ty < resourceTypeLen; ty++ { + ret.stLoadInfos[ty] = map[uint64]*statistics.StoreLoadDetail{} + } + return ret +} + +// prepareForBalance calculate the summary of pending Influence for each store and prepare the load detail for +// each store, only update read or write load detail +func (h *baseHotScheduler) prepareForBalance(rw statistics.RWType, cluster schedule.Cluster) { + h.stInfos = statistics.SummaryStoreInfos(cluster.GetStores()) + h.summaryPendingInfluence(cluster) + h.storesLoads = cluster.GetStoresLoads() + isTraceRegionFlow := cluster.GetOpts().IsTraceRegionFlow() + + prepare := func(regionStats map[uint64][]*statistics.HotPeerStat, resource core.ResourceKind) { + ty := buildResourceType(rw, resource) + h.stLoadInfos[ty] = statistics.SummaryStoresLoad( + h.stInfos, + h.storesLoads, + regionStats, + isTraceRegionFlow, + rw, resource) + } + switch rw { + case statistics.Read: + // update read statistics + regionRead := cluster.RegionReadStats() + prepare(regionRead, core.LeaderKind) + prepare(regionRead, core.RegionKind) + case statistics.Write: + // update write statistics + regionWrite := cluster.RegionWriteStats() + prepare(regionWrite, core.LeaderKind) + prepare(regionWrite, core.RegionKind) + } +} + +// summaryPendingInfluence calculate the summary of pending Influence for each store +// and clean the region from regionInfluence if they have ended operator. +// It makes each dim rate or count become `weight` times to the origin value. +func (h *baseHotScheduler) summaryPendingInfluence(cluster schedule.Cluster) { + for id, p := range h.regionPendings { + from := h.stInfos[p.from] + to := h.stInfos[p.to] + maxZombieDur := p.maxZombieDuration + weight, needGC := calcPendingInfluence(p.op, maxZombieDur) + + if needGC { + delete(h.regionPendings, id) + continue + } + + if from != nil && weight > 0 { + from.AddInfluence(&p.origin, -weight) + } + if to != nil && weight > 0 { + to.AddInfluence(&p.origin, weight) + } + } + for storeID, info := range h.stInfos { + storeLabel := strconv.FormatUint(storeID, 10) + if infl := info.PendingSum; infl != nil { + statistics.ForeachRegionStats(func(rwTy statistics.RWType, dim int, kind statistics.RegionStatKind) { + cluster.SetHotPendingInfluenceMetrics(storeLabel, rwTy.String(), statistics.DimToString(dim), infl.Loads[kind]) + }) + } + } +} + +func (h *baseHotScheduler) randomRWType() statistics.RWType { + return h.types[h.r.Int()%len(h.types)] +} + func init() { schedule.RegisterSliceDecoderBuilder(HotRegionType, func(args []string) schedule.ConfigDecoder { return func(v interface{}) error { @@ -91,40 +191,21 @@ var ( type hotScheduler struct { name string - *BaseScheduler + *baseHotScheduler syncutil.RWMutex - types []statistics.RWType - r *rand.Rand - - // regionPendings stores regionID -> pendingInfluence - // this records regionID which have pending Operator by operation type. During filterHotPeers, the hot peers won't - // be selected if its owner region is tracked in this attribute. 
- regionPendings map[uint64]*pendingInfluence - - // store information, including pending Influence by resource type - // Every time `Schedule()` will recalculate it. - stInfos map[uint64]*statistics.StoreSummaryInfo - // temporary states but exported to API or metrics - // Every time `Schedule()` will recalculate it. - stLoadInfos [resourceTypeLen]map[uint64]*statistics.StoreLoadDetail - // config of hot scheduler conf *hotRegionSchedulerConfig searchRevertRegions [resourceTypeLen]bool // Whether to search revert regions. } func newHotScheduler(opController *schedule.OperatorController, conf *hotRegionSchedulerConfig) *hotScheduler { - base := NewBaseScheduler(opController) + base := newBaseHotScheduler(opController) ret := &hotScheduler{ - name: HotRegionName, - BaseScheduler: base, - types: []statistics.RWType{statistics.Write, statistics.Read}, - r: rand.New(rand.NewSource(time.Now().UnixNano())), - regionPendings: make(map[uint64]*pendingInfluence), - conf: conf, + name: HotRegionName, + baseHotScheduler: base, + conf: conf, } for ty := resourceType(0); ty < resourceTypeLen; ty++ { - ret.stLoadInfos[ty] = map[uint64]*statistics.StoreLoadDetail{} ret.searchRevertRegions[ty] = false } return ret @@ -164,13 +245,13 @@ func (h *hotScheduler) IsScheduleAllowed(cluster schedule.Cluster) bool { func (h *hotScheduler) Schedule(cluster schedule.Cluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { schedulerCounter.WithLabelValues(h.GetName(), "schedule").Inc() - return h.dispatch(h.types[h.r.Int()%len(h.types)], cluster), nil + rw := h.randomRWType() + return h.dispatch(rw, cluster), nil } func (h *hotScheduler) dispatch(typ statistics.RWType, cluster schedule.Cluster) []*operator.Operator { h.Lock() defer h.Unlock() - h.prepareForBalance(typ, cluster) // it can not move earlier to support to use api and metrics. if h.conf.IsForbidRWType(typ) { @@ -186,86 +267,6 @@ func (h *hotScheduler) dispatch(typ statistics.RWType, cluster schedule.Cluster) return nil } -// prepareForBalance calculate the summary of pending Influence for each store and prepare the load detail for -// each store -func (h *hotScheduler) prepareForBalance(typ statistics.RWType, cluster schedule.Cluster) { - h.stInfos = statistics.SummaryStoreInfos(cluster.GetStores()) - h.summaryPendingInfluence(cluster) - storesLoads := cluster.GetStoresLoads() - isTraceRegionFlow := cluster.GetOpts().IsTraceRegionFlow() - - switch typ { - case statistics.Read: - // update read statistics - regionRead := cluster.RegionReadStats() - h.stLoadInfos[readLeader] = statistics.SummaryStoresLoad( - h.stInfos, - storesLoads, - regionRead, - isTraceRegionFlow, - statistics.Read, core.LeaderKind) - h.stLoadInfos[readPeer] = statistics.SummaryStoresLoad( - h.stInfos, - storesLoads, - regionRead, - isTraceRegionFlow, - statistics.Read, core.RegionKind) - case statistics.Write: - // update write statistics - regionWrite := cluster.RegionWriteStats() - h.stLoadInfos[writeLeader] = statistics.SummaryStoresLoad( - h.stInfos, - storesLoads, - regionWrite, - isTraceRegionFlow, - statistics.Write, core.LeaderKind) - h.stLoadInfos[writePeer] = statistics.SummaryStoresLoad( - h.stInfos, - storesLoads, - regionWrite, - isTraceRegionFlow, - statistics.Write, core.RegionKind) - } -} - -// summaryPendingInfluence calculate the summary of pending Influence for each store -// and clean the region from regionInfluence if they have ended operator. -// It makes each dim rate or count become `weight` times to the origin value. 
-func (h *hotScheduler) summaryPendingInfluence(cluster schedule.Cluster) { - for id, p := range h.regionPendings { - from := h.stInfos[p.from] - to := h.stInfos[p.to] - maxZombieDur := p.maxZombieDuration - weight, needGC := h.calcPendingInfluence(p.op, maxZombieDur) - - if needGC { - delete(h.regionPendings, id) - schedulerStatus.WithLabelValues(h.GetName(), "pending_op_infos").Dec() - log.Debug("gc pending influence in hot region scheduler", - zap.Uint64("region-id", id), - zap.Time("create", p.op.GetCreateTime()), - zap.Time("now", time.Now()), - zap.Duration("zombie", maxZombieDur)) - continue - } - - if from != nil && weight > 0 { - from.AddInfluence(&p.origin, -weight) - } - if to != nil && weight > 0 { - to.AddInfluence(&p.origin, weight) - } - } - for storeID, info := range h.stInfos { - storeLabel := strconv.FormatUint(storeID, 10) - if infl := info.PendingSum; infl != nil { - statistics.ForeachRegionStats(func(rwTy statistics.RWType, dim int, kind statistics.RegionStatKind) { - cluster.SetHotPendingInfluenceMetrics(storeLabel, rwTy.String(), statistics.DimToString(dim), infl.Loads[kind]) - }) - } - } -} - func (h *hotScheduler) tryAddPendingInfluence(op *operator.Operator, srcStore, dstStore uint64, infl statistics.Influence, maxZombieDur time.Duration) bool { regionID := op.RegionID() _, ok := h.regionPendings[regionID] @@ -277,7 +278,6 @@ func (h *hotScheduler) tryAddPendingInfluence(op *operator.Operator, srcStore, d influence := newPendingInfluence(op, srcStore, dstStore, infl, maxZombieDur) h.regionPendings[regionID] = influence - schedulerStatus.WithLabelValues(h.GetName(), "pending_op_infos").Inc() statistics.ForeachRegionStats(func(rwTy statistics.RWType, dim int, kind statistics.RegionStatKind) { hotPeerHist.WithLabelValues(h.GetName(), rwTy.String(), statistics.DimToString(dim)).Observe(infl.Loads[kind]) }) @@ -1446,7 +1446,7 @@ func (bs *balanceSolver) logBestSolution() { } // calcPendingInfluence return the calculate weight of one Operator, the value will between [0,1] -func (h *hotScheduler) calcPendingInfluence(op *operator.Operator, maxZombieDur time.Duration) (weight float64, needGC bool) { +func calcPendingInfluence(op *operator.Operator, maxZombieDur time.Duration) (weight float64, needGC bool) { status := op.CheckAndGetStatus() if !operator.IsEndStatus(status) { return 1, false @@ -1517,6 +1517,26 @@ func toResourceType(rwTy statistics.RWType, opTy opType) resourceType { panic(fmt.Sprintf("invalid arguments for toResourceType: rwTy = %v, opTy = %v", rwTy, opTy)) } +func buildResourceType(rwTy statistics.RWType, ty core.ResourceKind) resourceType { + switch rwTy { + case statistics.Write: + switch ty { + case core.RegionKind: + return writePeer + case core.LeaderKind: + return writeLeader + } + case statistics.Read: + switch ty { + case core.RegionKind: + return readPeer + case core.LeaderKind: + return readLeader + } + } + panic(fmt.Sprintf("invalid arguments for buildResourceType: rwTy = %v, ty = %v", rwTy, ty)) +} + func stringToDim(name string) int { switch name { case statistics.BytePriority: diff --git a/server/schedulers/shuffle_hot_region.go b/server/schedulers/shuffle_hot_region.go index de1eb6fefbd6..e6074a8251a9 100644 --- a/server/schedulers/shuffle_hot_region.go +++ b/server/schedulers/shuffle_hot_region.go @@ -15,9 +15,7 @@ package schedulers import ( - "math/rand" "strconv" - "time" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/log" @@ -78,24 +76,16 @@ type shuffleHotRegionSchedulerConfig struct { // to a random store, and then 
transfer the leader to // the hot peer. type shuffleHotRegionScheduler struct { - *BaseScheduler - stLoadInfos [resourceTypeLen]map[uint64]*statistics.StoreLoadDetail - r *rand.Rand - conf *shuffleHotRegionSchedulerConfig - types []statistics.RWType + *baseHotScheduler + conf *shuffleHotRegionSchedulerConfig } // newShuffleHotRegionScheduler creates an admin scheduler that random balance hot regions func newShuffleHotRegionScheduler(opController *schedule.OperatorController, conf *shuffleHotRegionSchedulerConfig) schedule.Scheduler { - base := NewBaseScheduler(opController) + base := newBaseHotScheduler(opController) ret := &shuffleHotRegionScheduler{ - BaseScheduler: base, - conf: conf, - types: []statistics.RWType{statistics.Read, statistics.Write}, - r: rand.New(rand.NewSource(time.Now().UnixNano())), - } - for ty := resourceType(0); ty < resourceTypeLen; ty++ { - ret.stLoadInfos[ty] = map[uint64]*statistics.StoreLoadDetail{} + baseHotScheduler: base, + conf: conf, } return ret } @@ -130,34 +120,10 @@ func (s *shuffleHotRegionScheduler) IsScheduleAllowed(cluster schedule.Cluster) func (s *shuffleHotRegionScheduler) Schedule(cluster schedule.Cluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { schedulerCounter.WithLabelValues(s.GetName(), "schedule").Inc() - i := s.r.Int() % len(s.types) - return s.dispatch(s.types[i], cluster), nil -} - -func (s *shuffleHotRegionScheduler) dispatch(typ statistics.RWType, cluster schedule.Cluster) []*operator.Operator { - storeInfos := statistics.SummaryStoreInfos(cluster.GetStores()) - storesLoads := cluster.GetStoresLoads() - isTraceRegionFlow := cluster.GetOpts().IsTraceRegionFlow() - - switch typ { - case statistics.Read: - s.stLoadInfos[readLeader] = statistics.SummaryStoresLoad( - storeInfos, - storesLoads, - cluster.RegionReadStats(), - isTraceRegionFlow, - statistics.Read, core.LeaderKind) - return s.randomSchedule(cluster, s.stLoadInfos[readLeader]) - case statistics.Write: - s.stLoadInfos[writeLeader] = statistics.SummaryStoresLoad( - storeInfos, - storesLoads, - cluster.RegionWriteStats(), - isTraceRegionFlow, - statistics.Write, core.LeaderKind) - return s.randomSchedule(cluster, s.stLoadInfos[writeLeader]) - } - return nil + rw := s.randomRWType() + s.prepareForBalance(rw, cluster) + operators := s.randomSchedule(cluster, s.stLoadInfos[buildResourceType(rw, core.LeaderKind)]) + return operators, nil } func (s *shuffleHotRegionScheduler) randomSchedule(cluster schedule.Cluster, loadDetail map[uint64]*statistics.StoreLoadDetail) []*operator.Operator { From d2855288d0d00cfbdc52822e504502517ce4a939 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Wed, 23 Nov 2022 18:55:59 +0800 Subject: [PATCH 58/67] tests: make `TestExternalTS` stable (#5724) close tikv/pd#5682 Signed-off-by: lhy1024 --- tests/server/cluster/cluster_test.go | 51 ++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/tests/server/cluster/cluster_test.go b/tests/server/cluster/cluster_test.go index a2b20fdd790f..ac941a427740 100644 --- a/tests/server/cluster/cluster_test.go +++ b/tests/server/cluster/cluster_test.go @@ -1503,31 +1503,52 @@ func TestExternalTimestamp(t *testing.T) { } { // case3: set external timestamp larger than global ts - req := &pdpb.TsoRequest{ + tsoClient, err := grpcPDClient.Tso(ctx) + re.NoError(err) + defer tsoClient.CloseSend() + // get external ts + req := &pdpb.GetExternalTimestampRequest{ + Header: testutil.NewRequestHeader(clusterID), + } + resp, err := grpcPDClient.GetExternalTimestamp(context.Background(), 
req) + re.NoError(err) + ts = resp.GetTimestamp() + // get global ts + req2 := &pdpb.TsoRequest{ Header: testutil.NewRequestHeader(clusterID), Count: 1, DcLocation: tso.GlobalDCLocation, } - tsoClient, err := grpcPDClient.Tso(ctx) - re.NoError(err) - defer tsoClient.CloseSend() - re.NoError(tsoClient.Send(req)) - resp, err := tsoClient.Recv() + re.NoError(tsoClient.Send(req2)) + resp2, err := tsoClient.Recv() re.NoError(err) - globalTS := tsoutil.GenerateTS(resp.Timestamp) - - req2 := &pdpb.SetExternalTimestampRequest{ + globalTS := resp2.GetTimestamp() + // set external ts larger than global ts + unexpectedTS := tsoutil.ComposeTS(globalTS.Physical+2, 0) + req3 := &pdpb.SetExternalTimestampRequest{ Header: testutil.NewRequestHeader(clusterID), - Timestamp: globalTS + 1, + Timestamp: unexpectedTS, } - _, err = grpcPDClient.SetExternalTimestamp(context.Background(), req2) + _, err = grpcPDClient.SetExternalTimestamp(context.Background(), req3) re.NoError(err) - - req3 := &pdpb.GetExternalTimestampRequest{ + // get external ts again + req4 := &pdpb.GetExternalTimestampRequest{ Header: testutil.NewRequestHeader(clusterID), } - resp2, err := grpcPDClient.GetExternalTimestamp(context.Background(), req3) + resp4, err := grpcPDClient.GetExternalTimestamp(context.Background(), req4) re.NoError(err) - re.Equal(ts, resp2.GetTimestamp()) + // get global ts again + req5 := &pdpb.TsoRequest{ + Header: testutil.NewRequestHeader(clusterID), + Count: 1, + DcLocation: tso.GlobalDCLocation, + } + re.NoError(tsoClient.Send(req5)) + resp5, err := tsoClient.Recv() + re.NoError(err) + currentGlobalTS := tsoutil.GenerateTS(resp5.GetTimestamp()) + // check external ts should not be larger than global ts + re.Equal(1, tsoutil.CompareTimestampUint64(unexpectedTS, currentGlobalTS)) + re.Equal(ts, resp4.GetTimestamp()) } } From 8158054b28da04f8433a01b3f2b8243f4514c244 Mon Sep 17 00:00:00 2001 From: Zwb Date: Thu, 24 Nov 2022 07:45:58 +0800 Subject: [PATCH 59/67] operator: split SwitchWitness from ChangePeer (#5634) close tikv/pd#5633 Signed-off-by: Wenbo Zhang Co-authored-by: Ti Chi Robot --- go.mod | 2 +- go.sum | 4 +- server/schedule/checker/rule_checker_test.go | 45 ++++- server/schedule/operator/builder.go | 156 ++++++++++++++++-- server/schedule/operator/create_operator.go | 17 +- .../schedule/operator/create_operator_test.go | 111 +++++++++++++ server/schedule/operator/kind.go | 2 +- server/schedule/operator/step.go | 133 ++++++++++++--- server/schedule/operator/step_test.go | 34 ++++ tests/client/go.mod | 2 +- tests/client/go.sum | 3 +- 11 files changed, 459 insertions(+), 50 deletions(-) diff --git a/go.mod b/go.mod index 0ea251607306..2bc1c3e793f6 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,7 @@ require ( github.com/pingcap/errcode v0.3.0 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce - github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 + github.com/pingcap/kvproto v0.0.0-20221104101942-09d82b914df1 github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d github.com/pingcap/tidb-dashboard v0.0.0-20221103012625-46facc4b7f6d diff --git a/go.sum b/go.sum index 526c737c18e3..c29abb89978e 100644 --- a/go.sum +++ b/go.sum @@ -417,8 +417,8 @@ github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce h1:Y1kCxlCtlPTMt github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce/go.mod h1:w4PEZ5y16LeofeeGwdgZB4ddv9bLyDuIX+ljstgKZyk= 
github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= github.com/pingcap/kvproto v0.0.0-20200411081810-b85805c9476c/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= -github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 h1:FYgKV9znRQmzVrrJDZ0gUfMIvKLAMU1tu1UKJib8bEQ= -github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20221104101942-09d82b914df1 h1:iJXUNA0LoOYuuMJ6U0tJGg2gCo/8xGZVhKLvuUWNjzw= +github.com/pingcap/kvproto v0.0.0-20221104101942-09d82b914df1/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= diff --git a/server/schedule/checker/rule_checker_test.go b/server/schedule/checker/rule_checker_test.go index 93691a549ec2..eab19933364b 100644 --- a/server/schedule/checker/rule_checker_test.go +++ b/server/schedule/checker/rule_checker_test.go @@ -364,10 +364,53 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness3() { op := suite.rc.Check(r) suite.NotNil(op) suite.Equal("fix-non-witness-peer", op.Desc()) - suite.Equal(uint64(3), op.Step(0).(operator.BecomeNonWitness).StoreID) + suite.Equal(uint64(3), op.Step(0).(operator.RemovePeer).FromStore) + suite.Equal(uint64(3), op.Step(1).(operator.AddLearner).ToStore) } func (suite *ruleCheckerTestSuite) TestFixRuleWitness4() { + suite.cluster.AddLabelsStore(1, 1, map[string]string{"A": "leader"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"B": "voter"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"C": "learner"}) + suite.cluster.AddLeaderRegion(1, 1, 2, 3) + + r := suite.cluster.GetRegion(1) + // set peer3 to witness learner + r = r.Clone(core.WithLearners([]*metapb.Peer{r.GetPeer(3)})) + r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(3)})) + + err := suite.ruleManager.SetRules([]*placement.Rule{ + { + GroupID: "pd", + ID: "default", + Index: 100, + Override: true, + Role: placement.Voter, + Count: 2, + IsWitness: false, + }, + { + GroupID: "pd", + ID: "r1", + Index: 100, + Override: false, + Role: placement.Learner, + Count: 1, + IsWitness: false, + LabelConstraints: []placement.LabelConstraint{ + {Key: "C", Op: "in", Values: []string{"learner"}}, + }, + }, + }) + suite.NoError(err) + + op := suite.rc.Check(r) + suite.NotNil(op) + suite.Equal("fix-non-witness-peer", op.Desc()) + suite.Equal(uint64(3), op.Step(0).(operator.BecomeNonWitness).StoreID) +} + +func (suite *ruleCheckerTestSuite) TestFixRuleWitness5() { suite.cluster.AddLabelsStore(1, 1, map[string]string{"A": "leader"}) suite.cluster.AddLabelsStore(2, 1, map[string]string{"B": "voter"}) suite.cluster.AddLabelsStore(3, 1, map[string]string{"C": "voter"}) diff --git a/server/schedule/operator/builder.go b/server/schedule/operator/builder.go index 8900a63aa2f8..5eb74e047d5e 100644 --- a/server/schedule/operator/builder.go +++ b/server/schedule/operator/builder.go @@ -77,11 +77,12 @@ type Builder struct { forceTargetLeader bool // intermediate states - currentPeers peersMap - currentLeaderStoreID uint64 - toAdd, toRemove, toPromote, toDemote peersMap // pending tasks. - steps []OpStep // generated steps. 
- peerAddStep map[uint64]int // record at which step a peer is created. + currentPeers peersMap + currentLeaderStoreID uint64 + toAdd, toRemove, toPromote, toDemote peersMap + toWitness, toNonWitness, toPromoteAfterSwitchToNonWitness peersMap + steps []OpStep // generated steps. + peerAddStep map[uint64]int // record at which step a peer is created. // comparison function stepPlanPreferFuncs []func(stepPlan) int // for buildStepsWithoutJointConsensus @@ -247,20 +248,40 @@ func (b *Builder) DemoteVoter(storeID uint64) *Builder { return b } -// BecomeNonWitness records a remove witness attr operation in Builder. +// BecomeWitness records a switch to witness operation in Builder. +func (b *Builder) BecomeWitness(storeID uint64) *Builder { + if b.err != nil { + return b + } + if peer, ok := b.targetPeers[storeID]; !ok { + b.err = errors.Errorf("cannot switch peer to witness %d: not found", storeID) + } else if core.IsWitness(peer) { + b.err = errors.Errorf("cannot switch peer to witness %d: is already witness", storeID) + } else { + b.targetPeers.Set(&metapb.Peer{ + Id: peer.GetId(), + StoreId: peer.GetStoreId(), + Role: peer.GetRole(), + IsWitness: true, + }) + } + return b +} + +// BecomeNonWitness records a switch to non-witness operation in Builder. func (b *Builder) BecomeNonWitness(storeID uint64) *Builder { if b.err != nil { return b } if peer, ok := b.targetPeers[storeID]; !ok { - b.err = errors.Errorf("cannot set non-witness attr to peer %d: not found", storeID) + b.err = errors.Errorf("cannot switch peer to non-witness %d: not found", storeID) } else if !core.IsWitness(peer) { - b.err = errors.Errorf("cannot set non-witness attr to peer %d: is already non-witness", storeID) + b.err = errors.Errorf("cannot switch peer to non-witness %d: is already non-witness", storeID) } else { b.targetPeers.Set(&metapb.Peer{ Id: peer.GetId(), StoreId: peer.GetStoreId(), - Role: metapb.PeerRole_Learner, + Role: peer.GetRole(), IsWitness: false, }) } @@ -402,6 +423,9 @@ func (b *Builder) prepareBuild() (string, error) { b.toRemove = newPeersMap() b.toPromote = newPeersMap() b.toDemote = newPeersMap() + b.toWitness = newPeersMap() + b.toNonWitness = newPeersMap() + b.toPromoteAfterSwitchToNonWitness = newPeersMap() voterCount := 0 for _, peer := range b.targetPeers { @@ -413,7 +437,8 @@ func (b *Builder) prepareBuild() (string, error) { return "", errors.New("cannot create operator: target peers have no voter") } - // Diff `originPeers` and `targetPeers` to initialize `toAdd`, `toRemove`, `toPromote`, `toDemote`. + // Diff `originPeers` and `targetPeers` to initialize `toAdd`, `toRemove`, `toPromote`, `toDemote`, + // `toWitness`, `toNonWitness`, `toPromoteAfterSwitchToNonWitness`. // Note: Use `toDemote` only when `useJointConsensus` is true. Otherwise use `toAdd`, `toRemove` instead. for _, o := range b.originPeers { n := b.targetPeers[o.GetStoreId()] @@ -433,6 +458,20 @@ func (b *Builder) prepareBuild() (string, error) { } } + isOriginPeerWitness := core.IsWitness(o) + isTargetPeerWitness := core.IsWitness(n) + if isOriginPeerWitness && !isTargetPeerWitness { + // Demote voter to learner before switch witness to non-witness if needed. 
+ if !core.IsLearner(n) { + n.Role = metapb.PeerRole_Learner + n.IsWitness = true + b.toPromoteAfterSwitchToNonWitness.Set(n) + } + b.toNonWitness.Set(n) + } else if !isOriginPeerWitness && isTargetPeerWitness { + b.toWitness.Set(n) + } + isOriginPeerLearner := core.IsLearner(o) isTargetPeerLearner := core.IsLearner(n) if isOriginPeerLearner && !isTargetPeerLearner { @@ -484,8 +523,11 @@ func (b *Builder) prepareBuild() (string, error) { } } - if len(b.toAdd)+len(b.toRemove)+len(b.toPromote) <= 1 && len(b.toDemote) == 0 && - !(len(b.toRemove) == 1 && len(b.targetPeers) == 1) { + // Although switch witness may have nothing to do with conf change (except switch witness voter to non-witness voter: + // it will domote to learner first, then switch witness, finally promote the non-witness learner to voter back), + // the logic here is reused for batch switch. + if len(b.toAdd)+len(b.toRemove)+len(b.toPromote)+len(b.toWitness)+len(b.toNonWitness)+len(b.toPromoteAfterSwitchToNonWitness) <= 1 && + len(b.toDemote) == 0 && !(len(b.toRemove) == 1 && len(b.targetPeers) == 1) { // If only one peer changed and the change type is not demote, joint consensus is not used. // Unless the changed is 2 voters to 1 voter, see https://github.com/tikv/pd/issues/4411 . b.useJointConsensus = false @@ -517,6 +559,10 @@ func (b *Builder) brief() string { return fmt.Sprintf("evict leader: from store %d to one in %v, or to %d (for compatibility)", b.originLeaderStoreID, b.targetLeaderStoreIDs, b.targetLeaderStoreID) case b.originLeaderStoreID != b.targetLeaderStoreID: return fmt.Sprintf("transfer leader: store %d to %d", b.originLeaderStoreID, b.targetLeaderStoreID) + case len(b.toWitness) > 0: + return fmt.Sprintf("switch peer: store %s to witness", b.toWitness) + case len(b.toNonWitness) > 0: + return fmt.Sprintf("switch peer: store %s to non-witness", b.toNonWitness) default: return "" } @@ -586,6 +632,16 @@ func (b *Builder) buildStepsWithJointConsensus(kind OpKind) (OpKind, error) { kind |= OpRegion } + b.execBatchSwitchWitnesses() + + for _, promote := range b.toPromoteAfterSwitchToNonWitness.IDs() { + peer := b.toPromoteAfterSwitchToNonWitness[promote] + b.toPromote.Set(peer) + kind |= OpRegion + } + b.toPromoteAfterSwitchToNonWitness = newPeersMap() + b.execChangePeerV2(true, false) + return kind, nil } @@ -653,7 +709,7 @@ func (b *Builder) preferOldPeerAsLeader(targetLeaderStoreID uint64) int { func (b *Builder) buildStepsWithoutJointConsensus(kind OpKind) (OpKind, error) { b.initStepPlanPreferFuncs() - for len(b.toAdd) > 0 || len(b.toRemove) > 0 || len(b.toPromote) > 0 || len(b.toDemote) > 0 { + for len(b.toAdd) > 0 || len(b.toRemove) > 0 || len(b.toPromote) > 0 || len(b.toDemote) > 0 || len(b.toNonWitness) > 0 || len(b.toWitness) > 0 { plan := b.peerPlan() if plan.IsEmpty() { return kind, errors.New("fail to build operator: plan is empty, maybe no valid leader") @@ -677,6 +733,14 @@ func (b *Builder) buildStepsWithoutJointConsensus(kind OpKind) (OpKind, error) { b.execRemovePeer(plan.remove) kind |= OpRegion } + if plan.witness != nil { + b.execSwitchToWitness(plan.witness) + kind |= OpRegion + } + if plan.nonWitness != nil { + b.execSwitchToNonWitness(plan.nonWitness) + kind |= OpRegion + } } b.setTargetLeaderIfNotExist() @@ -779,6 +843,41 @@ func (b *Builder) execChangePeerV2(needEnter bool, needTransferLeader bool) { } } +func (b *Builder) execSwitchToNonWitness(peer *metapb.Peer) { + b.steps = append(b.steps, BecomeNonWitness{StoreID: peer.GetStoreId(), PeerID: peer.GetId()}) + delete(b.toNonWitness, 
peer.GetStoreId()) +} + +func (b *Builder) execSwitchToWitness(peer *metapb.Peer) { + b.steps = append(b.steps, BecomeWitness{StoreID: peer.GetStoreId(), PeerID: peer.GetId()}) + delete(b.toWitness, peer.GetStoreId()) +} + +func (b *Builder) execBatchSwitchWitnesses() { + if len(b.toNonWitness)+len(b.toWitness) == 0 { + return + } + + step := BatchSwitchWitness{ + ToWitnesses: make([]BecomeWitness, 0, len(b.toWitness)), + ToNonWitnesses: make([]BecomeNonWitness, 0, len(b.toNonWitness)), + } + + for _, w := range b.toWitness.IDs() { + peer := b.toWitness[w] + step.ToWitnesses = append(step.ToWitnesses, BecomeWitness{StoreID: peer.GetStoreId(), PeerID: peer.GetId()}) + } + b.toWitness = newPeersMap() + + for _, nw := range b.toNonWitness.IDs() { + peer := b.toNonWitness[nw] + step.ToNonWitnesses = append(step.ToNonWitnesses, BecomeNonWitness{StoreID: peer.GetStoreId(), PeerID: peer.GetId()}) + } + b.toNonWitness = newPeersMap() + + b.steps = append(b.steps, step) +} + // check if the peer is allowed to become the leader. func (b *Builder) allowLeader(peer *metapb.Peer, ignoreClusterLimit bool) bool { // these peer roles are not allowed to become leader. @@ -830,6 +929,7 @@ func (b *Builder) allowLeader(peer *metapb.Peer, ignoreClusterLimit bool) bool { // 7. demote voter. // 8. remove voter/learner. // 9. add voter/learner. +// 10. switch a witness learner to non-witness learner // Plan 1-5 (replace plans) do not change voter/learner count, so they have higher priority. type stepPlan struct { leaderBeforeAdd uint64 // leader before adding peer. @@ -838,15 +938,17 @@ type stepPlan struct { remove *metapb.Peer promote *metapb.Peer demote *metapb.Peer + witness *metapb.Peer + nonWitness *metapb.Peer } func (p stepPlan) String() string { - return fmt.Sprintf("stepPlan{leaderBeforeAdd=%v,add={%s},promote={%s},leaderBeforeRemove=%v,demote={%s},remove={%s}}", - p.leaderBeforeAdd, p.add, p.promote, p.leaderBeforeRemove, p.demote, p.remove) + return fmt.Sprintf("stepPlan{leaderBeforeAdd=%v,add={%s},promote={%s},leaderBeforeRemove=%v,demote={%s},remove={%s},witness={%s},nonWitness={%s}}", + p.leaderBeforeAdd, p.add, p.promote, p.leaderBeforeRemove, p.demote, p.remove, p.witness, p.nonWitness) } func (p stepPlan) IsEmpty() bool { - return p.promote == nil && p.demote == nil && p.add == nil && p.remove == nil + return p.promote == nil && p.demote == nil && p.add == nil && p.remove == nil && p.witness == nil && p.nonWitness == nil } func (b *Builder) peerPlan() stepPlan { @@ -867,6 +969,12 @@ func (b *Builder) peerPlan() stepPlan { if p := b.planAddPeer(); !p.IsEmpty() { return p } + if p := b.planWitness(); !p.IsEmpty() { + return p + } + if p := b.planNonWitness(); !p.IsEmpty() { + return p + } return stepPlan{} } @@ -989,6 +1097,22 @@ func (b *Builder) planRemovePeer() stepPlan { return best } +func (b *Builder) planWitness() stepPlan { + for _, i := range b.toWitness.IDs() { + peer := b.toWitness[i] + return stepPlan{witness: peer} + } + return stepPlan{} +} + +func (b *Builder) planNonWitness() stepPlan { + for _, i := range b.toNonWitness.IDs() { + peer := b.toNonWitness[i] + return stepPlan{nonWitness: peer} + } + return stepPlan{} +} + func (b *Builder) planAddPeer() stepPlan { var best stepPlan for _, i := range b.toAdd.IDs() { diff --git a/server/schedule/operator/create_operator.go b/server/schedule/operator/create_operator.go index a3f982481b92..206d839ab284 100644 --- a/server/schedule/operator/create_operator.go +++ b/server/schedule/operator/create_operator.go @@ -158,8 +158,9 @@ func 
CreateMergeRegionOperator(desc string, ci ClusterInformer, source *core.Reg peers := make(map[uint64]*metapb.Peer) for _, p := range target.GetPeers() { peers[p.GetStoreId()] = &metapb.Peer{ - StoreId: p.GetStoreId(), - Role: p.GetRole(), + StoreId: p.GetStoreId(), + Role: p.GetRole(), + IsWitness: p.GetIsWitness(), } } matchOp, err := NewBuilder("", ci, source). @@ -197,7 +198,7 @@ func isRegionMatch(a, b *core.RegionInfo) bool { } for _, pa := range a.GetPeers() { pb := b.GetStorePeer(pa.GetStoreId()) - if pb == nil || core.IsLearner(pb) != core.IsLearner(pa) { + if pb == nil || core.IsLearner(pb) != core.IsLearner(pa) || core.IsWitness(pb) != core.IsWitness(pa) { return false } } @@ -294,12 +295,14 @@ func CreateLeaveJointStateOperator(desc string, ci ClusterInformer, origin *core // CreateWitnessPeerOperator creates an operator that set a follower or learner peer with witness func CreateWitnessPeerOperator(desc string, ci ClusterInformer, region *core.RegionInfo, peer *metapb.Peer) (*Operator, error) { - brief := fmt.Sprintf("create witness: region %v peer %v on store %v", region.GetID(), peer.Id, peer.StoreId) - return NewOperator(desc, brief, region.GetID(), region.GetRegionEpoch(), OpRegion, region.GetApproximateSize(), BecomeWitness{StoreID: peer.StoreId, PeerID: peer.Id}), nil + return NewBuilder(desc, ci, region). + BecomeWitness(peer.GetStoreId()). + Build(0) } // CreateNonWitnessPeerOperator creates an operator that set a peer with non-witness func CreateNonWitnessPeerOperator(desc string, ci ClusterInformer, region *core.RegionInfo, peer *metapb.Peer) (*Operator, error) { - brief := fmt.Sprintf("promote to non-witness: region %v peer %v on store %v", region.GetID(), peer.Id, peer.StoreId) - return NewOperator(desc, brief, region.GetID(), region.GetRegionEpoch(), OpRegion, region.GetApproximateSize(), BecomeNonWitness{StoreID: peer.StoreId, PeerID: peer.Id}), nil + return NewBuilder(desc, ci, region). + BecomeNonWitness(peer.GetStoreId()). 
+ Build(0) } diff --git a/server/schedule/operator/create_operator_test.go b/server/schedule/operator/create_operator_test.go index 55cac2c7004a..0ae0c7b0b94e 100644 --- a/server/schedule/operator/create_operator_test.go +++ b/server/schedule/operator/create_operator_test.go @@ -237,6 +237,56 @@ func (suite *createOperatorTestSuite) TestCreateMergeRegionOperator() { true, nil, }, + { + []*metapb.Peer{ + {Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter, IsWitness: true}, + }, + []*metapb.Peer{ + {Id: 4, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 3, StoreId: 3, Role: metapb.PeerRole_Voter, IsWitness: true}, + }, + OpMerge | OpRegion, + false, + []OpStep{ + AddLearner{ToStore: 3}, + ChangePeerV2Enter{ + PromoteLearners: []PromoteLearner{{ToStore: 3}}, + DemoteVoters: []DemoteVoter{{ToStore: 2}}, + }, + ChangePeerV2Leave{ + PromoteLearners: []PromoteLearner{{ToStore: 3}}, + DemoteVoters: []DemoteVoter{{ToStore: 2}}, + }, + RemovePeer{FromStore: 2}, + }, + }, + { + []*metapb.Peer{ + {Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter, IsWitness: true}, + {Id: 3, StoreId: 3, Role: metapb.PeerRole_Voter}, + }, + []*metapb.Peer{ + {Id: 4, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 6, StoreId: 3, Role: metapb.PeerRole_Voter, IsWitness: true}, + {Id: 5, StoreId: 2, Role: metapb.PeerRole_Voter}, + }, + OpMerge | OpRegion, + false, + []OpStep{ + ChangePeerV2Enter{ + DemoteVoters: []DemoteVoter{{ToStore: 2, PeerID: 2, IsWitness: true}}, + }, + BatchSwitchWitness{ + ToWitnesses: []BecomeWitness{{PeerID: 3, StoreID: 3}}, + ToNonWitnesses: []BecomeNonWitness{{PeerID: 2, StoreID: 2}}, + }, + ChangePeerV2Enter{ + PromoteLearners: []PromoteLearner{{PeerID: 2, ToStore: 2, IsWitness: false}}, + }, + }, + }, } for _, testCase := range testCases { @@ -1138,3 +1188,64 @@ func TestCreateLeaveJointStateOperatorWithoutFitRules(t *testing.T) { re.Equal(uint64(4), step1.PromoteLearners[0].ToStore) re.Equal(uint64(3), step1.DemoteVoters[0].ToStore) } + +func (suite *createOperatorTestSuite) TestCreateNonWitnessPeerOperator() { + type testCase struct { + originPeers []*metapb.Peer // first is leader + kind OpKind + expectedError bool + prepareSteps []OpStep + } + testCases := []testCase{ + { + []*metapb.Peer{ + {Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 2, StoreId: 2, Role: metapb.PeerRole_Learner, IsWitness: true}, + }, + OpRegion, + false, + []OpStep{ + BecomeNonWitness{StoreID: 2, PeerID: 2}, + }, + }, + { + []*metapb.Peer{ + {Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter, IsWitness: true}, + }, + OpRegion, + false, + []OpStep{ + ChangePeerV2Enter{ + DemoteVoters: []DemoteVoter{{ToStore: 2, PeerID: 2, IsWitness: true}}, + }, + BecomeNonWitness{StoreID: 2, PeerID: 2}, + ChangePeerV2Enter{ + PromoteLearners: []PromoteLearner{{ToStore: 2, PeerID: 2, IsWitness: false}}, + }, + }, + }, + } + + for _, testCase := range testCases { + region := core.NewRegionInfo(&metapb.Region{Id: 68, Peers: testCase.originPeers}, testCase.originPeers[0]) + op, err := CreateNonWitnessPeerOperator("test", suite.cluster, region, testCase.originPeers[1]) + suite.NoError(err) + suite.NotNil(op) + suite.Equal(testCase.kind, op.kind) + + expectedSteps := testCase.prepareSteps + for i := 0; i < op.Len(); i++ { + switch step := op.Step(i).(type) { + case ChangePeerV2Enter: + suite.Len(step.DemoteVoters, len(expectedSteps[i].(ChangePeerV2Enter).DemoteVoters)) + for j, d := 
range expectedSteps[i].(ChangePeerV2Enter).DemoteVoters { + suite.Equal(d.ToStore, step.DemoteVoters[j].ToStore) + } + case BecomeNonWitness: + suite.Equal(step.StoreID, expectedSteps[i].(BecomeNonWitness).StoreID) + suite.Equal(step.PeerID, expectedSteps[i].(BecomeNonWitness).PeerID) + } + } + } +} diff --git a/server/schedule/operator/kind.go b/server/schedule/operator/kind.go index 6f291d1c5734..51d4085990ae 100644 --- a/server/schedule/operator/kind.go +++ b/server/schedule/operator/kind.go @@ -38,7 +38,7 @@ const ( OpSplit // Initiated by hot region scheduler. OpHotRegion - // Include peer addition or removal. This means that this operator may take a long time. + // Include peer addition or removal or switch witness. This means that this operator may take a long time. OpRegion // Include leader transfer. OpLeader diff --git a/server/schedule/operator/step.go b/server/schedule/operator/step.go index 8e3d177b0a2d..c6a3f28d2374 100644 --- a/server/schedule/operator/step.go +++ b/server/schedule/operator/step.go @@ -212,17 +212,17 @@ func (ap AddPeer) GetCmd(region *core.RegionInfo, useConfChangeV2 bool) *pdpb.Re // BecomeWitness is an OpStep that makes a peer become a witness. type BecomeWitness struct { - StoreID, PeerID uint64 + PeerID, StoreID uint64 } // ConfVerChanged returns the delta value for version increased by this step. func (bw BecomeWitness) ConfVerChanged(region *core.RegionInfo) uint64 { peer := region.GetStorePeer(bw.StoreID) - return typeutil.BoolToUint64(peer.GetId() == bw.PeerID) + return typeutil.BoolToUint64((peer.GetId() == bw.PeerID) && peer.GetIsWitness()) } func (bw BecomeWitness) String() string { - return fmt.Sprintf("change peer %v on store %v to witness", bw.PeerID, bw.StoreID) + return fmt.Sprintf("switch peer %v on store %v to witness", bw.PeerID, bw.StoreID) } // IsFinish checks if current step is finished. @@ -264,26 +264,23 @@ func (bw BecomeWitness) Timeout(regionSize int64) time.Duration { } // GetCmd returns the schedule command for heartbeat response. -func (bw BecomeWitness) GetCmd(region *core.RegionInfo, useConfChangeV2 bool) *pdpb.RegionHeartbeatResponse { - if core.IsLearner(region.GetStorePeer(bw.StoreID)) { - return createResponse(addLearnerNode(bw.PeerID, bw.StoreID, true), useConfChangeV2) - } - return createResponse(addNode(bw.PeerID, bw.StoreID, true), useConfChangeV2) +func (bw BecomeWitness) GetCmd(_ *core.RegionInfo, _ bool) *pdpb.RegionHeartbeatResponse { + return switchWitness(bw.PeerID, true) } // BecomeNonWitness is an OpStep that makes a peer become a non-witness. type BecomeNonWitness struct { - StoreID, PeerID uint64 + PeerID, StoreID uint64 } // ConfVerChanged returns the delta value for version increased by this step. func (bn BecomeNonWitness) ConfVerChanged(region *core.RegionInfo) uint64 { peer := region.GetStorePeer(bn.StoreID) - return typeutil.BoolToUint64(peer.GetId() == bn.PeerID) + return typeutil.BoolToUint64((peer.GetId() == bn.PeerID) && !peer.GetIsWitness() && (region.GetPendingPeer(peer.GetId()) == nil)) } func (bn BecomeNonWitness) String() string { - return fmt.Sprintf("change peer %v on store %v to non-witness", bn.PeerID, bn.StoreID) + return fmt.Sprintf("switch peer %v on store %v to non-witness", bn.PeerID, bn.StoreID) } // IsFinish checks if current step is finished. @@ -325,7 +322,102 @@ func (bn BecomeNonWitness) Timeout(regionSize int64) time.Duration { // GetCmd returns the schedule command for heartbeat response. 
func (bn BecomeNonWitness) GetCmd(region *core.RegionInfo, useConfChangeV2 bool) *pdpb.RegionHeartbeatResponse { - return createResponse(addLearnerNode(bn.PeerID, bn.StoreID, false), useConfChangeV2) + return switchWitness(bn.PeerID, false) +} + +// BatchSwitchWitness is an OpStep that batch switch witness. +type BatchSwitchWitness struct { + ToWitnesses []BecomeWitness + ToNonWitnesses []BecomeNonWitness +} + +func (bsw BatchSwitchWitness) String() string { + b := &strings.Builder{} + _, _ = b.WriteString("batch switch witness") + for _, w := range bsw.ToWitnesses { + _, _ = fmt.Fprintf(b, ", switch peer %v on store %v to witness", w.PeerID, w.StoreID) + } + for _, nw := range bsw.ToNonWitnesses { + _, _ = fmt.Fprintf(b, ", switch peer %v on store %v to non-witness", nw.PeerID, nw.StoreID) + } + return b.String() +} + +// ConfVerChanged returns the delta value for version increased by this step. +func (bsw BatchSwitchWitness) ConfVerChanged(region *core.RegionInfo) uint64 { + for _, w := range bsw.ToWitnesses { + if w.ConfVerChanged(region) == 0 { + return 0 + } + } + for _, nw := range bsw.ToNonWitnesses { + if nw.ConfVerChanged(region) == 0 { + return 0 + } + } + return uint64(len(bsw.ToWitnesses) + len(bsw.ToNonWitnesses)) +} + +// IsFinish checks if current step is finished. +func (bsw BatchSwitchWitness) IsFinish(region *core.RegionInfo) bool { + for _, w := range bsw.ToWitnesses { + if !w.IsFinish(region) { + return false + } + } + for _, nw := range bsw.ToNonWitnesses { + if !nw.IsFinish(region) { + return false + } + } + return true +} + +// CheckInProgress checks if the step is in the progress of advancing. +func (bsw BatchSwitchWitness) CheckInProgress(ci ClusterInformer, region *core.RegionInfo) error { + for _, w := range bsw.ToWitnesses { + if err := w.CheckInProgress(ci, region); err != nil { + return err + } + } + for _, nw := range bsw.ToNonWitnesses { + if err := nw.CheckInProgress(ci, region); err != nil { + return err + } + } + return nil +} + +// Influence calculates the store difference that current step makes. +func (bsw BatchSwitchWitness) Influence(opInfluence OpInfluence, region *core.RegionInfo) { + for _, w := range bsw.ToWitnesses { + w.Influence(opInfluence, region) + } + for _, nw := range bsw.ToNonWitnesses { + nw.Influence(opInfluence, region) + } +} + +// Timeout returns duration that current step may take. +func (bsw BatchSwitchWitness) Timeout(regionSize int64) time.Duration { + count := uint64(len(bsw.ToWitnesses)+len(bsw.ToNonWitnesses)) + 1 + return fastStepWaitDuration(regionSize) * time.Duration(count) +} + +// GetCmd returns the schedule command for heartbeat response. +func (bsw BatchSwitchWitness) GetCmd(region *core.RegionInfo, useConfChangeV2 bool) *pdpb.RegionHeartbeatResponse { + switches := make([]*pdpb.SwitchWitness, 0, len(bsw.ToWitnesses)+len(bsw.ToNonWitnesses)) + for _, w := range bsw.ToWitnesses { + switches = append(switches, w.GetCmd(region, useConfChangeV2).SwitchWitnesses.SwitchWitnesses...) + } + for _, nw := range bsw.ToNonWitnesses { + switches = append(switches, nw.GetCmd(region, useConfChangeV2).SwitchWitnesses.SwitchWitnesses...) + } + return &pdpb.RegionHeartbeatResponse{ + SwitchWitnesses: &pdpb.BatchSwitchWitness{ + SwitchWitnesses: switches, + }, + } } // AddLearner is an OpStep that adds a region learner peer. 
@@ -646,11 +738,7 @@ type DemoteVoter struct { } func (dv DemoteVoter) String() string { - info := "non-witness" - if dv.IsWitness { - info = "witness" - } - return fmt.Sprintf("demote voter peer %v on store %v to %v learner", dv.PeerID, dv.ToStore, info) + return fmt.Sprintf("demote voter peer %v on store %v to learner", dv.PeerID, dv.ToStore) } // ConfVerChanged returns the delta value for version increased by this step. @@ -667,9 +755,6 @@ func (dv DemoteVoter) IsFinish(region *core.RegionInfo) bool { log.Warn("obtain unexpected peer", zap.String("expect", dv.String()), zap.Uint64("obtain-learner", peer.GetId())) return false } - if peer.IsWitness != dv.IsWitness { - return false - } return region.GetPendingLearner(peer.GetId()) == nil } return false @@ -1017,3 +1102,11 @@ func createResponse(change *pdpb.ChangePeer, useConfChangeV2 bool) *pdpb.RegionH ChangePeer: change, } } + +func switchWitness(peerID uint64, isWitness bool) *pdpb.RegionHeartbeatResponse { + return &pdpb.RegionHeartbeatResponse{ + SwitchWitnesses: &pdpb.BatchSwitchWitness{ + SwitchWitnesses: []*pdpb.SwitchWitness{{PeerId: peerID, IsWitness: isWitness}}, + }, + } +} diff --git a/server/schedule/operator/step_test.go b/server/schedule/operator/step_test.go index 983723815a14..5bf097fa0867 100644 --- a/server/schedule/operator/step_test.go +++ b/server/schedule/operator/step_test.go @@ -526,6 +526,40 @@ func (suite *operatorStepTestSuite) TestChangePeerV2Leave() { suite.check(cpl, desc, testCases) } +func (suite *operatorStepTestSuite) TestSwitchToWitness() { + step := BecomeWitness{StoreID: 2, PeerID: 2} + testCases := []testCase{ + { + []*metapb.Peer{ + {Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 2, StoreId: 2, Role: metapb.PeerRole_Learner}, + }, + 0, + false, + suite.NoError, + }, + { + []*metapb.Peer{ + {Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter}, + }, + 0, + false, + suite.NoError, + }, + { + []*metapb.Peer{ + {Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter, IsWitness: true}, + }, + 1, + true, + suite.NoError, + }, + } + suite.check(step, "switch peer 2 on store 2 to witness", testCases) +} + func (suite *operatorStepTestSuite) check(step OpStep, desc string, testCases []testCase) { suite.Equal(desc, step.String()) for _, testCase := range testCases { diff --git a/tests/client/go.mod b/tests/client/go.mod index 70644bda3caf..dc856461c7f2 100644 --- a/tests/client/go.mod +++ b/tests/client/go.mod @@ -5,7 +5,7 @@ go 1.18 require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 - github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 + github.com/pingcap/kvproto v0.0.0-20221104101942-09d82b914df1 github.com/stretchr/testify v1.7.0 github.com/tikv/pd v0.0.0-00010101000000-000000000000 github.com/tikv/pd/client v0.0.0-00010101000000-000000000000 diff --git a/tests/client/go.sum b/tests/client/go.sum index 914456539682..c33df7a43ffa 100644 --- a/tests/client/go.sum +++ b/tests/client/go.sum @@ -390,8 +390,9 @@ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= github.com/pingcap/kvproto v0.0.0-20200411081810-b85805c9476c/go.mod 
h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI= -github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 h1:FYgKV9znRQmzVrrJDZ0gUfMIvKLAMU1tu1UKJib8bEQ= github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20221104101942-09d82b914df1 h1:iJXUNA0LoOYuuMJ6U0tJGg2gCo/8xGZVhKLvuUWNjzw= +github.com/pingcap/kvproto v0.0.0-20221104101942-09d82b914df1/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= From 5ade4aa6a0abdbb7091d4b91a36993702646eaff Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 24 Nov 2022 16:19:58 +0800 Subject: [PATCH 60/67] tests: speed up test by reducing sleep time (#5727) ref tikv/pd#4399, ref tikv/pd#5721 Signed-off-by: lhy1024 Co-authored-by: Ti Chi Robot --- pkg/cache/cache_test.go | 18 ++-- pkg/mock/mockcluster/mockcluster.go | 8 +- server/core/region_option.go | 5 +- server/core/region_test.go | 4 +- server/statistics/hot_peer_cache_test.go | 4 +- tests/pdctl/hot/hot_test.go | 51 ++++++--- tests/pdctl/store/store_test.go | 4 +- .../server/storage/hot_region_storage_test.go | 100 ++++++++++-------- 8 files changed, 111 insertions(+), 83 deletions(-) diff --git a/pkg/cache/cache_test.go b/pkg/cache/cache_test.go index 1c4bc8c58cfe..37ddd7fad64f 100644 --- a/pkg/cache/cache_test.go +++ b/pkg/cache/cache_test.go @@ -28,10 +28,10 @@ func TestExpireRegionCache(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - cache := NewIDTTL(ctx, time.Second, 2*time.Second) + cache := NewIDTTL(ctx, 10*time.Millisecond, 20*time.Millisecond) // Test Pop - cache.PutWithTTL(9, "9", 5*time.Second) - cache.PutWithTTL(10, "10", 5*time.Second) + cache.PutWithTTL(9, "9", 50*time.Millisecond) + cache.PutWithTTL(10, "10", 50*time.Millisecond) re.Equal(2, cache.Len()) k, v, success := cache.pop() re.True(success) @@ -50,17 +50,17 @@ func TestExpireRegionCache(t *testing.T) { re.True(ok) re.Equal(expV, v2.(string)) - cache.PutWithTTL(11, "11", time.Second) - time.Sleep(5 * time.Second) + cache.PutWithTTL(11, "11", 10*time.Millisecond) + time.Sleep(50 * time.Millisecond) k, v, success = cache.pop() re.False(success) re.Nil(k) re.Nil(v) // Test Get - cache.PutWithTTL(1, 1, time.Second) - cache.PutWithTTL(2, "v2", 5*time.Second) - cache.PutWithTTL(3, 3.0, 5*time.Second) + cache.PutWithTTL(1, 1, 10*time.Millisecond) + cache.PutWithTTL(2, "v2", 50*time.Millisecond) + cache.PutWithTTL(3, 3.0, 50*time.Millisecond) value, ok := cache.Get(1) re.True(ok) @@ -78,7 +78,7 @@ func TestExpireRegionCache(t *testing.T) { re.Equal(sortIDs(cache.GetAllID()), []uint64{1, 2, 3}) - time.Sleep(2 * time.Second) + time.Sleep(20 * time.Millisecond) value, ok = cache.Get(1) re.False(ok) diff --git a/pkg/mock/mockcluster/mockcluster.go b/pkg/mock/mockcluster/mockcluster.go index 8b38fa50ae6f..a014a9289419 100644 --- a/pkg/mock/mockcluster/mockcluster.go +++ b/pkg/mock/mockcluster/mockcluster.go @@ -392,7 +392,7 @@ func (mc *Cluster) AddRegionWithReadInfo( r := mc.newMockRegionInfo(regionID, leaderStoreID, otherPeerStoreIDs...) 
r = r.Clone(core.SetReadBytes(readBytes)) r = r.Clone(core.SetReadKeys(readKeys)) - r = r.Clone(core.SetReportInterval(reportInterval)) + r = r.Clone(core.SetReportInterval(0, reportInterval)) r = r.Clone(core.SetReadQuery(readQuery)) filledNum := mc.HotCache.GetFilledPeriod(statistics.Read) if len(filledNums) > 0 { @@ -414,7 +414,7 @@ func (mc *Cluster) AddRegionWithReadInfo( func (mc *Cluster) AddRegionWithPeerReadInfo(regionID, leaderStoreID, targetStoreID, readBytes, readKeys, reportInterval uint64, otherPeerStoreIDs []uint64, filledNums ...int) []*statistics.HotPeerStat { r := mc.newMockRegionInfo(regionID, leaderStoreID, otherPeerStoreIDs...) - r = r.Clone(core.SetReadBytes(readBytes), core.SetReadKeys(readKeys), core.SetReportInterval(reportInterval)) + r = r.Clone(core.SetReadBytes(readBytes), core.SetReadKeys(readKeys), core.SetReportInterval(0, reportInterval)) filledNum := mc.HotCache.GetFilledPeriod(statistics.Read) if len(filledNums) > 0 { filledNum = filledNums[0] @@ -442,7 +442,7 @@ func (mc *Cluster) AddRegionLeaderWithReadInfo( r = r.Clone(core.SetReadBytes(readBytes)) r = r.Clone(core.SetReadKeys(readKeys)) r = r.Clone(core.SetReadQuery(readQuery)) - r = r.Clone(core.SetReportInterval(reportInterval)) + r = r.Clone(core.SetReportInterval(0, reportInterval)) filledNum := mc.HotCache.GetFilledPeriod(statistics.Read) if len(filledNums) > 0 { filledNum = filledNums[0] @@ -468,7 +468,7 @@ func (mc *Cluster) AddLeaderRegionWithWriteInfo( r := mc.newMockRegionInfo(regionID, leaderStoreID, otherPeerStoreIDs...) r = r.Clone(core.SetWrittenBytes(writtenBytes)) r = r.Clone(core.SetWrittenKeys(writtenKeys)) - r = r.Clone(core.SetReportInterval(reportInterval)) + r = r.Clone(core.SetReportInterval(0, reportInterval)) r = r.Clone(core.SetWrittenQuery(writtenQuery)) filledNum := mc.HotCache.GetFilledPeriod(statistics.Write) diff --git a/server/core/region_option.go b/server/core/region_option.go index b405dabab337..4fa3de028625 100644 --- a/server/core/region_option.go +++ b/server/core/region_option.go @@ -278,9 +278,10 @@ func SetApproximateKeys(v int64) RegionCreateOption { } // SetReportInterval sets the report interval for the region. -func SetReportInterval(v uint64) RegionCreateOption { +// This func is only used for test. 
+func SetReportInterval(start, end uint64) RegionCreateOption { return func(region *RegionInfo) { - region.interval = &pdpb.TimeInterval{StartTimestamp: 0, EndTimestamp: v} + region.interval = &pdpb.TimeInterval{StartTimestamp: start, EndTimestamp: end} } } diff --git a/server/core/region_test.go b/server/core/region_test.go index 6db10a6bd04c..23acca7705c3 100644 --- a/server/core/region_test.go +++ b/server/core/region_test.go @@ -281,7 +281,7 @@ func TestRegionWriteRate(t *testing.T) { {10, 3, 500, 0, 0}, } for _, testCase := range testCases { - r := NewRegionInfo(&metapb.Region{Id: 100}, nil, SetWrittenBytes(testCase.bytes), SetWrittenKeys(testCase.keys), SetReportInterval(testCase.interval)) + r := NewRegionInfo(&metapb.Region{Id: 100}, nil, SetWrittenBytes(testCase.bytes), SetWrittenKeys(testCase.keys), SetReportInterval(0, testCase.interval)) bytesRate, keysRate := r.GetWriteRate() re.Equal(testCase.expectBytesRate, bytesRate) re.Equal(testCase.expectKeysRate, keysRate) @@ -518,7 +518,7 @@ func TestSetRegion(t *testing.T) { SetApproximateSize(30), SetWrittenBytes(40), SetWrittenKeys(10), - SetReportInterval(5)) + SetReportInterval(0, 5)) regions.SetRegion(region) checkRegions(re, regions) re.Equal(96, regions.tree.length()) diff --git a/server/statistics/hot_peer_cache_test.go b/server/statistics/hot_peer_cache_test.go index 9baf8c9a3005..77f9f01d1343 100644 --- a/server/statistics/hot_peer_cache_test.go +++ b/server/statistics/hot_peer_cache_test.go @@ -271,7 +271,7 @@ func buildRegion(kind RWType, peerCount int, interval uint64) *core.RegionInfo { return core.NewRegionInfo( meta, leader, - core.SetReportInterval(interval), + core.SetReportInterval(0, interval), core.SetReadBytes(10*units.MiB*interval), core.SetReadKeys(10*units.MiB*interval), core.SetReadQuery(1024*interval), @@ -280,7 +280,7 @@ func buildRegion(kind RWType, peerCount int, interval uint64) *core.RegionInfo { return core.NewRegionInfo( meta, leader, - core.SetReportInterval(interval), + core.SetReportInterval(0, interval), core.SetWrittenBytes(10*units.MiB*interval), core.SetWrittenKeys(10*units.MiB*interval), core.SetWrittenQuery(1024*interval), diff --git a/tests/pdctl/hot/hot_test.go b/tests/pdctl/hot/hot_test.go index 7bab989ccc50..dcd917c5fddf 100644 --- a/tests/pdctl/hot/hot_test.go +++ b/tests/pdctl/hot/hot_test.go @@ -25,6 +25,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/stretchr/testify/require" + "github.com/tikv/pd/pkg/testutil" "github.com/tikv/pd/pkg/typeutil" "github.com/tikv/pd/server/api" "github.com/tikv/pd/server/config" @@ -148,7 +149,10 @@ func TestHot(t *testing.T) { Id: hotRegionID, }, leader) rc.GetHotStat().CheckReadAsync(statistics.NewCheckPeerTask(peerInfo, region)) - time.Sleep(5000 * time.Millisecond) + testutil.Eventually(re, func() bool { + hotPeerStat := rc.GetHotPeerStat(statistics.Read, hotRegionID, hotStoreID) + return hotPeerStat != nil + }) if reportInterval >= statistics.ReadReportInterval { count++ } @@ -158,8 +162,11 @@ func TestHot(t *testing.T) { re, cluster, hotRegionID, hotStoreID, []byte("c"), []byte("d"), - core.SetWrittenBytes(1000000000*reportInterval), core.SetReportInterval(reportInterval)) - time.Sleep(5000 * time.Millisecond) + core.SetWrittenBytes(1000000000*reportInterval), core.SetReportInterval(0, reportInterval)) + testutil.Eventually(re, func() bool { + hotPeerStat := rc.GetHotPeerStat(statistics.Write, hotRegionID, hotStoreID) + return hotPeerStat != nil + }) if reportInterval >= 
statistics.WriteReportInterval { count++ } @@ -221,11 +228,16 @@ func TestHotWithStoreID(t *testing.T) { } defer cluster.Destroy() - pdctl.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetWrittenBytes(3000000000), core.SetReportInterval(statistics.WriteReportInterval)) - pdctl.MustPutRegion(re, cluster, 2, 2, []byte("c"), []byte("d"), core.SetWrittenBytes(6000000000), core.SetReportInterval(statistics.WriteReportInterval)) - pdctl.MustPutRegion(re, cluster, 3, 1, []byte("e"), []byte("f"), core.SetWrittenBytes(9000000000), core.SetReportInterval(statistics.WriteReportInterval)) + pdctl.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetWrittenBytes(3000000000), core.SetReportInterval(0, statistics.WriteReportInterval)) + pdctl.MustPutRegion(re, cluster, 2, 2, []byte("c"), []byte("d"), core.SetWrittenBytes(6000000000), core.SetReportInterval(0, statistics.WriteReportInterval)) + pdctl.MustPutRegion(re, cluster, 3, 1, []byte("e"), []byte("f"), core.SetWrittenBytes(9000000000), core.SetReportInterval(0, statistics.WriteReportInterval)) // wait hot scheduler starts - time.Sleep(5000 * time.Millisecond) + rc := leaderServer.GetRaftCluster() + testutil.Eventually(re, func() bool { + return rc.GetHotPeerStat(statistics.Write, 1, 1) != nil && + rc.GetHotPeerStat(statistics.Write, 2, 2) != nil && + rc.GetHotPeerStat(statistics.Write, 3, 1) != nil + }) args := []string{"-u", pdAddr, "hot", "write", "1"} output, err := pdctl.ExecuteCommand(cmd, args...) hotRegion := statistics.StoreHotPeersInfos{} @@ -290,15 +302,24 @@ func TestHistoryHotRegions(t *testing.T) { pdctl.MustPutStore(re, leaderServer.GetServer(), store) } defer cluster.Destroy() - startTime := time.Now().UnixNano() / int64(time.Millisecond) - pdctl.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetWrittenBytes(3000000000), core.SetReportInterval(statistics.WriteReportInterval)) - pdctl.MustPutRegion(re, cluster, 2, 2, []byte("c"), []byte("d"), core.SetWrittenBytes(6000000000), core.SetReportInterval(statistics.WriteReportInterval)) - pdctl.MustPutRegion(re, cluster, 3, 1, []byte("e"), []byte("f"), core.SetWrittenBytes(9000000000), core.SetReportInterval(statistics.WriteReportInterval)) - pdctl.MustPutRegion(re, cluster, 4, 3, []byte("g"), []byte("h"), core.SetWrittenBytes(9000000000), core.SetReportInterval(statistics.WriteReportInterval)) + startTime := time.Now().Second() + pdctl.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetWrittenBytes(3000000000), + core.SetReportInterval(uint64(startTime-statistics.RegionHeartBeatReportInterval), uint64(startTime))) + pdctl.MustPutRegion(re, cluster, 2, 2, []byte("c"), []byte("d"), core.SetWrittenBytes(6000000000), + core.SetReportInterval(uint64(startTime-statistics.RegionHeartBeatReportInterval), uint64(startTime))) + pdctl.MustPutRegion(re, cluster, 3, 1, []byte("e"), []byte("f"), core.SetWrittenBytes(9000000000), + core.SetReportInterval(uint64(startTime-statistics.RegionHeartBeatReportInterval), uint64(startTime))) + pdctl.MustPutRegion(re, cluster, 4, 3, []byte("g"), []byte("h"), core.SetWrittenBytes(9000000000), + core.SetReportInterval(uint64(startTime-statistics.RegionHeartBeatReportInterval), uint64(startTime))) // wait hot scheduler starts - time.Sleep(5000 * time.Millisecond) + testutil.Eventually(re, func() bool { + hotRegionStorage := leaderServer.GetServer().GetHistoryHotRegionStorage() + iter := hotRegionStorage.NewIterator([]string{storage.WriteType.String()}, int64(startTime*1000), 
time.Now().UnixNano()/int64(time.Millisecond)) + next, err := iter.Next() + return err == nil && next != nil + }) endTime := time.Now().UnixNano() / int64(time.Millisecond) - start := strconv.FormatInt(startTime, 10) + start := strconv.FormatInt(int64(startTime*1000), 10) end := strconv.FormatInt(endTime, 10) args := []string{"-u", pdAddr, "hot", "history", start, end, @@ -413,8 +434,6 @@ func TestHotWithoutHotPeer(t *testing.T) { } defer cluster.Destroy() - // wait hot scheduler starts - time.Sleep(5000 * time.Millisecond) { args := []string{"-u", pdAddr, "hot", "read"} output, err := pdctl.ExecuteCommand(cmd, args...) diff --git a/tests/pdctl/store/store_test.go b/tests/pdctl/store/store_test.go index 0ccc5f88f6a8..590333d4f5fb 100644 --- a/tests/pdctl/store/store_test.go +++ b/tests/pdctl/store/store_test.go @@ -513,8 +513,8 @@ func TestTombstoneStore(t *testing.T) { pdctl.MustPutStore(re, leaderServer.GetServer(), store.Store.Store) } defer cluster.Destroy() - pdctl.MustPutRegion(re, cluster, 1, 2, []byte("a"), []byte("b"), core.SetWrittenBytes(3000000000), core.SetReportInterval(statistics.WriteReportInterval)) - pdctl.MustPutRegion(re, cluster, 2, 3, []byte("b"), []byte("c"), core.SetWrittenBytes(3000000000), core.SetReportInterval(statistics.WriteReportInterval)) + pdctl.MustPutRegion(re, cluster, 1, 2, []byte("a"), []byte("b"), core.SetWrittenBytes(3000000000), core.SetReportInterval(0, statistics.WriteReportInterval)) + pdctl.MustPutRegion(re, cluster, 2, 3, []byte("b"), []byte("c"), core.SetWrittenBytes(3000000000), core.SetReportInterval(0, statistics.WriteReportInterval)) // store remove-tombstone args := []string{"-u", pdAddr, "store", "remove-tombstone"} output, err := pdctl.ExecuteCommand(cmd, args...) diff --git a/tests/server/storage/hot_region_storage_test.go b/tests/server/storage/hot_region_storage_test.go index 39fe345a91b9..da19c568f7b9 100644 --- a/tests/server/storage/hot_region_storage_test.go +++ b/tests/server/storage/hot_region_storage_test.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/stretchr/testify/require" + "github.com/tikv/pd/pkg/testutil" "github.com/tikv/pd/server/config" "github.com/tikv/pd/server/core" "github.com/tikv/pd/server/statistics" @@ -65,15 +66,19 @@ func TestHotRegionStorage(t *testing.T) { pdctl.MustPutStore(re, leaderServer.GetServer(), store) } defer cluster.Destroy() - startTime := time.Now().UnixNano() / int64(time.Millisecond) - pdctl.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetWrittenBytes(3000000000), core.SetReportInterval(statistics.WriteReportInterval)) - pdctl.MustPutRegion(re, cluster, 2, 2, []byte("c"), []byte("d"), core.SetWrittenBytes(6000000000), core.SetReportInterval(statistics.WriteReportInterval)) - pdctl.MustPutRegion(re, cluster, 3, 1, []byte("e"), []byte("f")) - pdctl.MustPutRegion(re, cluster, 4, 2, []byte("g"), []byte("h")) + startTime := time.Now().Second() + pdctl.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetWrittenBytes(3000000000), + core.SetReportInterval(uint64(startTime-statistics.RegionHeartBeatReportInterval), uint64(startTime))) + pdctl.MustPutRegion(re, cluster, 2, 2, []byte("c"), []byte("d"), core.SetWrittenBytes(6000000000), + core.SetReportInterval(uint64(startTime-statistics.RegionHeartBeatReportInterval), uint64(startTime))) + pdctl.MustPutRegion(re, cluster, 3, 1, []byte("e"), []byte("f"), + core.SetReportInterval(uint64(startTime-statistics.RegionHeartBeatReportInterval), 
uint64(startTime))) + pdctl.MustPutRegion(re, cluster, 4, 2, []byte("g"), []byte("h"), + core.SetReportInterval(uint64(startTime-statistics.RegionHeartBeatReportInterval), uint64(startTime))) storeStats := []*pdpb.StoreStats{ { StoreId: 1, - Interval: &pdpb.TimeInterval{StartTimestamp: 0, EndTimestamp: statistics.ReadReportInterval}, + Interval: &pdpb.TimeInterval{StartTimestamp: uint64(startTime - statistics.StoreHeartBeatReportInterval), EndTimestamp: uint64(startTime)}, PeerStats: []*pdpb.PeerStat{ { RegionId: 3, @@ -83,7 +88,7 @@ func TestHotRegionStorage(t *testing.T) { }, { StoreId: 2, - Interval: &pdpb.TimeInterval{StartTimestamp: 0, EndTimestamp: statistics.ReadReportInterval}, + Interval: &pdpb.TimeInterval{StartTimestamp: uint64(startTime - statistics.StoreHeartBeatReportInterval), EndTimestamp: uint64(startTime)}, PeerStats: []*pdpb.PeerStat{ { RegionId: 4, @@ -95,14 +100,14 @@ func TestHotRegionStorage(t *testing.T) { for _, storeStats := range storeStats { leaderServer.GetRaftCluster().HandleStoreHeartbeat(&pdpb.StoreHeartbeatRequest{Stats: storeStats}, &pdpb.StoreHeartbeatResponse{}) } - // wait hot scheduler starts - time.Sleep(5000 * time.Millisecond) - endTime := time.Now().UnixNano() / int64(time.Millisecond) + var iter storage.HotRegionStorageIterator + var next *storage.HistoryHotRegion hotRegionStorage := leaderServer.GetServer().GetHistoryHotRegionStorage() - iter := hotRegionStorage.NewIterator([]string{storage.WriteType.String()}, startTime, endTime) - next, err := iter.Next() - re.NoError(err) - re.NotNil(next) + testutil.Eventually(re, func() bool { // wait for the history hot region to be written to the storage + iter = hotRegionStorage.NewIterator([]string{storage.WriteType.String()}, int64(startTime*1000), time.Now().UnixNano()/int64(time.Millisecond)) + next, err = iter.Next() + return err == nil && next != nil + }) re.Equal(uint64(1), next.RegionID) re.Equal(uint64(1), next.StoreID) re.Equal(storage.WriteType.String(), next.HotRegionType) @@ -115,10 +120,8 @@ func TestHotRegionStorage(t *testing.T) { next, err = iter.Next() re.NoError(err) re.Nil(next) - iter = hotRegionStorage.NewIterator([]string{storage.ReadType.String()}, startTime, endTime) + iter = hotRegionStorage.NewIterator([]string{storage.ReadType.String()}, int64(startTime*1000), time.Now().UnixNano()/int64(time.Millisecond)) next, err = iter.Next() - re.NoError(err) - re.NotNil(next) re.Equal(uint64(3), next.RegionID) re.Equal(uint64(1), next.StoreID) re.Equal(storage.ReadType.String(), next.HotRegionType) @@ -169,16 +172,17 @@ func TestHotRegionStorageReservedDayConfigChange(t *testing.T) { pdctl.MustPutStore(re, leaderServer.GetServer(), store) } defer cluster.Destroy() - startTime := time.Now().UnixNano() / int64(time.Millisecond) - pdctl.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetWrittenBytes(3000000000), core.SetReportInterval(statistics.WriteReportInterval)) - // wait hot scheduler starts - time.Sleep(5000 * time.Millisecond) - endTime := time.Now().UnixNano() / int64(time.Millisecond) - hotRegionStorage := leaderServer.GetServer().GetHistoryHotRegionStorage() - iter := hotRegionStorage.NewIterator([]string{storage.WriteType.String()}, startTime, endTime) - next, err := iter.Next() - re.NoError(err) - re.NotNil(next) + startTime := time.Now().Second() + pdctl.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetWrittenBytes(3000000000), + core.SetReportInterval(uint64(startTime-statistics.RegionHeartBeatReportInterval), uint64(startTime))) + var iter 
storage.HotRegionStorageIterator + var next *storage.HistoryHotRegion + testutil.Eventually(re, func() bool { // wait for the history hot region to be written to the storage + hotRegionStorage := leaderServer.GetServer().GetHistoryHotRegionStorage() + iter = hotRegionStorage.NewIterator([]string{storage.WriteType.String()}, int64(startTime*1000), time.Now().UnixNano()/int64(time.Millisecond)) + next, err = iter.Next() + return err == nil && next != nil + }) re.Equal(uint64(1), next.RegionID) re.Equal(uint64(1), next.StoreID) re.Equal(storage.WriteType.String(), next.HotRegionType) @@ -190,11 +194,12 @@ func TestHotRegionStorageReservedDayConfigChange(t *testing.T) { schedule.HotRegionsReservedDays = 0 leaderServer.GetServer().SetScheduleConfig(schedule) time.Sleep(3 * interval) - pdctl.MustPutRegion(re, cluster, 2, 2, []byte("c"), []byte("d"), core.SetWrittenBytes(6000000000), core.SetReportInterval(statistics.WriteReportInterval)) + pdctl.MustPutRegion(re, cluster, 2, 2, []byte("c"), []byte("d"), core.SetWrittenBytes(6000000000), + core.SetReportInterval(uint64(time.Now().Second()-statistics.WriteReportInterval), uint64(time.Now().Second()))) time.Sleep(10 * interval) - endTime = time.Now().UnixNano() / int64(time.Millisecond) - hotRegionStorage = leaderServer.GetServer().GetHistoryHotRegionStorage() - iter = hotRegionStorage.NewIterator([]string{storage.WriteType.String()}, startTime, endTime) + endTime := time.Now().UnixNano() / int64(time.Millisecond) + hotRegionStorage := leaderServer.GetServer().GetHistoryHotRegionStorage() + iter = hotRegionStorage.NewIterator([]string{storage.WriteType.String()}, int64(startTime*1000), endTime) next, err = iter.Next() re.NoError(err) re.NotNil(next) @@ -210,7 +215,7 @@ func TestHotRegionStorageReservedDayConfigChange(t *testing.T) { time.Sleep(3 * interval) endTime = time.Now().UnixNano() / int64(time.Millisecond) hotRegionStorage = leaderServer.GetServer().GetHistoryHotRegionStorage() - iter = hotRegionStorage.NewIterator([]string{storage.WriteType.String()}, startTime, endTime) + iter = hotRegionStorage.NewIterator([]string{storage.WriteType.String()}, int64(startTime*1000), endTime) next, err = iter.Next() re.NoError(err) re.NotNil(next) @@ -261,16 +266,18 @@ func TestHotRegionStorageWriteIntervalConfigChange(t *testing.T) { pdctl.MustPutStore(re, leaderServer.GetServer(), store) } defer cluster.Destroy() - startTime := time.Now().UnixNano() / int64(time.Millisecond) - pdctl.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetWrittenBytes(3000000000), core.SetReportInterval(statistics.WriteReportInterval)) - // wait hot scheduler starts - time.Sleep(5000 * time.Millisecond) - endTime := time.Now().UnixNano() / int64(time.Millisecond) - hotRegionStorage := leaderServer.GetServer().GetHistoryHotRegionStorage() - iter := hotRegionStorage.NewIterator([]string{storage.WriteType.String()}, startTime, endTime) - next, err := iter.Next() - re.NoError(err) - re.NotNil(next) + startTime := time.Now().Second() + pdctl.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), + core.SetWrittenBytes(3000000000), + core.SetReportInterval(uint64(startTime-statistics.WriteReportInterval), uint64(startTime))) + var iter storage.HotRegionStorageIterator + var next *storage.HistoryHotRegion + testutil.Eventually(re, func() bool { // wait for the history hot region to be written to the storage + hotRegionStorage := leaderServer.GetServer().GetHistoryHotRegionStorage() + iter = hotRegionStorage.NewIterator([]string{storage.WriteType.String()}, 
int64(startTime*1000), time.Now().UnixNano()/int64(time.Millisecond)) + next, err = iter.Next() + return err == nil && next != nil + }) re.Equal(uint64(1), next.RegionID) re.Equal(uint64(1), next.StoreID) re.Equal(storage.WriteType.String(), next.HotRegionType) @@ -282,12 +289,13 @@ func TestHotRegionStorageWriteIntervalConfigChange(t *testing.T) { schedule.HotRegionsWriteInterval.Duration = 20 * interval leaderServer.GetServer().SetScheduleConfig(schedule) time.Sleep(3 * interval) - pdctl.MustPutRegion(re, cluster, 2, 2, []byte("c"), []byte("d"), core.SetWrittenBytes(6000000000), core.SetReportInterval(statistics.WriteReportInterval)) + pdctl.MustPutRegion(re, cluster, 2, 2, []byte("c"), []byte("d"), core.SetWrittenBytes(6000000000), + core.SetReportInterval(uint64(time.Now().Second()-statistics.WriteReportInterval), uint64(time.Now().Second()))) time.Sleep(10 * interval) - endTime = time.Now().UnixNano() / int64(time.Millisecond) + endTime := time.Now().UnixNano() / int64(time.Millisecond) // it cant get new hot region because wait time smaller than hot region write interval - hotRegionStorage = leaderServer.GetServer().GetHistoryHotRegionStorage() - iter = hotRegionStorage.NewIterator([]string{storage.WriteType.String()}, startTime, endTime) + hotRegionStorage := leaderServer.GetServer().GetHistoryHotRegionStorage() + iter = hotRegionStorage.NewIterator([]string{storage.WriteType.String()}, int64(startTime*1000), endTime) next, err = iter.Next() re.NoError(err) re.NotNil(next) From 3606b6827f5bdb16a7c5126a372bdf3d98131d48 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 25 Nov 2022 11:05:58 +0800 Subject: [PATCH 61/67] statistics: remove `Kind` from hot peer stat (#5729) ref tikv/pd#5692 Signed-off-by: lhy1024 Co-authored-by: Ti Chi Robot --- pkg/mock/mockcluster/mockcluster.go | 8 ++--- server/cluster/coordinator_test.go | 3 +- server/schedulers/hot_region.go | 2 +- server/statistics/hot_cache.go | 4 +-- server/statistics/hot_peer.go | 21 ++--------- server/statistics/hot_peer_cache.go | 44 +++++++++++++----------- server/statistics/hot_peer_cache_test.go | 5 ++- server/statistics/kind.go | 32 +++++++++++++---- 8 files changed, 61 insertions(+), 58 deletions(-) diff --git a/pkg/mock/mockcluster/mockcluster.go b/pkg/mock/mockcluster/mockcluster.go index a014a9289419..d281d287eb1b 100644 --- a/pkg/mock/mockcluster/mockcluster.go +++ b/pkg/mock/mockcluster/mockcluster.go @@ -403,7 +403,7 @@ func (mc *Cluster) AddRegionWithReadInfo( for i := 0; i < filledNum; i++ { items = mc.CheckRegionRead(r) for _, item := range items { - mc.HotCache.Update(item) + mc.HotCache.Update(item, statistics.Read) } } mc.PutRegion(r) @@ -424,7 +424,7 @@ func (mc *Cluster) AddRegionWithPeerReadInfo(regionID, leaderStoreID, targetStor items = mc.CheckRegionRead(r) for _, item := range items { if item.StoreID == targetStoreID { - mc.HotCache.Update(item) + mc.HotCache.Update(item, statistics.Read) } } } @@ -452,7 +452,7 @@ func (mc *Cluster) AddRegionLeaderWithReadInfo( for i := 0; i < filledNum; i++ { items = mc.CheckRegionLeaderRead(r) for _, item := range items { - mc.HotCache.Update(item) + mc.HotCache.Update(item, statistics.Read) } } mc.PutRegion(r) @@ -480,7 +480,7 @@ func (mc *Cluster) AddLeaderRegionWithWriteInfo( for i := 0; i < filledNum; i++ { items = mc.CheckRegionWrite(r) for _, item := range items { - mc.HotCache.Update(item) + mc.HotCache.Update(item, statistics.Write) } } mc.PutRegion(r) diff --git a/server/cluster/coordinator_test.go b/server/cluster/coordinator_test.go index 
df4bac0f8c5d..24ca29c8094d 100644 --- a/server/cluster/coordinator_test.go +++ b/server/cluster/coordinator_test.go @@ -282,12 +282,11 @@ func TestCollectMetrics(t *testing.T) { item := &statistics.HotPeerStat{ StoreID: uint64(i % 5), RegionID: uint64(i*1000 + k), - Kind: statistics.Write, Loads: []float64{10, 20, 30}, HotDegree: 10, AntiCount: statistics.HotRegionAntiCount, // for write } - tc.hotStat.HotCache.Update(item) + tc.hotStat.HotCache.Update(item, statistics.Write) } } for i := 0; i < 1000; i++ { diff --git a/server/schedulers/hot_region.go b/server/schedulers/hot_region.go index 2b5d99db6ee4..8e3881bece71 100644 --- a/server/schedulers/hot_region.go +++ b/server/schedulers/hot_region.go @@ -762,7 +762,7 @@ func (bs *balanceSolver) checkSrcByPriorityAndTolerance(minLoad, expectLoad *sta // The returned hotPeer count in controlled by `max-peer-number`. func (bs *balanceSolver) filterHotPeers(storeLoad *statistics.StoreLoadDetail) (ret []*statistics.HotPeerStat) { appendItem := func(item *statistics.HotPeerStat) { - if _, ok := bs.sche.regionPendings[item.ID()]; !ok && !item.IsNeedCoolDownTransferLeader(bs.minHotDegree) { + if _, ok := bs.sche.regionPendings[item.ID()]; !ok && !item.IsNeedCoolDownTransferLeader(bs.minHotDegree, bs.rwTy) { // no in pending operator and no need cool down after transfer leader ret = append(ret, item) } diff --git a/server/statistics/hot_cache.go b/server/statistics/hot_cache.go index 3dc182ef8b4b..cd8bc0148b89 100644 --- a/server/statistics/hot_cache.go +++ b/server/statistics/hot_cache.go @@ -157,8 +157,8 @@ func (w *HotCache) runWriteTask(task FlowItemTask) { // Update updates the cache. // This is used for mockcluster, for test purpose. -func (w *HotCache) Update(item *HotPeerStat) { - switch item.Kind { +func (w *HotCache) Update(item *HotPeerStat, kind RWType) { + switch kind { case Write: w.writeCache.updateStat(item) case Read: diff --git a/server/statistics/hot_peer.go b/server/statistics/hot_peer.go index 3de9dbd48304..42881e8ec613 100644 --- a/server/statistics/hot_peer.go +++ b/server/statistics/hot_peer.go @@ -80,8 +80,6 @@ type HotPeerStat struct { HotDegree int `json:"hot_degree"` // AntiCount used to eliminate some noise when remove region in cache AntiCount int `json:"anti_count"` - - Kind RWType `json:"-"` // Loads contains only Kind-related statistics and is DimLen in length. Loads []float64 `json:"loads"` @@ -126,7 +124,6 @@ func (stat *HotPeerStat) Log(str string, level func(msg string, fields ...zap.Fi zap.Uint64("store", stat.StoreID), zap.Bool("is-leader", stat.isLeader), zap.Bool("is-learner", stat.isLearner), - zap.String("type", stat.Kind.String()), zap.Float64s("loads", stat.GetLoads()), zap.Float64s("loads-instant", stat.Loads), zap.Float64s("thresholds", stat.thresholds), @@ -140,8 +137,8 @@ func (stat *HotPeerStat) Log(str string, level func(msg string, fields ...zap.Fi } // IsNeedCoolDownTransferLeader use cooldown time after transfer leader to avoid unnecessary schedule -func (stat *HotPeerStat) IsNeedCoolDownTransferLeader(minHotDegree int) bool { - return time.Since(stat.lastTransferLeaderTime).Seconds() < float64(minHotDegree*stat.hotStatReportInterval()) +func (stat *HotPeerStat) IsNeedCoolDownTransferLeader(minHotDegree int, rwTy RWType) bool { + return time.Since(stat.lastTransferLeaderTime).Seconds() < float64(minHotDegree*rwTy.ReportInterval()) } // IsLeader indicates the item belong to the leader. 
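// A minimal illustrative sketch (not a hunk of this patch; the helper below is
// hypothetical): with `Kind` removed from HotPeerStat, interval-derived values
// such as the transfer-leader cooldown are computed from the caller-supplied
// RWType via its ReportInterval method introduced in this change, e.g.
//
//	func exampleCooldown(rw RWType, minHotDegree int) time.Duration {
//		// Read maps to ReadReportInterval, Write to WriteReportInterval.
//		return time.Duration(minHotDegree*rw.ReportInterval()) * time.Second
//	}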
@@ -203,13 +200,6 @@ func (stat *HotPeerStat) clearLastAverage() { } } -func (stat *HotPeerStat) hotStatReportInterval() int { - if stat.Kind == Read { - return ReadReportInterval - } - return WriteReportInterval -} - func (stat *HotPeerStat) getIntervalSum() time.Duration { if len(stat.rollingLoads) == 0 || stat.rollingLoads[0] == nil { return 0 @@ -230,10 +220,3 @@ func (stat *HotPeerStat) GetStores() []uint64 { func (stat *HotPeerStat) IsLearner() bool { return stat.isLearner } - -func (stat *HotPeerStat) defaultAntiCount() int { - if stat.Kind == Read { - return HotRegionAntiCount * (RegionHeartBeatReportInterval / StoreHeartBeatReportInterval) - } - return HotRegionAntiCount -} diff --git a/server/statistics/hot_peer_cache.go b/server/statistics/hot_peer_cache.go index 2df29a3d7908..64d1bb5d3778 100644 --- a/server/statistics/hot_peer_cache.go +++ b/server/statistics/hot_peer_cache.go @@ -97,7 +97,7 @@ func (f *hotPeerCache) RegionStats(minHotDegree int) map[uint64][]*HotPeerStat { values := peers.GetAll() stat := make([]*HotPeerStat, 0, len(values)) for _, v := range values { - if peer := v.(*HotPeerStat); peer.HotDegree >= minHotDegree && !peer.inCold && peer.AntiCount == peer.defaultAntiCount() { + if peer := v.(*HotPeerStat); peer.HotDegree >= minHotDegree && !peer.inCold && peer.AntiCount == f.kind.DefaultAntiCount() { stat = append(stat, peer) } } @@ -111,12 +111,12 @@ func (f *hotPeerCache) updateStat(item *HotPeerStat) { case Remove: f.removeItem(item) item.Log("region heartbeat remove from cache", log.Debug) - incMetrics("remove_item", item.StoreID, item.Kind) + incMetrics("remove_item", item.StoreID, f.kind) return case Add: - incMetrics("add_item", item.StoreID, item.Kind) + incMetrics("add_item", item.StoreID, f.kind) case Update: - incMetrics("update_item", item.StoreID, item.Kind) + incMetrics("update_item", item.StoreID, f.kind) } // for add and update f.putItem(item) @@ -204,7 +204,6 @@ func (f *hotPeerCache) checkPeerFlow(peer *core.PeerInfo, region *core.RegionInf newItem := &HotPeerStat{ StoreID: storeID, RegionID: regionID, - Kind: f.kind, Loads: f.kind.GetLoadRatesFromPeer(peer), LastUpdateTime: time.Now(), isLeader: region.GetLeader().GetStoreId() == storeID, @@ -247,7 +246,6 @@ func (f *hotPeerCache) checkColdPeer(storeID uint64, reportRegions map[uint64]*c newItem := &HotPeerStat{ StoreID: storeID, RegionID: regionID, - Kind: f.kind, // use 0 to make the cold newItem won't affect the loads. Loads: make([]float64, len(oldItem.Loads)), LastUpdateTime: time.Now(), @@ -423,8 +421,8 @@ func (f *hotPeerCache) updateHotPeerStat(region *core.RegionInfo, newItem, oldIt // maintain anticount and hotdegree to avoid store threshold and hot peer are unstable. // For write stat, as the stat is send by region heartbeat, the first heartbeat will be skipped. // For read stat, as the stat is send by store heartbeat, the first heartbeat won't be skipped. 
- if newItem.Kind == Write { - inheritItem(newItem, oldItem) + if f.kind == Write { + f.inheritItem(newItem, oldItem) return newItem } } else { @@ -438,24 +436,24 @@ func (f *hotPeerCache) updateHotPeerStat(region *core.RegionInfo, newItem, oldIt isFull := newItem.rollingLoads[0].isFull() // The intervals of dims are the same, so it is only necessary to determine whether any of them if !isFull { // not update hot degree and anti count - inheritItem(newItem, oldItem) + f.inheritItem(newItem, oldItem) } else { // If item is inCold, it means the pd didn't recv this item in the store heartbeat, // thus we make it colder if newItem.inCold { - coldItem(newItem, oldItem) + f.coldItem(newItem, oldItem) } else { if f.isOldColdPeer(oldItem, newItem.StoreID) { if newItem.isHot() { - initItem(newItem) + f.initItem(newItem) } else { newItem.actionType = Remove } } else { if newItem.isHot() { - hotItem(newItem, oldItem) + f.hotItem(newItem, oldItem) } else { - coldItem(newItem, oldItem) + f.coldItem(newItem, oldItem) } } } @@ -468,12 +466,12 @@ func (f *hotPeerCache) updateNewHotPeerStat(newItem *HotPeerStat, deltaLoads []f regionStats := f.kind.RegionStats() // interval is not 0 which is guaranteed by the caller. if interval.Seconds() >= float64(f.reportIntervalSecs) { - initItem(newItem) + f.initItem(newItem) } newItem.actionType = Add newItem.rollingLoads = make([]*dimStat, len(regionStats)) for i, k := range regionStats { - ds := newDimStat(k, time.Duration(newItem.hotStatReportInterval())*time.Second) + ds := newDimStat(k, f.interval()) ds.Add(deltaLoads[k], interval) if ds.isFull() { ds.clearLastAverage() @@ -516,7 +514,7 @@ func (f *hotPeerCache) removeItem(item *HotPeerStat) { } } -func coldItem(newItem, oldItem *HotPeerStat) { +func (f *hotPeerCache) coldItem(newItem, oldItem *HotPeerStat) { newItem.HotDegree = oldItem.HotDegree - 1 newItem.AntiCount = oldItem.AntiCount - 1 if newItem.AntiCount <= 0 { @@ -526,9 +524,9 @@ func coldItem(newItem, oldItem *HotPeerStat) { } } -func hotItem(newItem, oldItem *HotPeerStat) { +func (f *hotPeerCache) hotItem(newItem, oldItem *HotPeerStat) { newItem.HotDegree = oldItem.HotDegree + 1 - if oldItem.AntiCount < oldItem.defaultAntiCount() { + if oldItem.AntiCount < f.kind.DefaultAntiCount() { newItem.AntiCount = oldItem.AntiCount + 1 } else { newItem.AntiCount = oldItem.AntiCount @@ -536,13 +534,17 @@ func hotItem(newItem, oldItem *HotPeerStat) { newItem.allowInherited = true } -func initItem(item *HotPeerStat) { +func (f *hotPeerCache) initItem(item *HotPeerStat) { item.HotDegree = 1 - item.AntiCount = item.defaultAntiCount() + item.AntiCount = f.kind.DefaultAntiCount() item.allowInherited = true } -func inheritItem(newItem, oldItem *HotPeerStat) { +func (f *hotPeerCache) inheritItem(newItem, oldItem *HotPeerStat) { newItem.HotDegree = oldItem.HotDegree newItem.AntiCount = oldItem.AntiCount } + +func (f *hotPeerCache) interval() time.Duration { + return time.Duration(f.kind.ReportInterval()) * time.Second +} diff --git a/server/statistics/hot_peer_cache_test.go b/server/statistics/hot_peer_cache_test.go index 77f9f01d1343..81d1a44caa6b 100644 --- a/server/statistics/hot_peer_cache_test.go +++ b/server/statistics/hot_peer_cache_test.go @@ -350,7 +350,7 @@ func TestUpdateHotPeerStat(t *testing.T) { re.Equal(0, newItem.HotDegree) re.Equal(0, newItem.AntiCount) // sum of interval is larger than report interval, and hot - newItem.AntiCount = newItem.defaultAntiCount() + newItem.AntiCount = Read.DefaultAntiCount() cache.updateStat(newItem) newItem = 
cache.checkPeerFlow(core.NewPeerInfo(peer, deltaLoads, uint64(interval)), region) re.Equal(1, newItem.HotDegree) @@ -405,7 +405,6 @@ func testMetrics(re *require.Assertions, interval, byteRate, expectThreshold flo for { thresholds := cache.calcHotThresholds(storeID) newItem := &HotPeerStat{ - Kind: cache.kind, StoreID: storeID, RegionID: i, actionType: Update, @@ -525,7 +524,7 @@ func TestRemoveFromCacheRandom(t *testing.T) { func checkCoolDown(re *require.Assertions, cache *hotPeerCache, region *core.RegionInfo, expect bool) { item := cache.getOldHotPeerStat(region.GetID(), region.GetLeader().GetStoreId()) - re.Equal(expect, item.IsNeedCoolDownTransferLeader(3)) + re.Equal(expect, item.IsNeedCoolDownTransferLeader(3, cache.kind)) } func TestCoolDownTransferLeader(t *testing.T) { diff --git a/server/statistics/kind.go b/server/statistics/kind.go index 612829379552..06f8f64ca072 100644 --- a/server/statistics/kind.go +++ b/server/statistics/kind.go @@ -208,12 +208,23 @@ func (rw RWType) Inverse() RWType { } } -// ForeachRegionStats foreach all region stats of read and write. -func ForeachRegionStats(f func(RWType, int, RegionStatKind)) { - for _, rwTy := range []RWType{Read, Write} { - for dim, kind := range rwTy.RegionStats() { - f(rwTy, dim, kind) - } +// ReportInterval returns the report interval of read or write. +func (rw RWType) ReportInterval() int { + switch rw { + case Write: + return WriteReportInterval + default: // Case Read + return ReadReportInterval + } +} + +// DefaultAntiCount returns the default anti count of read or write. +func (rw RWType) DefaultAntiCount() int { + switch rw { + case Write: + return HotRegionAntiCount + default: // Case Read + return HotRegionAntiCount * (RegionHeartBeatReportInterval / StoreHeartBeatReportInterval) } } @@ -235,6 +246,15 @@ func (rw RWType) SetFullLoadRates(full []float64, loads []float64) { } } +// ForeachRegionStats foreach all region stats of read and write. +func ForeachRegionStats(f func(RWType, int, RegionStatKind)) { + for _, rwTy := range []RWType{Read, Write} { + for dim, kind := range rwTy.RegionStats() { + f(rwTy, dim, kind) + } + } +} + // ActionType indicates the action type for the stat item. type ActionType int From 441f0fbfce3ebf2dd17af6223edc71f35b264ec7 Mon Sep 17 00:00:00 2001 From: ShuNing Date: Fri, 25 Nov 2022 14:14:00 +0800 Subject: [PATCH 62/67] server/cluster: automatic gc tombstone store (#5726) close tikv/pd#5348 server/cluster: automatic gc tombstone store Signed-off-by: nolouch --- server/cluster/cluster.go | 17 +++++++++++++++-- server/cluster/cluster_test.go | 6 ++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index 21926540f24d..119b536013af 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -78,6 +78,7 @@ const ( updateStoreStatsInterval = 9 * time.Millisecond clientTimeout = 3 * time.Second defaultChangedRegionsLimit = 10000 + gcTombstoreInterval = 30 * 24 * time.Hour // persistLimitRetryTimes is used to reduce the probability of the persistent error // since the once the store is add or remove, we shouldn't return an error even if the store limit is failed to persist. 
persistLimitRetryTimes = 5 @@ -1523,6 +1524,17 @@ func (c *RaftCluster) checkStores() { for _, store := range stores { // the store has already been tombstone if store.IsRemoved() { + if store.DownTime() > gcTombstoreInterval { + err := c.deleteStore(store) + if err != nil { + log.Error("auto gc the tombstore store failed", + zap.Stringer("store", store.GetMeta()), + zap.Duration("down-time", store.DownTime()), + errs.ZapError(err)) + } else { + log.Info("auto gc the tombstore store success", zap.Stringer("store", store.GetMeta()), zap.Duration("down-time", store.DownTime())) + } + } continue } @@ -1810,7 +1822,7 @@ func (c *RaftCluster) RemoveTombStoneRecords() error { continue } // the store has already been tombstone - err := c.deleteStoreLocked(store) + err := c.deleteStore(store) if err != nil { log.Error("delete store failed", zap.Stringer("store", store.GetMeta()), @@ -1835,7 +1847,8 @@ func (c *RaftCluster) RemoveTombStoneRecords() error { return nil } -func (c *RaftCluster) deleteStoreLocked(store *core.StoreInfo) error { +// deleteStore deletes the store from the cluster. it's concurrent safe. +func (c *RaftCluster) deleteStore(store *core.StoreInfo) error { if c.storage != nil { if err := c.storage.DeleteStore(store.GetMeta()); err != nil { return err diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index fe922a75236f..c83c4e4de212 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -270,6 +270,12 @@ func TestSetOfflineStore(t *testing.T) { re.NoError(cluster.BuryStore(storeID, false)) } } + // test clean up tombstone store + toCleanStore := cluster.GetStore(1).Clone().GetMeta() + toCleanStore.LastHeartbeat = time.Now().Add(-40 * 24 * time.Hour).UnixNano() + cluster.PutStore(toCleanStore) + cluster.checkStores() + re.Nil(cluster.GetStore(1)) } func TestSetOfflineWithReplica(t *testing.T) { From 7593af1e0071e81ede89699a54e0421c3293aba9 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Fri, 25 Nov 2022 14:27:59 +0800 Subject: [PATCH 63/67] tools: use metrics to record statistics (#5673) ref tikv/pd#5468 Signed-off-by: Ryan Leung Co-authored-by: Ti Chi Robot --- tools/pd-simulator/main.go | 1 - tools/pd-simulator/simulator/drive.go | 5 - tools/pd-simulator/simulator/metrics.go | 18 ++ tools/pd-simulator/simulator/raft.go | 2 - tools/pd-simulator/simulator/statistics.go | 219 --------------------- tools/pd-simulator/simulator/task.go | 34 ++-- 6 files changed, 36 insertions(+), 243 deletions(-) delete mode 100644 tools/pd-simulator/simulator/statistics.go diff --git a/tools/pd-simulator/main.go b/tools/pd-simulator/main.go index 48602d10aa92..08348b6b5193 100644 --- a/tools/pd-simulator/main.go +++ b/tools/pd-simulator/main.go @@ -206,7 +206,6 @@ EXIT: } fmt.Printf("%s [%s] total iteration: %d, time cost: %v\n", simResult, simCase, driver.TickCount(), time.Since(start)) - driver.PrintStatistics() if analysis.GetTransferCounter().IsValid { analysis.GetTransferCounter().PrintResult() } diff --git a/tools/pd-simulator/simulator/drive.go b/tools/pd-simulator/simulator/drive.go index 6968520c1bd9..8d8133c49120 100644 --- a/tools/pd-simulator/simulator/drive.go +++ b/tools/pd-simulator/simulator/drive.go @@ -163,11 +163,6 @@ func (d *Driver) Check() bool { return d.simCase.Checker(d.raftEngine.regionsInfo, stats) } -// PrintStatistics prints the statistics of the scheduler. -func (d *Driver) PrintStatistics() { - d.raftEngine.schedulerStats.PrintStatistics() -} - // Start starts all nodes. 
func (d *Driver) Start() error { for _, n := range d.conn.Nodes { diff --git a/tools/pd-simulator/simulator/metrics.go b/tools/pd-simulator/simulator/metrics.go index 5f06b2cbb65e..19e675c56a99 100644 --- a/tools/pd-simulator/simulator/metrics.go +++ b/tools/pd-simulator/simulator/metrics.go @@ -11,8 +11,26 @@ var ( Help: "Bucketed histogram of processing time (s) of handled snap requests.", Buckets: prometheus.ExponentialBuckets(0.0005, 2, 20), }, []string{"store", "type"}) + + schedulingCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "pd", + Subsystem: "schedule", + Name: "scheduling_count", + Help: "Counter of region scheduling", + }, []string{"type"}) + + snapshotCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "pd", + Subsystem: "schedule", + Name: "snapshot_count", + Help: "Counter of region snapshot", + }, []string{"store", "type"}) ) func init() { prometheus.MustRegister(snapDuration) + prometheus.MustRegister(schedulingCounter) + prometheus.MustRegister(snapshotCounter) } diff --git a/tools/pd-simulator/simulator/raft.go b/tools/pd-simulator/simulator/raft.go index cfd1e8719f03..45964eeecb2a 100644 --- a/tools/pd-simulator/simulator/raft.go +++ b/tools/pd-simulator/simulator/raft.go @@ -32,7 +32,6 @@ type RaftEngine struct { regionsInfo *core.RegionsInfo conn *Connection regionChange map[uint64][]uint64 - schedulerStats *schedulerStatistics regionSplitSize int64 regionSplitKeys int64 storeConfig *SimConfig @@ -45,7 +44,6 @@ func NewRaftEngine(conf *cases.Case, conn *Connection, storeConfig *SimConfig) * regionsInfo: core.NewRegionsInfo(), conn: conn, regionChange: make(map[uint64][]uint64), - schedulerStats: newSchedulerStatistics(), regionSplitSize: conf.RegionSplitSize, regionSplitKeys: conf.RegionSplitKeys, storeConfig: storeConfig, diff --git a/tools/pd-simulator/simulator/statistics.go b/tools/pd-simulator/simulator/statistics.go deleted file mode 100644 index 2dadd78020d5..000000000000 --- a/tools/pd-simulator/simulator/statistics.go +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright 2018 TiKV Project Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package simulator - -import ( - "fmt" - "math" - - "github.com/tikv/pd/pkg/syncutil" -) - -type taskStatistics struct { - syncutil.RWMutex - addVoter map[uint64]int - removePeer map[uint64]int - addLearner map[uint64]int - promoteLeaner map[uint64]int - demoteVoter map[uint64]int - transferLeader map[uint64]map[uint64]int - mergeRegion int -} - -func newTaskStatistics() *taskStatistics { - return &taskStatistics{ - addVoter: make(map[uint64]int), - removePeer: make(map[uint64]int), - addLearner: make(map[uint64]int), - promoteLeaner: make(map[uint64]int), - demoteVoter: make(map[uint64]int), - transferLeader: make(map[uint64]map[uint64]int), - } -} - -func (t *taskStatistics) getStatistics() map[string]int { - t.RLock() - defer t.RUnlock() - stats := make(map[string]int) - addVoter := getSum(t.addVoter) - removePeer := getSum(t.removePeer) - addLearner := getSum(t.addLearner) - promoteLearner := getSum(t.promoteLeaner) - demoteVoter := getSum(t.demoteVoter) - - var transferLeader int - for _, to := range t.transferLeader { - for _, v := range to { - transferLeader += v - } - } - - stats["Add Voter (task)"] = addVoter - stats["Remove Peer (task)"] = removePeer - stats["Add Learner (task)"] = addLearner - stats["Promote Learner (task)"] = promoteLearner - stats["Demote Voter (task)"] = demoteVoter - stats["Transfer Leader (task)"] = transferLeader - stats["Merge Region (task)"] = t.mergeRegion - - return stats -} - -func (t *taskStatistics) incAddVoter(regionID uint64) { - t.Lock() - defer t.Unlock() - t.addVoter[regionID]++ -} - -func (t *taskStatistics) incAddLearner(regionID uint64) { - t.Lock() - defer t.Unlock() - t.addLearner[regionID]++ -} - -func (t *taskStatistics) incPromoteLearner(regionID uint64) { - t.Lock() - defer t.Unlock() - t.promoteLeaner[regionID]++ -} - -func (t *taskStatistics) incDemoteVoter(regionID uint64) { - t.Lock() - defer t.Unlock() - t.demoteVoter[regionID]++ -} - -func (t *taskStatistics) incRemovePeer(regionID uint64) { - t.Lock() - defer t.Unlock() - t.removePeer[regionID]++ -} - -func (t *taskStatistics) incMergeRegion() { - t.Lock() - defer t.Unlock() - t.mergeRegion++ -} - -func (t *taskStatistics) incTransferLeader(fromPeerStoreID, toPeerStoreID uint64) { - t.Lock() - defer t.Unlock() - _, ok := t.transferLeader[fromPeerStoreID] - if ok { - t.transferLeader[fromPeerStoreID][toPeerStoreID]++ - } else { - m := make(map[uint64]int) - m[toPeerStoreID]++ - t.transferLeader[fromPeerStoreID] = m - } -} - -type snapshotStatistics struct { - syncutil.RWMutex - receive map[uint64]int - send map[uint64]int -} - -func newSnapshotStatistics() *snapshotStatistics { - return &snapshotStatistics{ - receive: make(map[uint64]int), - send: make(map[uint64]int), - } -} - -type schedulerStatistics struct { - taskStats *taskStatistics - snapshotStats *snapshotStatistics -} - -func newSchedulerStatistics() *schedulerStatistics { - return &schedulerStatistics{ - taskStats: newTaskStatistics(), - snapshotStats: newSnapshotStatistics(), - } -} - -func (s *snapshotStatistics) getStatistics() map[string]int { - s.RLock() - defer s.RUnlock() - maxSend := getMax(s.send) - maxReceive := getMax(s.receive) - minSend := getMin(s.send) - minReceive := getMin(s.receive) - - stats := make(map[string]int) - stats["Send Maximum (snapshot)"] = maxSend - stats["Receive Maximum (snapshot)"] = maxReceive - if minSend != math.MaxInt32 { - stats["Send Minimum (snapshot)"] = minSend - } - if minReceive != math.MaxInt32 { - stats["Receive Minimum (snapshot)"] = minReceive - } - - return stats -} - 
-func (s *snapshotStatistics) incSendSnapshot(storeID uint64) { - s.Lock() - defer s.Unlock() - s.send[storeID]++ -} - -func (s *snapshotStatistics) incReceiveSnapshot(storeID uint64) { - s.Lock() - defer s.Unlock() - s.receive[storeID]++ -} - -// PrintStatistics prints the statistics of the scheduler. -func (s *schedulerStatistics) PrintStatistics() { - task := s.taskStats.getStatistics() - snap := s.snapshotStats.getStatistics() - for t, count := range task { - fmt.Println(t, count) - } - for s, count := range snap { - fmt.Println(s, count) - } -} - -func getMax(m map[uint64]int) int { - var max int - for _, v := range m { - if v > max { - max = v - } - } - return max -} - -func getMin(m map[uint64]int) int { - min := math.MaxInt32 - for _, v := range m { - if v < min { - min = v - } - } - return min -} - -func getSum(m map[uint64]int) int { - var sum int - for _, v := range m { - sum += v - } - return sum -} diff --git a/tools/pd-simulator/simulator/task.go b/tools/pd-simulator/simulator/task.go index 14919d2768c4..379707f4b3bf 100644 --- a/tools/pd-simulator/simulator/task.go +++ b/tools/pd-simulator/simulator/task.go @@ -252,7 +252,7 @@ func (m *mergeRegion) tick(engine *RaftEngine, region *core.RegionInfo) (newRegi core.SetApproximateSize(targetRegion.GetApproximateSize()+region.GetApproximateSize()), core.SetApproximateKeys(targetRegion.GetApproximateKeys()+region.GetApproximateKeys()), ) - engine.schedulerStats.taskStats.incMergeRegion() + schedulingCounter.WithLabelValues("merge").Inc() return newRegion, true } @@ -274,11 +274,11 @@ func (t *transferLeader) tick(engine *RaftEngine, region *core.RegionInfo) (newR } newRegion = region.Clone(core.WithLeader(toPeer)) - engine.schedulerStats.taskStats.incTransferLeader(t.fromPeerStoreID, toPeer.GetStoreId()) + schedulingCounter.WithLabelValues("transfer-leader").Inc() return } -func checkAndCreateChangePeerOption(engine *RaftEngine, region *core.RegionInfo, +func checkAndCreateChangePeerOption(region *core.RegionInfo, peer *metapb.Peer, from, to metapb.PeerRole) []core.RegionCreateOption { // `from` and `to` need to satisfy the combination in switch. @@ -298,9 +298,9 @@ func checkAndCreateChangePeerOption(engine *RaftEngine, region *core.RegionInfo, // create option switch to { case metapb.PeerRole_Voter: // Learner/IncomingVoter -> Voter - engine.schedulerStats.taskStats.incPromoteLearner(region.GetID()) + schedulingCounter.WithLabelValues("promote-learner").Inc() case metapb.PeerRole_Learner: // Voter/DemotingVoter -> Learner - engine.schedulerStats.taskStats.incDemoteVoter(region.GetID()) + schedulingCounter.WithLabelValues("demote-voter").Inc() case metapb.PeerRole_IncomingVoter: // Learner -> IncomingVoter, only in joint state case metapb.PeerRole_DemotingVoter: // Voter -> DemotingVoter, only in joint state default: @@ -316,7 +316,7 @@ type promoteLearner struct { func (pl *promoteLearner) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { isFinished = true peer := region.GetPeer(pl.peer.GetId()) - opts := checkAndCreateChangePeerOption(engine, region, peer, metapb.PeerRole_Learner, metapb.PeerRole_Voter) + opts := checkAndCreateChangePeerOption(region, peer, metapb.PeerRole_Learner, metapb.PeerRole_Voter) if len(opts) > 0 { newRegion = region.Clone(opts...) 
} @@ -330,7 +330,7 @@ type demoteVoter struct { func (dv *demoteVoter) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion *core.RegionInfo, isFinished bool) { isFinished = true peer := region.GetPeer(dv.peer.GetId()) - opts := checkAndCreateChangePeerOption(engine, region, peer, metapb.PeerRole_Voter, metapb.PeerRole_Learner) + opts := checkAndCreateChangePeerOption(region, peer, metapb.PeerRole_Voter, metapb.PeerRole_Learner) if len(opts) > 0 { newRegion = region.Clone(opts...) } @@ -347,7 +347,7 @@ func (ce *changePeerV2Enter) tick(engine *RaftEngine, region *core.RegionInfo) ( var opts []core.RegionCreateOption for _, pl := range ce.promoteLearners { peer := region.GetPeer(pl.GetId()) - subOpts := checkAndCreateChangePeerOption(engine, region, peer, metapb.PeerRole_Learner, metapb.PeerRole_IncomingVoter) + subOpts := checkAndCreateChangePeerOption(region, peer, metapb.PeerRole_Learner, metapb.PeerRole_IncomingVoter) if len(subOpts) == 0 { return } @@ -355,7 +355,7 @@ func (ce *changePeerV2Enter) tick(engine *RaftEngine, region *core.RegionInfo) ( } for _, dv := range ce.demoteVoters { peer := region.GetPeer(dv.GetId()) - subOpts := checkAndCreateChangePeerOption(engine, region, peer, metapb.PeerRole_Voter, metapb.PeerRole_DemotingVoter) + subOpts := checkAndCreateChangePeerOption(region, peer, metapb.PeerRole_Voter, metapb.PeerRole_DemotingVoter) if len(subOpts) == 0 { return } @@ -373,9 +373,9 @@ func (cl *changePeerV2Leave) tick(engine *RaftEngine, region *core.RegionInfo) ( for _, peer := range region.GetPeers() { switch peer.GetRole() { case metapb.PeerRole_IncomingVoter: - opts = append(opts, checkAndCreateChangePeerOption(engine, region, peer, metapb.PeerRole_IncomingVoter, metapb.PeerRole_Voter)...) + opts = append(opts, checkAndCreateChangePeerOption(region, peer, metapb.PeerRole_IncomingVoter, metapb.PeerRole_Voter)...) case metapb.PeerRole_DemotingVoter: - opts = append(opts, checkAndCreateChangePeerOption(engine, region, peer, metapb.PeerRole_DemotingVoter, metapb.PeerRole_Learner)...) + opts = append(opts, checkAndCreateChangePeerOption(region, peer, metapb.PeerRole_DemotingVoter, metapb.PeerRole_Learner)...) 
} } if len(opts) < 4 { @@ -408,9 +408,9 @@ func (a *addPeer) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion * if region.GetPeer(a.peer.GetId()) == nil { switch a.peer.GetRole() { case metapb.PeerRole_Voter: - engine.schedulerStats.taskStats.incAddVoter(region.GetID()) + schedulingCounter.WithLabelValues("add-voter").Inc() case metapb.PeerRole_Learner: - engine.schedulerStats.taskStats.incAddLearner(region.GetID()) + schedulingCounter.WithLabelValues("add-learner").Inc() } pendingPeers := append(region.GetPendingPeers(), a.peer) return region.Clone(core.WithAddPeer(a.peer), core.WithIncConfVer(), core.WithPendingPeers(pendingPeers)), false @@ -419,11 +419,13 @@ func (a *addPeer) tick(engine *RaftEngine, region *core.RegionInfo) (newRegion * if !processSnapshot(sendNode, a.sendingStat) { return nil, false } - engine.schedulerStats.snapshotStats.incSendSnapshot(sendNode.Id) + sendStoreID := fmt.Sprintf("store-%d", sendNode.Id) + snapshotCounter.WithLabelValues(sendStoreID, "send").Inc() if !processSnapshot(recvNode, a.receivingStat) { return nil, false } - engine.schedulerStats.snapshotStats.incReceiveSnapshot(recvNode.Id) + recvStoreID := fmt.Sprintf("store-%d", recvNode.Id) + snapshotCounter.WithLabelValues(recvStoreID, "recv").Inc() recvNode.incUsedSize(uint64(region.GetApproximateSize())) // Step 3: Remove the Pending state newRegion = region.Clone(removePendingPeer(region, a.peer)) @@ -450,7 +452,7 @@ func (r *removePeer) tick(engine *RaftEngine, region *core.RegionInfo) (newRegio return nil, false } // Step 2: Remove Peer - engine.schedulerStats.taskStats.incRemovePeer(region.GetID()) + schedulingCounter.WithLabelValues("remove-peer").Inc() newRegion = region.Clone( core.WithIncConfVer(), core.WithRemoveStorePeer(r.peer.GetStoreId()), From cdcce2dc2df6c6903153f81e8e13783e79c5bb55 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 25 Nov 2022 16:13:59 +0800 Subject: [PATCH 64/67] statistics: cached thresholds in hot peer cache (#5728) ref tikv/pd#5692, ref tikv/pd#5721 Signed-off-by: lhy1024 Co-authored-by: Ti Chi Robot --- server/schedulers/hot_region_test.go | 14 ++++- server/statistics/hot_cache.go | 16 +++++- server/statistics/hot_peer_cache.go | 67 ++++++++++++++---------- server/statistics/hot_peer_cache_test.go | 19 ++++++- 4 files changed, 85 insertions(+), 31 deletions(-) diff --git a/server/schedulers/hot_region_test.go b/server/schedulers/hot_region_test.go index 4e937c073027..cbdf51df784a 100644 --- a/server/schedulers/hot_region_test.go +++ b/server/schedulers/hot_region_test.go @@ -1450,6 +1450,10 @@ func TestHotCacheKeyThresholds(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() opt := config.NewTestOptions() + statistics.ThresholdsUpdateInterval = 0 + defer func() { + statistics.ThresholdsUpdateInterval = 8 * time.Second + }() { // only a few regions tc := mockcluster.NewCluster(ctx, opt) tc.SetHotRegionCacheHitsThreshold(0) @@ -1526,6 +1530,10 @@ func TestHotCacheByteAndKey(t *testing.T) { opt := config.NewTestOptions() tc := mockcluster.NewCluster(ctx, opt) tc.SetHotRegionCacheHitsThreshold(0) + statistics.ThresholdsUpdateInterval = 0 + defer func() { + statistics.ThresholdsUpdateInterval = 8 * time.Second + }() regions := []testRegionInfo{} for i := 1; i <= 500; i++ { regions = append(regions, testRegionInfo{ @@ -1717,6 +1725,10 @@ func TestHotCacheCheckRegionFlowWithDifferentThreshold(t *testing.T) { tc.SetMaxReplicas(3) tc.SetLocationLabels([]string{"zone", "host"}) 
tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) + statistics.ThresholdsUpdateInterval = 0 + defer func() { + statistics.ThresholdsUpdateInterval = statistics.StoreHeartBeatReportInterval + }() // some peers are hot, and some are cold #3198 rate := uint64(512 * units.KiB) @@ -1726,7 +1738,7 @@ func TestHotCacheCheckRegionFlowWithDifferentThreshold(t *testing.T) { } } items := tc.AddLeaderRegionWithWriteInfo(201, 1, rate*statistics.WriteReportInterval, 0, 0, statistics.WriteReportInterval, []uint64{2, 3}, 1) - re.Equal(float64(rate)*statistics.HotThresholdRatio, items[0].GetThresholds()[0]) + re.Equal(float64(rate)*statistics.HotThresholdRatio, tc.HotCache.GetThresholds(statistics.Write, items[0].StoreID)[0]) // Threshold of store 1,2,3 is 409.6 units.KiB and others are 1 units.KiB // Make the hot threshold of some store is high and the others are low rate = 10 * units.KiB diff --git a/server/statistics/hot_cache.go b/server/statistics/hot_cache.go index cd8bc0148b89..66121e8d0268 100644 --- a/server/statistics/hot_cache.go +++ b/server/statistics/hot_cache.go @@ -196,9 +196,21 @@ func (w *HotCache) GetFilledPeriod(kind RWType) int { var reportIntervalSecs int switch kind { case Write: - reportIntervalSecs = w.writeCache.reportIntervalSecs + reportIntervalSecs = w.writeCache.kind.ReportInterval() case Read: - reportIntervalSecs = w.readCache.reportIntervalSecs + reportIntervalSecs = w.readCache.kind.ReportInterval() } return movingaverage.NewTimeMedian(DefaultAotSize, rollingWindowsSize, time.Duration(reportIntervalSecs)*time.Second).GetFilledPeriod() } + +// GetThresholds returns thresholds. +// This is used for test purpose. +func (w *HotCache) GetThresholds(kind RWType, storeID uint64) []float64 { + switch kind { + case Write: + return w.writeCache.calcHotThresholds(storeID) + case Read: + return w.readCache.calcHotThresholds(storeID) + } + return nil +} diff --git a/server/statistics/hot_peer_cache.go b/server/statistics/hot_peer_cache.go index 64d1bb5d3778..d709c5df8142 100644 --- a/server/statistics/hot_peer_cache.go +++ b/server/statistics/hot_peer_cache.go @@ -46,6 +46,10 @@ const ( queueCap = 20000 ) +// ThresholdsUpdateInterval is the default interval to update thresholds. +// the refresh interval should be less than store heartbeat interval to keep the next calculate must use the latest threshold. +var ThresholdsUpdateInterval = 8 * time.Second + // Denoising is an option to calculate flow base on the real heartbeats. Should // only turn off by the simulator and the test. var Denoising = true @@ -60,33 +64,34 @@ var MinHotThresholds = [RegionStatCount]float64{ RegionWriteQueryNum: 32, } +type thresholds struct { + updatedTime time.Time + rates []float64 +} + // hotPeerCache saves the hot peer's statistics. type hotPeerCache struct { - kind RWType - peersOfStore map[uint64]*TopN // storeID -> hot peers - storesOfRegion map[uint64]map[uint64]struct{} // regionID -> storeIDs - regionsOfStore map[uint64]map[uint64]struct{} // storeID -> regionIDs - topNTTL time.Duration - reportIntervalSecs int - taskQueue chan FlowItemTask + kind RWType + peersOfStore map[uint64]*TopN // storeID -> hot peers + storesOfRegion map[uint64]map[uint64]struct{} // regionID -> storeIDs + regionsOfStore map[uint64]map[uint64]struct{} // storeID -> regionIDs + topNTTL time.Duration + taskQueue chan FlowItemTask + thresholdsOfStore map[uint64]*thresholds // storeID -> thresholds + // TODO: consider to remove store info when store is offline. 
} // NewHotPeerCache creates a hotPeerCache func NewHotPeerCache(kind RWType) *hotPeerCache { - c := &hotPeerCache{ - kind: kind, - peersOfStore: make(map[uint64]*TopN), - storesOfRegion: make(map[uint64]map[uint64]struct{}), - regionsOfStore: make(map[uint64]map[uint64]struct{}), - taskQueue: make(chan FlowItemTask, queueCap), - } - if kind == Write { - c.reportIntervalSecs = WriteReportInterval - } else { - c.reportIntervalSecs = ReadReportInterval + return &hotPeerCache{ + kind: kind, + peersOfStore: make(map[uint64]*TopN), + storesOfRegion: make(map[uint64]map[uint64]struct{}), + regionsOfStore: make(map[uint64]map[uint64]struct{}), + taskQueue: make(chan FlowItemTask, queueCap), + thresholdsOfStore: make(map[uint64]*thresholds), + topNTTL: time.Duration(3*kind.ReportInterval()) * time.Second, } - c.topNTTL = time.Duration(3*c.reportIntervalSecs) * time.Second - return c } // TODO: rename RegionStats as PeerStats @@ -292,19 +297,27 @@ func (f *hotPeerCache) getOldHotPeerStat(regionID, storeID uint64) *HotPeerStat } func (f *hotPeerCache) calcHotThresholds(storeID uint64) []float64 { + t, ok := f.thresholdsOfStore[storeID] + if ok && time.Since(t.updatedTime) <= ThresholdsUpdateInterval { + return t.rates + } + t = &thresholds{ + updatedTime: time.Now(), + rates: make([]float64, DimLen), + } + f.thresholdsOfStore[storeID] = t statKinds := f.kind.RegionStats() - ret := make([]float64, DimLen) for dim, kind := range statKinds { - ret[dim] = MinHotThresholds[kind] + t.rates[dim] = MinHotThresholds[kind] } tn, ok := f.peersOfStore[storeID] if !ok || tn.Len() < TopNN { - return ret + return t.rates } - for i := range ret { - ret[i] = math.Max(tn.GetTopNMin(i).(*HotPeerStat).GetLoad(i)*HotThresholdRatio, ret[i]) + for i := range t.rates { + t.rates[i] = math.Max(tn.GetTopNMin(i).(*HotPeerStat).GetLoad(i)*HotThresholdRatio, t.rates[i]) } - return ret + return t.rates } // gets the storeIDs, including old region and new region @@ -465,7 +478,7 @@ func (f *hotPeerCache) updateHotPeerStat(region *core.RegionInfo, newItem, oldIt func (f *hotPeerCache) updateNewHotPeerStat(newItem *HotPeerStat, deltaLoads []float64, interval time.Duration) *HotPeerStat { regionStats := f.kind.RegionStats() // interval is not 0 which is guaranteed by the caller. 
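To see the calcHotThresholds change above in isolation: the new thresholdsOfStore map simply memoizes one rates slice per store and recomputes it only once the entry is older than ThresholdsUpdateInterval. A minimal, self-contained sketch of that pattern follows; thresholdCache, cachedThresholds and compute are illustrative names, not PD's types.

package main

import (
	"fmt"
	"time"
)

// cachedThresholds plays the role of the thresholds struct above: the computed
// rates plus the time they were computed.
type cachedThresholds struct {
	updatedAt time.Time
	rates     []float64
}

// thresholdCache keeps one entry per store and recomputes it only when the
// cached copy is older than ttl.
type thresholdCache struct {
	ttl     time.Duration
	byStore map[uint64]*cachedThresholds
	compute func(storeID uint64) []float64 // the expensive per-store calculation being cached
}

func (c *thresholdCache) get(storeID uint64) []float64 {
	if t, ok := c.byStore[storeID]; ok && time.Since(t.updatedAt) <= c.ttl {
		return t.rates // still fresh, reuse the cached rates
	}
	rates := c.compute(storeID)
	c.byStore[storeID] = &cachedThresholds{updatedAt: time.Now(), rates: rates}
	return rates
}

func main() {
	calls := 0
	cache := &thresholdCache{
		ttl:     8 * time.Second,
		byStore: make(map[uint64]*cachedThresholds),
		compute: func(storeID uint64) []float64 {
			calls++
			return []float64{1, 32} // stand-ins for per-dimension minimum thresholds
		},
	}
	cache.get(1)
	cache.get(1)                          // second call within the TTL is served from the cache
	fmt.Println("compute calls:", calls) // 1
}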
- if interval.Seconds() >= float64(f.reportIntervalSecs) { + if interval.Seconds() >= float64(f.kind.ReportInterval()) { f.initItem(newItem) } newItem.actionType = Add diff --git a/server/statistics/hot_peer_cache_test.go b/server/statistics/hot_peer_cache_test.go index 81d1a44caa6b..c0103e9c8449 100644 --- a/server/statistics/hot_peer_cache_test.go +++ b/server/statistics/hot_peer_cache_test.go @@ -312,6 +312,10 @@ func TestUpdateHotPeerStat(t *testing.T) { region := core.NewRegionInfo(&metapb.Region{Id: regionID, Peers: []*metapb.Peer{peer}}, peer) // we statistic read peer info from store heartbeat rather than region heartbeat m := RegionHeartBeatReportInterval / StoreHeartBeatReportInterval + ThresholdsUpdateInterval = 0 + defer func() { + ThresholdsUpdateInterval = 8 * time.Second + }() // skip interval=0 interval := 0 @@ -399,6 +403,10 @@ func testMetrics(re *require.Assertions, interval, byteRate, expectThreshold flo cache := NewHotPeerCache(Read) storeID := uint64(1) re.GreaterOrEqual(byteRate, MinHotThresholds[RegionReadBytes]) + ThresholdsUpdateInterval = 0 + defer func() { + ThresholdsUpdateInterval = 8 * time.Second + }() for i := uint64(1); i < TopNN+10; i++ { var oldItem *HotPeerStat var item *HotPeerStat @@ -667,7 +675,7 @@ func TestHotPeerCacheTopN(t *testing.T) { cache := NewHotPeerCache(Write) now := time.Now() - for id := uint64(99); id > 0; id-- { + for id := uint64(0); id < 100; id++ { meta := &metapb.Region{ Id: id, Peers: []*metapb.Peer{{Id: id, StoreId: 1}}, @@ -686,10 +694,19 @@ func TestHotPeerCacheTopN(t *testing.T) { cache.updateStat(stat) } } + if id < 60 { + re.Equal(MinHotThresholds[RegionWriteKeys], cache.calcHotThresholds(1)[KeyDim]) // num Date: Fri, 25 Nov 2022 17:01:59 +0800 Subject: [PATCH 65/67] rule_checker: Add witness related rules (#5630) close tikv/pd#5627, ref tikv/pd#5627 Signed-off-by: Wenbo Zhang Co-authored-by: Ti Chi Robot Co-authored-by: JmPotato --- pkg/mock/mockcluster/mockcluster.go | 3 + server/cluster/cluster.go | 10 + server/config/config.go | 12 + server/config/persist_options.go | 24 ++ server/schedule/checker/checker_controller.go | 2 +- server/schedule/checker/rule_checker.go | 142 ++++++-- server/schedule/checker/rule_checker_test.go | 302 +++++++++++++++++- server/schedule/cluster.go | 1 + server/schedule/operator/builder.go | 77 +++-- .../schedule/operator/create_operator_test.go | 15 +- server/schedule/operator/operator.go | 12 + server/schedule/operator_controller.go | 3 + server/schedule/placement/fit.go | 38 ++- server/schedule/placement/fit_region_test.go | 20 +- server/schedule/placement/fit_test.go | 4 +- server/schedule/placement/rule_manager.go | 5 +- server/versioninfo/feature.go | 3 + 17 files changed, 576 insertions(+), 97 deletions(-) diff --git a/pkg/mock/mockcluster/mockcluster.go b/pkg/mock/mockcluster/mockcluster.go index d281d287eb1b..c751cf84541d 100644 --- a/pkg/mock/mockcluster/mockcluster.go +++ b/pkg/mock/mockcluster/mockcluster.go @@ -865,3 +865,6 @@ func (mc *Cluster) ObserveRegionsStats() { storeIDs, writeBytesRates, writeKeysRates := mc.BasicCluster.GetStoresWriteRate() mc.HotStat.ObserveRegionsStats(storeIDs, writeBytesRates, writeKeysRates) } + +// RecordOpStepWithTTL records OpStep with TTL +func (mc *Cluster) RecordOpStepWithTTL(regionID uint64) {} diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index 119b536013af..6b67e4bab16a 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -557,6 +557,16 @@ func (c *RaftCluster) GetMergeChecker() *checker.MergeChecker 
{ return c.coordinator.checkers.GetMergeChecker() } +// GetRuleChecker returns rule checker. +func (c *RaftCluster) GetRuleChecker() *checker.RuleChecker { + return c.coordinator.checkers.GetRuleChecker() +} + +// RecordOpStepWithTTL records OpStep with TTL +func (c *RaftCluster) RecordOpStepWithTTL(regionID uint64) { + c.GetRuleChecker().RecordRegionPromoteToNonWitness(regionID) +} + // GetSchedulers gets all schedulers. func (c *RaftCluster) GetSchedulers() []string { return c.coordinator.getSchedulers() diff --git a/server/config/config.go b/server/config/config.go index e596104c3729..88ebefab9043 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -246,6 +246,7 @@ const ( defaultEnablePlacementRules = true defaultEnableGRPCGateway = true defaultDisableErrorVerbose = true + defaultEnableSwitchWitness = false defaultDashboardAddress = "auto" @@ -650,6 +651,8 @@ type ScheduleConfig struct { MaxMergeRegionKeys uint64 `toml:"max-merge-region-keys" json:"max-merge-region-keys"` // SplitMergeInterval is the minimum interval time to permit merge after split. SplitMergeInterval typeutil.Duration `toml:"split-merge-interval" json:"split-merge-interval"` + // SwitchWitnessInterval is the minimum interval that allows a peer to become a witness again after it is promoted to non-witness. + SwitchWitnessInterval typeutil.Duration `toml:"switch-witness-interval" json:"swtich-witness-interval"` // EnableOneWayMerge is the option to enable one way merge. This means a Region can only be merged into the next region of it. EnableOneWayMerge bool `toml:"enable-one-way-merge" json:"enable-one-way-merge,string"` // EnableCrossTableMerge is the option to enable cross table merge. This means two Regions can be merged with different table IDs. @@ -770,6 +773,9 @@ type ScheduleConfig struct { // EnableDiagnostic is the the option to enable using diagnostic EnableDiagnostic bool `toml:"enable-diagnostic" json:"enable-diagnostic,string"` + + // EnableSwitchWitness is the option to enable using witness + EnableSwitchWitness bool `toml:"enable-switch-witness" json:"enable-switch-witness,string"` } // Clone returns a cloned scheduling configuration. 
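The two options added above are ordinary ScheduleConfig fields, one duration and one boolean, each carrying a TOML/JSON tag, with defaults applied in adjust in the next hunks. The stand-alone sketch below shows the general shape of such an option; the Duration wrapper only loosely imitates what typeutil.Duration provides, and the field and default names are assumptions for illustration.

package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// Duration is a time.Duration that round-trips through config files as a
// human-readable string such as "1h" or "30m".
type Duration struct{ time.Duration }

func (d *Duration) UnmarshalJSON(b []byte) error {
	var s string
	if err := json.Unmarshal(b, &s); err != nil {
		return err
	}
	v, err := time.ParseDuration(s)
	if err != nil {
		return err
	}
	d.Duration = v
	return nil
}

// scheduleConfig is a toy stand-in for the schedule section: one duration
// option plus one boolean switch.
type scheduleConfig struct {
	SwitchWitnessInterval Duration `json:"switch-witness-interval"`
	EnableSwitchWitness   bool     `json:"enable-switch-witness"`
}

// adjust applies a default when the option was not set, analogous to the
// defaultSwitchWitnessInterval (time.Hour) handling in this patch.
func (c *scheduleConfig) adjust() {
	if c.SwitchWitnessInterval.Duration == 0 {
		c.SwitchWitnessInterval.Duration = time.Hour
	}
}

func main() {
	var cfg scheduleConfig
	_ = json.Unmarshal([]byte(`{"switch-witness-interval": "30m"}`), &cfg)
	cfg.adjust()
	fmt.Println(cfg.SwitchWitnessInterval.Duration, cfg.EnableSwitchWitness) // 30m0s false
}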
@@ -795,6 +801,7 @@ const ( defaultMaxPendingPeerCount = 64 defaultMaxMergeRegionSize = 20 defaultSplitMergeInterval = time.Hour + defaultSwitchWitnessInterval = time.Hour defaultEnableDiagnostic = false defaultPatrolRegionInterval = 10 * time.Millisecond defaultMaxStoreDownTime = 30 * time.Minute @@ -833,6 +840,7 @@ func (c *ScheduleConfig) adjust(meta *configMetaData, reloading bool) error { adjustUint64(&c.MaxMergeRegionSize, defaultMaxMergeRegionSize) } adjustDuration(&c.SplitMergeInterval, defaultSplitMergeInterval) + adjustDuration(&c.SwitchWitnessInterval, defaultSwitchWitnessInterval) adjustDuration(&c.PatrolRegionInterval, defaultPatrolRegionInterval) adjustDuration(&c.MaxStoreDownTime, defaultMaxStoreDownTime) adjustDuration(&c.HotRegionsWriteInterval, defaultHotRegionsWriteInterval) @@ -882,6 +890,10 @@ func (c *ScheduleConfig) adjust(meta *configMetaData, reloading bool) error { c.EnableDiagnostic = defaultEnableDiagnostic } + if !meta.IsDefined("enable-switch-witness") { + c.EnableSwitchWitness = defaultEnableSwitchWitness + } + // new cluster:v2, old cluster:v1 if !meta.IsDefined("region-score-formula-version") && !reloading { adjustString(&c.RegionScoreFormulaVersion, defaultRegionScoreFormulaVersion) diff --git a/server/config/persist_options.go b/server/config/persist_options.go index 67d596d21a3e..35d5f5f97c22 100644 --- a/server/config/persist_options.go +++ b/server/config/persist_options.go @@ -272,6 +272,11 @@ func (o *PersistOptions) SetSplitMergeInterval(splitMergeInterval time.Duration) o.SetScheduleConfig(v) } +// GetSwitchWitnessInterval returns the interval between promote to non-witness and starting to switch to witness. +func (o *PersistOptions) GetSwitchWitnessInterval() time.Duration { + return o.GetScheduleConfig().SwitchWitnessInterval.Duration +} + // IsDiagnosticAllowed returns whether is enable to use diagnostic. func (o *PersistOptions) IsDiagnosticAllowed() bool { return o.GetScheduleConfig().EnableDiagnostic @@ -284,6 +289,18 @@ func (o *PersistOptions) SetEnableDiagnostic(enable bool) { o.SetScheduleConfig(v) } +// IsSwitchWitnessAllowed returns whether is enable to use witness. +func (o *PersistOptions) IsSwitchWitnessAllowed() bool { + return o.GetScheduleConfig().EnableSwitchWitness +} + +// SetEnableSwitchWitness to set the option for witness. It's only used to test. +func (o *PersistOptions) SetEnableSwitchWitness(enable bool) { + v := o.GetScheduleConfig().Clone() + v.EnableSwitchWitness = enable + o.SetScheduleConfig(v) +} + // SetMaxMergeRegionSize sets the max merge region size. func (o *PersistOptions) SetMaxMergeRegionSize(maxMergeRegionSize uint64) { v := o.GetScheduleConfig().Clone() @@ -563,6 +580,13 @@ func (o *PersistOptions) IsUseJointConsensus() bool { return o.GetScheduleConfig().EnableJointConsensus } +// SetEnableUseJointConsensus to set the option for using joint consensus. It's only used to test. +func (o *PersistOptions) SetEnableUseJointConsensus(enable bool) { + v := o.GetScheduleConfig().Clone() + v.EnableJointConsensus = enable + o.SetScheduleConfig(v) +} + // IsTraceRegionFlow returns if the region flow is tracing. // If the accuracy cannot reach 0.1 MB, it is considered not. 
func (o *PersistOptions) IsTraceRegionFlow() bool { diff --git a/server/schedule/checker/checker_controller.go b/server/schedule/checker/checker_controller.go index 4e7e28334d02..4106ed1c7809 100644 --- a/server/schedule/checker/checker_controller.go +++ b/server/schedule/checker/checker_controller.go @@ -59,7 +59,7 @@ func NewController(ctx context.Context, cluster schedule.Cluster, ruleManager *p opController: opController, learnerChecker: NewLearnerChecker(cluster), replicaChecker: NewReplicaChecker(cluster, regionWaitingList), - ruleChecker: NewRuleChecker(cluster, ruleManager, regionWaitingList), + ruleChecker: NewRuleChecker(ctx, cluster, ruleManager, regionWaitingList), splitChecker: NewSplitChecker(cluster, ruleManager, labeler), mergeChecker: NewMergeChecker(ctx, cluster), jointStateChecker: NewJointStateChecker(cluster), diff --git a/server/schedule/checker/rule_checker.go b/server/schedule/checker/rule_checker.go index 8596af39412e..82fa6c7385c6 100644 --- a/server/schedule/checker/rule_checker.go +++ b/server/schedule/checker/rule_checker.go @@ -15,12 +15,14 @@ package checker import ( + "context" "errors" "math" "time" "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/metapb" + "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" "github.com/tikv/pd/pkg/cache" "github.com/tikv/pd/pkg/errs" @@ -29,6 +31,7 @@ import ( "github.com/tikv/pd/server/schedule/filter" "github.com/tikv/pd/server/schedule/operator" "github.com/tikv/pd/server/schedule/placement" + "github.com/tikv/pd/server/versioninfo" "go.uber.org/zap" ) @@ -46,23 +49,25 @@ const maxPendingListLen = 100000 // RuleChecker fix/improve region by placement rules. type RuleChecker struct { PauseController - cluster schedule.Cluster - ruleManager *placement.RuleManager - name string - regionWaitingList cache.Cache - pendingList cache.Cache - record *recorder + cluster schedule.Cluster + ruleManager *placement.RuleManager + name string + regionWaitingList cache.Cache + pendingList cache.Cache + switchWitnessCache *cache.TTLUint64 + record *recorder } // NewRuleChecker creates a checker instance. -func NewRuleChecker(cluster schedule.Cluster, ruleManager *placement.RuleManager, regionWaitingList cache.Cache) *RuleChecker { +func NewRuleChecker(ctx context.Context, cluster schedule.Cluster, ruleManager *placement.RuleManager, regionWaitingList cache.Cache) *RuleChecker { return &RuleChecker{ - cluster: cluster, - ruleManager: ruleManager, - name: "rule-checker", - regionWaitingList: regionWaitingList, - pendingList: cache.NewDefaultCache(maxPendingListLen), - record: newRecord(), + cluster: cluster, + ruleManager: ruleManager, + name: "rule-checker", + regionWaitingList: regionWaitingList, + pendingList: cache.NewDefaultCache(maxPendingListLen), + switchWitnessCache: cache.NewIDTTL(ctx, time.Minute, cluster.GetOpts().GetSwitchWitnessInterval()), + record: newRecord(), } } @@ -144,6 +149,17 @@ func (c *RuleChecker) CheckWithFit(region *core.RegionInfo, fit *placement.Regio return nil } +// RecordRegionPromoteToNonWitness put the recently switch non-witness region into cache. RuleChecker +// will skip switch it back to witness for a while. 
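The switchWitnessCache added to the struct above is a TTL cache keyed by region ID: a region that was recently promoted to non-witness is remembered for GetSwitchWitnessInterval and skipped when the checker would otherwise switch it back. The sketch below illustrates that bookkeeping with a plain map; the names and the lazy eviction are illustrative only, and PD's cache.TTLUint64 differs in detail (it takes a context and manages expiry itself).

package main

import (
	"fmt"
	"time"
)

// ttlSet remembers IDs for a configurable interval.
type ttlSet struct {
	ttl     time.Duration
	entries map[uint64]time.Time // ID -> deadline
}

func newTTLSet(ttl time.Duration) *ttlSet {
	return &ttlSet{ttl: ttl, entries: make(map[uint64]time.Time)}
}

func (s *ttlSet) put(id uint64) { s.entries[id] = time.Now().Add(s.ttl) }

// updateTTL lets the protection window follow a config change, as the checker
// does before consulting the cache.
func (s *ttlSet) updateTTL(ttl time.Duration) { s.ttl = ttl }

func (s *ttlSet) exists(id uint64) bool {
	deadline, ok := s.entries[id]
	if !ok {
		return false
	}
	if time.Now().After(deadline) {
		delete(s.entries, id) // lazily drop expired entries
		return false
	}
	return true
}

func main() {
	recentlyPromoted := newTTLSet(50 * time.Millisecond)
	recentlyPromoted.put(1)
	fmt.Println(recentlyPromoted.exists(1)) // true: do not switch region 1 back to witness yet
	time.Sleep(60 * time.Millisecond)
	fmt.Println(recentlyPromoted.exists(1)) // false: the protection window has passed
}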
+func (c *RuleChecker) RecordRegionPromoteToNonWitness(regionID uint64) { + c.switchWitnessCache.PutWithTTL(regionID, nil, c.cluster.GetOpts().GetSwitchWitnessInterval()) +} + +func (c *RuleChecker) isWitnessEnabled() bool { + return versioninfo.IsFeatureSupported(c.cluster.GetOpts().GetClusterVersion(), versioninfo.SwitchWitness) && + c.cluster.GetOpts().IsSwitchWitnessAllowed() +} + func (c *RuleChecker) fixRulePeer(region *core.RegionInfo, fit *placement.RegionFit, rf *placement.RuleFit) (*operator.Operator, error) { // make up peers. if len(rf.Peers) < rf.Rule.Count { @@ -152,8 +168,16 @@ func (c *RuleChecker) fixRulePeer(region *core.RegionInfo, fit *placement.Region // fix down/offline peers. for _, peer := range rf.Peers { if c.isDownPeer(region, peer) { - checkerCounter.WithLabelValues("rule_checker", "replace-down").Inc() - return c.replaceUnexpectRulePeer(region, rf, fit, peer, downStatus) + if c.isStoreDownTimeHitMaxDownTime(peer.GetStoreId()) { + checkerCounter.WithLabelValues("rule_checker", "replace-down").Inc() + return c.replaceUnexpectRulePeer(region, rf, fit, peer, downStatus) + } + if c.isWitnessEnabled() { + if witness, ok := c.hasAvailableWitness(region, peer); ok { + checkerCounter.WithLabelValues("rule_checker", "promote-witness").Inc() + return operator.CreateNonWitnessPeerOperator("promote-witness", c.cluster, region, witness) + } + } } if c.isOfflinePeer(peer) { checkerCounter.WithLabelValues("rule_checker", "replace-offline").Inc() @@ -182,7 +206,11 @@ func (c *RuleChecker) addRulePeer(region *core.RegionInfo, rf *placement.RuleFit c.handleFilterState(region, filterByTempState) return nil, errNoStoreToAdd } - peer := &metapb.Peer{StoreId: store, Role: rf.Rule.Role.MetaPeerRole(), IsWitness: rf.Rule.IsWitness} + isWitness := rf.Rule.IsWitness + if !c.isWitnessEnabled() { + isWitness = false + } + peer := &metapb.Peer{StoreId: store, Role: rf.Rule.Role.MetaPeerRole(), IsWitness: isWitness} op, err := operator.CreateAddPeerOperator("add-rule-peer", c.cluster, region, peer, operator.OpReplica) if err != nil { return nil, err @@ -200,7 +228,20 @@ func (c *RuleChecker) replaceUnexpectRulePeer(region *core.RegionInfo, rf *place c.handleFilterState(region, filterByTempState) return nil, errNoStoreToReplace } - newPeer := &metapb.Peer{StoreId: store, Role: rf.Rule.Role.MetaPeerRole(), IsWitness: rf.Rule.IsWitness} + var isWitness bool + if c.isWitnessEnabled() { + // No matter whether witness placement rule is enabled or disabled, when peer's downtime + // exceeds the threshold(30min), add a witness and remove the down peer. Then witness is + // promoted to non-witness gradually to improve availability. + if status == "down" { + isWitness = true + } else { + isWitness = rf.Rule.IsWitness + } + } else { + isWitness = false + } + newPeer := &metapb.Peer{StoreId: store, Role: rf.Rule.Role.MetaPeerRole(), IsWitness: isWitness} // pick the smallest leader store to avoid the Offline store be snapshot generator bottleneck. var newLeader *metapb.Peer if region.GetLeader().GetId() == peer.GetId() { @@ -270,14 +311,23 @@ func (c *RuleChecker) fixLooseMatchPeer(region *core.RegionInfo, fit *placement. 
if region.GetLeader().GetId() == peer.GetId() && rf.Rule.IsWitness { return nil, errPeerCannotBeWitness } - if !core.IsWitness(peer) && rf.Rule.IsWitness { + if !core.IsWitness(peer) && rf.Rule.IsWitness && c.isWitnessEnabled() { + c.switchWitnessCache.UpdateTTL(c.cluster.GetOpts().GetSwitchWitnessInterval()) + if c.switchWitnessCache.Exists(region.GetID()) { + checkerCounter.WithLabelValues("rule_checker", "recently-promote-to-non-witness").Inc() + return nil, nil + } + if len(region.GetPendingPeers()) > 0 { + checkerCounter.WithLabelValues("rule_checker", "cancel-switch-to-witness").Inc() + return nil, nil + } lv := "set-voter-witness" if core.IsLearner(peer) { lv = "set-learner-witness" } checkerCounter.WithLabelValues("rule_checker", lv).Inc() return operator.CreateWitnessPeerOperator("fix-witness-peer", c.cluster, region, peer) - } else if core.IsWitness(peer) && !rf.Rule.IsWitness { + } else if core.IsWitness(peer) && (!rf.Rule.IsWitness || !c.isWitnessEnabled()) { lv := "set-voter-non-witness" if core.IsLearner(peer) { lv = "set-learner-non-witness" @@ -327,7 +377,11 @@ func (c *RuleChecker) fixBetterLocation(region *core.RegionInfo, rf *placement.R return nil, nil } checkerCounter.WithLabelValues("rule_checker", "move-to-better-location").Inc() - newPeer := &metapb.Peer{StoreId: newStore, Role: rf.Rule.Role.MetaPeerRole(), IsWitness: rf.Rule.IsWitness} + isWitness := rf.Rule.IsWitness + if !c.isWitnessEnabled() { + isWitness = false + } + newPeer := &metapb.Peer{StoreId: newStore, Role: rf.Rule.Role.MetaPeerRole(), IsWitness: isWitness} return operator.CreateMovePeerOperator("move-to-better-location", c.cluster, region, operator.OpReplica, oldStore, newPeer) } @@ -364,24 +418,24 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg func (c *RuleChecker) isDownPeer(region *core.RegionInfo, peer *metapb.Peer) bool { for _, stats := range region.GetDownPeers() { - if stats.GetPeer().GetId() != peer.GetId() { - continue - } - storeID := peer.GetStoreId() - store := c.cluster.GetStore(storeID) - if store == nil { - log.Warn("lost the store, maybe you are recovering the PD cluster", zap.Uint64("store-id", storeID)) - return false - } - // Only consider the state of the Store, not `stats.DownSeconds`. 
- if store.DownTime() < c.cluster.GetOpts().GetMaxStoreDownTime() { - continue + if stats.GetPeer().GetId() == peer.GetId() { + storeID := peer.GetStoreId() + store := c.cluster.GetStore(storeID) + if store == nil { + log.Warn("lost the store, maybe you are recovering the PD cluster", zap.Uint64("store-id", storeID)) + return false + } + return true } - return true } return false } +func (c *RuleChecker) isStoreDownTimeHitMaxDownTime(storeID uint64) bool { + store := c.cluster.GetStore(storeID) + return store.DownTime() >= c.cluster.GetOpts().GetMaxStoreDownTime() +} + func (c *RuleChecker) isOfflinePeer(peer *metapb.Peer) bool { store := c.cluster.GetStore(peer.GetStoreId()) if store == nil { @@ -391,6 +445,28 @@ func (c *RuleChecker) isOfflinePeer(peer *metapb.Peer) bool { return !store.IsPreparing() && !store.IsServing() } +func (c *RuleChecker) hasAvailableWitness(region *core.RegionInfo, peer *metapb.Peer) (*metapb.Peer, bool) { + witnesses := region.GetWitnesses() + if len(witnesses) == 0 { + return nil, false + } + isAvailable := func(downPeers []*pdpb.PeerStats, witness *metapb.Peer) bool { + for _, stats := range downPeers { + if stats.GetPeer().GetId() == witness.GetId() { + return false + } + } + return c.cluster.GetStore(witness.GetStoreId()) != nil + } + downPeers := region.GetDownPeers() + for _, witness := range witnesses { + if witness.GetId() != peer.GetId() && isAvailable(downPeers, witness) { + return witness, true + } + } + return nil, false +} + func (c *RuleChecker) strategy(region *core.RegionInfo, rule *placement.Rule) *ReplicaStrategy { return &ReplicaStrategy{ checkerName: c.name, diff --git a/server/schedule/checker/rule_checker_test.go b/server/schedule/checker/rule_checker_test.go index eab19933364b..3778b78e4172 100644 --- a/server/schedule/checker/rule_checker_test.go +++ b/server/schedule/checker/rule_checker_test.go @@ -16,13 +16,16 @@ package checker import ( "context" + "fmt" "testing" + "time" "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/stretchr/testify/suite" "github.com/tikv/pd/pkg/cache" + "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/mock/mockcluster" "github.com/tikv/pd/pkg/testutil" "github.com/tikv/pd/server/config" @@ -49,10 +52,12 @@ func (suite *ruleCheckerTestSuite) SetupTest() { cfg := config.NewTestOptions() suite.ctx, suite.cancel = context.WithCancel(context.Background()) suite.cluster = mockcluster.NewCluster(suite.ctx, cfg) - suite.cluster.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) + suite.cluster.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.SwitchWitness)) suite.cluster.SetEnablePlacementRules(true) + suite.cluster.SetEnableSwitchWitness(true) + suite.cluster.SetEnableUseJointConsensus(false) suite.ruleManager = suite.cluster.RuleManager - suite.rc = NewRuleChecker(suite.cluster, suite.ruleManager, cache.NewDefaultCache(10)) + suite.rc = NewRuleChecker(suite.ctx, suite.cluster, suite.ruleManager, cache.NewDefaultCache(10)) } func (suite *ruleCheckerTestSuite) TearDownTest() { @@ -416,7 +421,7 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness5() { suite.cluster.AddLabelsStore(3, 1, map[string]string{"C": "voter"}) suite.cluster.AddLeaderRegion(1, 1, 2, 3) - suite.ruleManager.SetRule(&placement.Rule{ + err := suite.ruleManager.SetRule(&placement.Rule{ GroupID: "pd", ID: "r1", Index: 100, @@ -428,8 +433,86 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness5() { {Key: "A", Op: "In", Values: 
[]string{"leader"}}, }, }) + suite.Error(err) + suite.Equal(errs.ErrRuleContent.FastGenByArgs(fmt.Sprintf("define multiple witness by count %d", 2)).Error(), err.Error()) +} + +func (suite *ruleCheckerTestSuite) TestFixRuleWitness6() { + suite.cluster.AddLabelsStore(1, 1, map[string]string{"A": "leader"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"B": "voter"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"C": "voter"}) + suite.cluster.AddLeaderRegion(1, 1, 2, 3) + + err := suite.ruleManager.SetRules([]*placement.Rule{ + { + GroupID: "pd", + ID: "default", + Index: 100, + Role: placement.Voter, + IsWitness: false, + Count: 2, + }, + { + GroupID: "pd", + ID: "r1", + Index: 100, + Role: placement.Voter, + Count: 1, + IsWitness: true, + LabelConstraints: []placement.LabelConstraint{ + {Key: "C", Op: "in", Values: []string{"voter"}}, + }, + }, + }) + suite.NoError(err) + + suite.rc.RecordRegionPromoteToNonWitness(1) op := suite.rc.Check(suite.cluster.GetRegion(1)) suite.Nil(op) + + suite.rc.switchWitnessCache.Remove(1) + op = suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) +} + +func (suite *ruleCheckerTestSuite) TestDisableWitness() { + suite.cluster.AddLabelsStore(1, 1, map[string]string{"A": "leader"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"B": "voter"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"C": "voter"}) + suite.cluster.AddLeaderRegion(1, 1, 2, 3) + + err := suite.ruleManager.SetRules([]*placement.Rule{ + { + GroupID: "pd", + ID: "default", + Index: 100, + Role: placement.Voter, + IsWitness: false, + Count: 2, + }, + { + GroupID: "pd", + ID: "r1", + Index: 100, + Role: placement.Voter, + Count: 1, + IsWitness: true, + LabelConstraints: []placement.LabelConstraint{ + {Key: "C", Op: "in", Values: []string{"voter"}}, + }, + }, + }) + suite.NoError(err) + + r := suite.cluster.GetRegion(1) + r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(3)})) + + op := suite.rc.Check(r) + suite.Nil(op) + + suite.cluster.SetEnableSwitchWitness(false) + op = suite.rc.Check(r) + suite.NotNil(op) } func (suite *ruleCheckerTestSuite) TestBetterReplacement() { @@ -718,6 +801,188 @@ func (suite *ruleCheckerTestSuite) TestFixDownPeer() { suite.Nil(suite.rc.Check(region)) } +func (suite *ruleCheckerTestSuite) TestFixDownPeerWithNoWitness() { + suite.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"zone": "z2"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"zone": "z3"}) + suite.cluster.AddLeaderRegion(1, 1, 2, 3) + + suite.cluster.SetStoreDown(2) + suite.cluster.GetStore(2).GetMeta().LastHeartbeat = time.Now().Add(-11 * time.Minute).UnixNano() + r := suite.cluster.GetRegion(1) + // set peer2 to down + r = r.Clone(core.WithDownPeers([]*pdpb.PeerStats{{Peer: r.GetStorePeer(2), DownSeconds: 600}})) + suite.Nil(suite.rc.Check(r)) +} + +func (suite *ruleCheckerTestSuite) TestFixDownWitnessPeer() { + suite.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"zone": "z2"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"zone": "z3"}) + suite.cluster.AddLeaderRegion(1, 1, 2, 3) + + suite.cluster.SetStoreDown(2) + suite.cluster.GetStore(2).GetMeta().LastHeartbeat = time.Now().Add(-11 * time.Minute).UnixNano() + r := suite.cluster.GetRegion(1) + // set peer2 to down + r = r.Clone(core.WithDownPeers([]*pdpb.PeerStats{{Peer: r.GetStorePeer(2), DownSeconds: 600}})) + // set peer2 to witness + r = 
r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(2)})) + + suite.ruleManager.SetRule(&placement.Rule{ + GroupID: "pd", + ID: "default", + Role: placement.Voter, + Count: 2, + }) + suite.ruleManager.SetRule(&placement.Rule{ + GroupID: "pd", + ID: "r1", + Role: placement.Voter, + Count: 1, + IsWitness: true, + }) + suite.Nil(suite.rc.Check(r)) + + suite.cluster.GetStore(2).GetMeta().LastHeartbeat = time.Now().Add(-31 * time.Minute).UnixNano() + suite.Nil(suite.rc.Check(r)) +} + +func (suite *ruleCheckerTestSuite) TestFixDownPeerWithAvailableWitness() { + suite.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"zone": "z2"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"zone": "z3"}) + suite.cluster.AddLeaderRegion(1, 1, 2, 3) + + suite.cluster.SetStoreDown(2) + suite.cluster.GetStore(2).GetMeta().LastHeartbeat = time.Now().Add(-11 * time.Minute).UnixNano() + r := suite.cluster.GetRegion(1) + // set peer2 to down + r = r.Clone(core.WithDownPeers([]*pdpb.PeerStats{{Peer: r.GetStorePeer(2), DownSeconds: 600}})) + // set peer3 to witness + r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(3)})) + + suite.ruleManager.SetRule(&placement.Rule{ + GroupID: "pd", + ID: "default", + Role: placement.Voter, + Count: 2, + }) + suite.ruleManager.SetRule(&placement.Rule{ + GroupID: "pd", + ID: "r1", + Role: placement.Voter, + Count: 1, + IsWitness: true, + }) + + op := suite.rc.Check(r) + + suite.NotNil(op) + suite.Equal("promote-witness", op.Desc()) + suite.Equal(uint64(3), op.Step(0).(operator.RemovePeer).FromStore) + suite.Equal(uint64(3), op.Step(1).(operator.AddLearner).ToStore) + suite.Equal(uint64(3), op.Step(2).(operator.BecomeNonWitness).StoreID) + suite.Equal(uint64(3), op.Step(3).(operator.PromoteLearner).ToStore) +} + +func (suite *ruleCheckerTestSuite) TestFixDownPeerWithAvailableWitness2() { + suite.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"zone": "z2"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"zone": "z3"}) + suite.cluster.AddLeaderRegion(1, 1, 2, 3) + + suite.cluster.SetStoreDown(2) + suite.cluster.GetStore(2).GetMeta().LastHeartbeat = time.Now().Add(-31 * time.Minute).UnixNano() + r := suite.cluster.GetRegion(1) + // set peer2 to down + r = r.Clone(core.WithDownPeers([]*pdpb.PeerStats{{Peer: r.GetStorePeer(2), DownSeconds: 6000}})) + // set peer3 to witness + r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(3)})) + + suite.ruleManager.SetRule(&placement.Rule{ + GroupID: "pd", + ID: "default", + Role: placement.Voter, + Count: 2, + }) + suite.ruleManager.SetRule(&placement.Rule{ + GroupID: "pd", + ID: "r1", + Role: placement.Voter, + Count: 1, + IsWitness: true, + }) + + op := suite.rc.Check(r) + + suite.Nil(op) +} + +func (suite *ruleCheckerTestSuite) TestFixDownPeerWithAvailableWitness3() { + suite.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"zone": "z2"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"zone": "z3"}) + suite.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z3"}) + suite.cluster.AddLeaderRegion(1, 1, 2, 3) + + suite.cluster.SetStoreDown(2) + suite.cluster.GetStore(2).GetMeta().LastHeartbeat = time.Now().Add(-31 * time.Minute).UnixNano() + r := suite.cluster.GetRegion(1) + // set peer2 to down + r = r.Clone(core.WithDownPeers([]*pdpb.PeerStats{{Peer: r.GetStorePeer(2), DownSeconds: 6000}})) + // set peer3 to 
witness + r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(3)})) + + suite.ruleManager.SetRule(&placement.Rule{ + GroupID: "pd", + ID: "default", + Role: placement.Voter, + Count: 2, + }) + suite.ruleManager.SetRule(&placement.Rule{ + GroupID: "pd", + ID: "r1", + Role: placement.Voter, + Count: 1, + IsWitness: true, + }) + + op := suite.rc.Check(r) + + suite.NotNil(op) + suite.Equal("replace-rule-down-peer", op.Desc()) + suite.Equal(uint64(4), op.Step(0).(operator.AddLearner).ToStore) + suite.True(op.Step(0).(operator.AddLearner).IsWitness) + suite.Equal(uint64(4), op.Step(1).(operator.PromoteLearner).ToStore) + suite.True(op.Step(1).(operator.PromoteLearner).IsWitness) + suite.Equal(uint64(2), op.Step(2).(operator.RemovePeer).FromStore) +} + +func (suite *ruleCheckerTestSuite) TestFixDownPeerWithAvailableWitness4() { + suite.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"zone": "z2"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"zone": "z3"}) + suite.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z3"}) + suite.cluster.AddLeaderRegion(1, 1, 2, 3) + + suite.cluster.SetStoreDown(2) + suite.cluster.GetStore(2).GetMeta().LastHeartbeat = time.Now().Add(-31 * time.Minute).UnixNano() + r := suite.cluster.GetRegion(1) + // set peer2 to down + r = r.Clone(core.WithDownPeers([]*pdpb.PeerStats{{Peer: r.GetStorePeer(2), DownSeconds: 6000}})) + + op := suite.rc.Check(r) + + suite.NotNil(op) + suite.Equal("replace-rule-down-peer", op.Desc()) + suite.Equal(uint64(4), op.Step(0).(operator.AddLearner).ToStore) + suite.True(op.Step(0).(operator.AddLearner).IsWitness) + suite.Equal(uint64(4), op.Step(1).(operator.PromoteLearner).ToStore) + suite.True(op.Step(1).(operator.PromoteLearner).IsWitness) + suite.Equal(uint64(2), op.Step(2).(operator.RemovePeer).FromStore) +} + // See issue: https://github.com/tikv/pd/issues/3705 func (suite *ruleCheckerTestSuite) TestFixOfflinePeer() { suite.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1"}) @@ -751,6 +1016,37 @@ func (suite *ruleCheckerTestSuite) TestFixOfflinePeer() { suite.Nil(suite.rc.Check(region)) } +func (suite *ruleCheckerTestSuite) TestFixOfflinePeerWithAvaliableWitness() { + suite.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"zone": "z1"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"zone": "z2"}) + suite.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z3"}) + suite.cluster.AddLabelsStore(5, 1, map[string]string{"zone": "z3"}) + suite.cluster.AddLeaderRegion(1, 1, 3, 4) + + r := suite.cluster.GetRegion(1) + r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(2)})) + suite.ruleManager.SetRule(&placement.Rule{ + GroupID: "pd", + ID: "default", + Role: placement.Voter, + Count: 2, + }) + suite.ruleManager.SetRule(&placement.Rule{ + GroupID: "pd", + ID: "r1", + Role: placement.Voter, + Count: 1, + IsWitness: true, + }) + suite.Nil(suite.rc.Check(r)) + + suite.cluster.SetStoreOffline(4) + op := suite.rc.Check(r) + suite.NotNil(op) + suite.Equal("replace-rule-offline-peer", op.Desc()) +} + func (suite *ruleCheckerTestSuite) TestRuleCache() { suite.cluster.PersistOptions.SetPlacementRulesCacheEnabled(true) suite.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1"}) diff --git a/server/schedule/cluster.go b/server/schedule/cluster.go index 567ca9c96ddb..b18ba040b225 100644 --- a/server/schedule/cluster.go +++ b/server/schedule/cluster.go @@ -36,4 +36,5 @@ 
type Cluster interface { RemoveScheduler(name string) error AddSuspectRegions(ids ...uint64) SetHotPendingInfluenceMetrics(storeLabel, rwTy, dim string, load float64) + RecordOpStepWithTTL(regionID uint64) } diff --git a/server/schedule/operator/builder.go b/server/schedule/operator/builder.go index 5eb74e047d5e..4dfb98324e52 100644 --- a/server/schedule/operator/builder.go +++ b/server/schedule/operator/builder.go @@ -77,12 +77,12 @@ type Builder struct { forceTargetLeader bool // intermediate states - currentPeers peersMap - currentLeaderStoreID uint64 - toAdd, toRemove, toPromote, toDemote peersMap - toWitness, toNonWitness, toPromoteAfterSwitchToNonWitness peersMap - steps []OpStep // generated steps. - peerAddStep map[uint64]int // record at which step a peer is created. + currentPeers peersMap + currentLeaderStoreID uint64 + toAdd, toRemove, toPromote, toDemote peersMap + toWitness, toNonWitness, toPromoteNonWitness peersMap + steps []OpStep // generated steps. + peerAddStep map[uint64]int // record at which step a peer is created. // comparison function stepPlanPreferFuncs []func(stepPlan) int // for buildStepsWithoutJointConsensus @@ -425,7 +425,7 @@ func (b *Builder) prepareBuild() (string, error) { b.toDemote = newPeersMap() b.toWitness = newPeersMap() b.toNonWitness = newPeersMap() - b.toPromoteAfterSwitchToNonWitness = newPeersMap() + b.toPromoteNonWitness = newPeersMap() voterCount := 0 for _, peer := range b.targetPeers { @@ -438,7 +438,7 @@ func (b *Builder) prepareBuild() (string, error) { } // Diff `originPeers` and `targetPeers` to initialize `toAdd`, `toRemove`, `toPromote`, `toDemote`, - // `toWitness`, `toNonWitness`, `toPromoteAfterSwitchToNonWitness`. + // `toWitness`, `toNonWitness`, `toPromoteNonWitness`. // Note: Use `toDemote` only when `useJointConsensus` is true. Otherwise use `toAdd`, `toRemove` instead. for _, o := range b.originPeers { n := b.targetPeers[o.GetStoreId()] @@ -465,7 +465,7 @@ func (b *Builder) prepareBuild() (string, error) { if !core.IsLearner(n) { n.Role = metapb.PeerRole_Learner n.IsWitness = true - b.toPromoteAfterSwitchToNonWitness.Set(n) + b.toPromoteNonWitness.Set(n) } b.toNonWitness.Set(n) } else if !isOriginPeerWitness && isTargetPeerWitness { @@ -526,7 +526,7 @@ func (b *Builder) prepareBuild() (string, error) { // Although switch witness may have nothing to do with conf change (except switch witness voter to non-witness voter: // it will domote to learner first, then switch witness, finally promote the non-witness learner to voter back), // the logic here is reused for batch switch. - if len(b.toAdd)+len(b.toRemove)+len(b.toPromote)+len(b.toWitness)+len(b.toNonWitness)+len(b.toPromoteAfterSwitchToNonWitness) <= 1 && + if len(b.toAdd)+len(b.toRemove)+len(b.toPromote)+len(b.toWitness)+len(b.toNonWitness)+len(b.toPromoteNonWitness) <= 1 && len(b.toDemote) == 0 && !(len(b.toRemove) == 1 && len(b.targetPeers) == 1) { // If only one peer changed and the change type is not demote, joint consensus is not used. // Unless the changed is 2 voters to 1 voter, see https://github.com/tikv/pd/issues/4411 . 
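prepareBuild above fills toAdd, toRemove, toWitness, toNonWitness (and the renamed toPromoteNonWitness) by diffing originPeers against targetPeers. Ignoring roles, leaders, promotion/demotion and joint consensus, that comparison reduces to the bucket-by-store walk sketched below; the types here are illustrative only.

package main

import "fmt"

type peer struct{ isWitness bool }

type changes struct {
	toAdd, toRemove, toWitness, toNonWitness []uint64
}

// diffPeers buckets each store into add/remove/switch-to-witness/switch-to-non-witness
// by comparing the origin and target peer maps.
func diffPeers(origin, target map[uint64]peer) changes {
	var c changes
	for storeID, o := range origin {
		t, ok := target[storeID]
		switch {
		case !ok:
			c.toRemove = append(c.toRemove, storeID)
		case o.isWitness && !t.isWitness:
			c.toNonWitness = append(c.toNonWitness, storeID)
		case !o.isWitness && t.isWitness:
			c.toWitness = append(c.toWitness, storeID)
		}
	}
	for storeID := range target {
		if _, ok := origin[storeID]; !ok {
			c.toAdd = append(c.toAdd, storeID)
		}
	}
	return c
}

func main() {
	origin := map[uint64]peer{1: {}, 2: {isWitness: true}, 3: {}}
	target := map[uint64]peer{1: {}, 2: {}, 4: {}}
	fmt.Printf("%+v\n", diffPeers(origin, target))
	// {toAdd:[4] toRemove:[3] toWitness:[] toNonWitness:[2]}
}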
@@ -634,12 +634,13 @@ func (b *Builder) buildStepsWithJointConsensus(kind OpKind) (OpKind, error) { b.execBatchSwitchWitnesses() - for _, promote := range b.toPromoteAfterSwitchToNonWitness.IDs() { - peer := b.toPromoteAfterSwitchToNonWitness[promote] + for _, promote := range b.toPromoteNonWitness.IDs() { + peer := b.toPromoteNonWitness[promote] + peer.IsWitness = false b.toPromote.Set(peer) kind |= OpRegion } - b.toPromoteAfterSwitchToNonWitness = newPeersMap() + b.toPromoteNonWitness = newPeersMap() b.execChangePeerV2(true, false) return kind, nil @@ -709,7 +710,8 @@ func (b *Builder) preferOldPeerAsLeader(targetLeaderStoreID uint64) int { func (b *Builder) buildStepsWithoutJointConsensus(kind OpKind) (OpKind, error) { b.initStepPlanPreferFuncs() - for len(b.toAdd) > 0 || len(b.toRemove) > 0 || len(b.toPromote) > 0 || len(b.toDemote) > 0 || len(b.toNonWitness) > 0 || len(b.toWitness) > 0 { + for len(b.toAdd) > 0 || len(b.toRemove) > 0 || len(b.toPromote) > 0 || len(b.toDemote) > 0 || + len(b.toNonWitness) > 0 || len(b.toPromoteNonWitness) > 0 || len(b.toWitness) > 0 { plan := b.peerPlan() if plan.IsEmpty() { return kind, errors.New("fail to build operator: plan is empty, maybe no valid leader") @@ -733,14 +735,17 @@ func (b *Builder) buildStepsWithoutJointConsensus(kind OpKind) (OpKind, error) { b.execRemovePeer(plan.remove) kind |= OpRegion } - if plan.witness != nil { - b.execSwitchToWitness(plan.witness) - kind |= OpRegion - } if plan.nonWitness != nil { b.execSwitchToNonWitness(plan.nonWitness) kind |= OpRegion } + if plan.promoteNonWitness != nil { + b.execPromoteNonWitness(plan.promoteNonWitness) + } + if plan.witness != nil { + b.execSwitchToWitness(plan.witness) + kind |= OpRegion + } } b.setTargetLeaderIfNotExist() @@ -770,6 +775,12 @@ func (b *Builder) execPromoteLearner(peer *metapb.Peer) { delete(b.toPromote, peer.GetStoreId()) } +func (b *Builder) execPromoteNonWitness(peer *metapb.Peer) { + b.steps = append(b.steps, PromoteLearner{ToStore: peer.GetStoreId(), PeerID: peer.GetId(), IsWitness: false}) + b.currentPeers.Set(peer) + delete(b.toPromoteNonWitness, peer.GetStoreId()) +} + func (b *Builder) execAddPeer(peer *metapb.Peer) { if b.lightWeight { b.steps = append(b.steps, AddLearner{ToStore: peer.GetStoreId(), PeerID: peer.GetId(), IsLightWeight: b.lightWeight, IsWitness: peer.GetIsWitness()}) @@ -816,7 +827,7 @@ func (b *Builder) execChangePeerV2(needEnter bool, needTransferLeader bool) { for _, p := range b.toPromote.IDs() { peer := b.toPromote[p] - step.PromoteLearners = append(step.PromoteLearners, PromoteLearner{ToStore: peer.GetStoreId(), PeerID: peer.GetId()}) + step.PromoteLearners = append(step.PromoteLearners, PromoteLearner{ToStore: peer.GetStoreId(), PeerID: peer.GetId(), IsWitness: peer.GetIsWitness()}) b.currentPeers.Set(peer) } b.toPromote = newPeersMap() @@ -886,6 +897,10 @@ func (b *Builder) allowLeader(peer *metapb.Peer, ignoreClusterLimit bool) bool { return false } + if peer.IsWitness { + return false + } + // store does not exist if peer.GetStoreId() == b.currentLeaderStoreID { return true @@ -938,17 +953,18 @@ type stepPlan struct { remove *metapb.Peer promote *metapb.Peer demote *metapb.Peer - witness *metapb.Peer nonWitness *metapb.Peer + promoteNonWitness *metapb.Peer + witness *metapb.Peer } func (p stepPlan) String() string { - return fmt.Sprintf("stepPlan{leaderBeforeAdd=%v,add={%s},promote={%s},leaderBeforeRemove=%v,demote={%s},remove={%s},witness={%s},nonWitness={%s}}", - p.leaderBeforeAdd, p.add, p.promote, p.leaderBeforeRemove, p.demote, 
p.remove, p.witness, p.nonWitness) + return fmt.Sprintf("stepPlan{leaderBeforeAdd=%v,add={%s},promote={%s},leaderBeforeRemove=%v,demote={%s},remove={%s},nonWitness={%s},promoteNonWitness={%s},witness={%s}}", + p.leaderBeforeAdd, p.add, p.promote, p.leaderBeforeRemove, p.demote, p.remove, p.nonWitness, p.promoteNonWitness, p.witness) } func (p stepPlan) IsEmpty() bool { - return p.promote == nil && p.demote == nil && p.add == nil && p.remove == nil && p.witness == nil && p.nonWitness == nil + return p.promote == nil && p.demote == nil && p.add == nil && p.remove == nil && p.nonWitness == nil && p.promoteNonWitness == nil && p.witness == nil } func (b *Builder) peerPlan() stepPlan { @@ -969,10 +985,13 @@ func (b *Builder) peerPlan() stepPlan { if p := b.planAddPeer(); !p.IsEmpty() { return p } - if p := b.planWitness(); !p.IsEmpty() { + if p := b.planNonWitness(); !p.IsEmpty() { return p } - if p := b.planNonWitness(); !p.IsEmpty() { + if p := b.planPromoteNonWitness(); !p.IsEmpty() { + return p + } + if p := b.planWitness(); !p.IsEmpty() { return p } return stepPlan{} @@ -1113,6 +1132,14 @@ func (b *Builder) planNonWitness() stepPlan { return stepPlan{} } +func (b *Builder) planPromoteNonWitness() stepPlan { + for _, i := range b.toPromoteNonWitness.IDs() { + peer := b.toPromoteNonWitness[i] + return stepPlan{promoteNonWitness: peer} + } + return stepPlan{} +} + func (b *Builder) planAddPeer() stepPlan { var best stepPlan for _, i := range b.toAdd.IDs() { diff --git a/server/schedule/operator/create_operator_test.go b/server/schedule/operator/create_operator_test.go index 0ae0c7b0b94e..33f6658af12a 100644 --- a/server/schedule/operator/create_operator_test.go +++ b/server/schedule/operator/create_operator_test.go @@ -249,14 +249,14 @@ func (suite *createOperatorTestSuite) TestCreateMergeRegionOperator() { OpMerge | OpRegion, false, []OpStep{ - AddLearner{ToStore: 3}, + AddLearner{ToStore: 3, IsWitness: true}, ChangePeerV2Enter{ - PromoteLearners: []PromoteLearner{{ToStore: 3}}, - DemoteVoters: []DemoteVoter{{ToStore: 2}}, + PromoteLearners: []PromoteLearner{{ToStore: 3, IsWitness: true}}, + DemoteVoters: []DemoteVoter{{ToStore: 2, IsWitness: true}}, }, ChangePeerV2Leave{ - PromoteLearners: []PromoteLearner{{ToStore: 3}}, - DemoteVoters: []DemoteVoter{{ToStore: 2}}, + PromoteLearners: []PromoteLearner{{ToStore: 3, IsWitness: true}}, + DemoteVoters: []DemoteVoter{{ToStore: 2, IsWitness: true}}, }, RemovePeer{FromStore: 2}, }, @@ -313,6 +313,7 @@ func (suite *createOperatorTestSuite) TestCreateMergeRegionOperator() { suite.Equal(expectedSteps[i].(TransferLeader).ToStore, step.ToStore) case AddLearner: suite.Equal(expectedSteps[i].(AddLearner).ToStore, step.ToStore) + suite.Equal(expectedSteps[i].(AddLearner).IsWitness, step.IsWitness) case RemovePeer: suite.Equal(expectedSteps[i].(RemovePeer).FromStore, step.FromStore) case ChangePeerV2Enter: @@ -320,18 +321,22 @@ func (suite *createOperatorTestSuite) TestCreateMergeRegionOperator() { suite.Len(step.DemoteVoters, len(expectedSteps[i].(ChangePeerV2Enter).DemoteVoters)) for j, p := range expectedSteps[i].(ChangePeerV2Enter).PromoteLearners { suite.Equal(p.ToStore, step.PromoteLearners[j].ToStore) + suite.Equal(p.IsWitness, step.PromoteLearners[j].IsWitness) } for j, d := range expectedSteps[i].(ChangePeerV2Enter).DemoteVoters { suite.Equal(d.ToStore, step.DemoteVoters[j].ToStore) + suite.Equal(d.IsWitness, step.DemoteVoters[j].IsWitness) } case ChangePeerV2Leave: suite.Len(step.PromoteLearners, 
len(expectedSteps[i].(ChangePeerV2Leave).PromoteLearners)) suite.Len(step.DemoteVoters, len(expectedSteps[i].(ChangePeerV2Leave).DemoteVoters)) for j, p := range expectedSteps[i].(ChangePeerV2Leave).PromoteLearners { suite.Equal(p.ToStore, step.PromoteLearners[j].ToStore) + suite.Equal(p.IsWitness, step.PromoteLearners[j].IsWitness) } for j, d := range expectedSteps[i].(ChangePeerV2Leave).DemoteVoters { suite.Equal(d.ToStore, step.DemoteVoters[j].ToStore) + suite.Equal(d.IsWitness, step.DemoteVoters[j].IsWitness) } case MergeRegion: suite.Equal(expectedSteps[i].(MergeRegion), step) diff --git a/server/schedule/operator/operator.go b/server/schedule/operator/operator.go index c673dfd5199b..3fae9d86eea2 100644 --- a/server/schedule/operator/operator.go +++ b/server/schedule/operator/operator.go @@ -250,6 +250,18 @@ func (o *Operator) Step(i int) OpStep { return nil } +// ContainNonWitnessStep returns true if it contains the target OpStep +func (o *Operator) ContainNonWitnessStep() bool { + for _, step := range o.steps { + switch step.(type) { + case BecomeNonWitness: + return true + default: + } + } + return false +} + // getCurrentTimeAndStep returns the start time of the i-th step. // opStep is nil if the i-th step is not found. func (o *Operator) getCurrentTimeAndStep() (startTime time.Time, opStep OpStep) { diff --git a/server/schedule/operator_controller.go b/server/schedule/operator_controller.go index 4fd806ee28ca..675443a883ac 100644 --- a/server/schedule/operator_controller.go +++ b/server/schedule/operator_controller.go @@ -118,6 +118,9 @@ func (oc *OperatorController) Dispatch(region *core.RegionInfo, source string) { } oc.SendScheduleCommand(region, step, source) case operator.SUCCESS: + if op.ContainNonWitnessStep() { + oc.cluster.RecordOpStepWithTTL(op.RegionID()) + } if oc.RemoveOperator(op) { operatorWaitCounter.WithLabelValues(op.Desc(), "promote-success").Inc() oc.PromoteWaitingOperator() diff --git a/server/schedule/placement/fit.go b/server/schedule/placement/fit.go index ac8df67854cd..4270ba25eb30 100644 --- a/server/schedule/placement/fit.go +++ b/server/schedule/placement/fit.go @@ -170,19 +170,20 @@ type StoreSet interface { } // fitRegion tries to fit peers of a region to the rules. -func fitRegion(stores []*core.StoreInfo, region *core.RegionInfo, rules []*Rule) *RegionFit { - w := newFitWorker(stores, region, rules) +func fitRegion(stores []*core.StoreInfo, region *core.RegionInfo, rules []*Rule, supportWitness bool) *RegionFit { + w := newFitWorker(stores, region, rules, supportWitness) w.run() return &w.bestFit } type fitWorker struct { - stores []*core.StoreInfo - bestFit RegionFit // update during execution - peers []*fitPeer // p.selected is updated during execution. - rules []*Rule - needIsolation bool - exit bool + stores []*core.StoreInfo + bestFit RegionFit // update during execution + peers []*fitPeer // p.selected is updated during execution. + rules []*Rule + supportWitness bool + needIsolation bool + exit bool } type fitPeerOpt func(peer *fitPeer) @@ -210,7 +211,7 @@ func newFitPeer(stores []*core.StoreInfo, region *core.RegionInfo, fitPeers []*m return peers } -func newFitWorker(stores []*core.StoreInfo, region *core.RegionInfo, rules []*Rule) *fitWorker { +func newFitWorker(stores []*core.StoreInfo, region *core.RegionInfo, rules []*Rule, supportWitness bool) *fitWorker { peers := newFitPeer(stores, region, region.GetPeers()) // Sort peers to keep the match result deterministic. 
sort.Slice(peers, func(i, j int) bool { @@ -219,11 +220,12 @@ func newFitWorker(stores []*core.StoreInfo, region *core.RegionInfo, rules []*Ru return si > sj || (si == sj && peers[i].GetId() < peers[j].GetId()) }) return &fitWorker{ - stores: stores, - bestFit: RegionFit{RuleFits: make([]*RuleFit, len(rules))}, - peers: peers, - needIsolation: needIsolation(rules), - rules: rules, + stores: stores, + bestFit: RegionFit{RuleFits: make([]*RuleFit, len(rules))}, + peers: peers, + needIsolation: needIsolation(rules), + rules: rules, + supportWitness: supportWitness, } } @@ -322,7 +324,7 @@ func unSelectPeers(seleted []*fitPeer) { // compareBest checks if the selected peers is better then previous best. // Returns true if it replaces `bestFit` with a better alternative. func (w *fitWorker) compareBest(selected []*fitPeer, index int) bool { - rf := newRuleFit(w.rules[index], selected) + rf := newRuleFit(w.rules[index], selected, w.supportWitness) cmp := 1 if best := w.bestFit.RuleFits[index]; best != nil { cmp = compareRuleFit(rf, best) @@ -360,11 +362,13 @@ func (w *fitWorker) updateOrphanPeers(index int) { } } -func newRuleFit(rule *Rule, peers []*fitPeer) *RuleFit { +func newRuleFit(rule *Rule, peers []*fitPeer, supportWitness bool) *RuleFit { rf := &RuleFit{Rule: rule, IsolationScore: isolationScore(peers, rule.LocationLabels)} for _, p := range peers { rf.Peers = append(rf.Peers, p.Peer) - if !p.matchRoleStrict(rule.Role) || p.IsWitness != rule.IsWitness { + if !p.matchRoleStrict(rule.Role) || + (supportWitness && (p.IsWitness != rule.IsWitness)) || + (!supportWitness && p.IsWitness) { rf.PeersWithDifferentRole = append(rf.PeersWithDifferentRole, p.Peer) } } diff --git a/server/schedule/placement/fit_region_test.go b/server/schedule/placement/fit_region_test.go index 42d3cb6f3a53..c8a5f2f2f797 100644 --- a/server/schedule/placement/fit_region_test.go +++ b/server/schedule/placement/fit_region_test.go @@ -121,7 +121,7 @@ func BenchmarkFitRegion(b *testing.B) { stores := getStoresByRegion(storesSet, region) b.ResetTimer() for i := 0; i < b.N; i++ { - fitRegion(stores, region, rules) + fitRegion(stores, region, rules, false) } } @@ -140,7 +140,7 @@ func BenchmarkFitRegionMoreStores(b *testing.B) { stores := getStoresByRegion(storesSet, region) b.ResetTimer() for i := 0; i < b.N; i++ { - fitRegion(stores, region, rules) + fitRegion(stores, region, rules, false) } } @@ -159,7 +159,7 @@ func BenchmarkFitRegionMorePeers(b *testing.B) { stores := getStoresByRegion(storesSet, region) b.ResetTimer() for i := 0; i < b.N; i++ { - fitRegion(stores, region, rules) + fitRegion(stores, region, rules, false) } } @@ -185,7 +185,7 @@ func BenchmarkFitRegionMorePeersEquals(b *testing.B) { stores := getStoresByRegion(storesSet, region) b.ResetTimer() for i := 0; i < b.N; i++ { - fitRegion(stores, region, rules) + fitRegion(stores, region, rules, false) } } @@ -213,7 +213,7 @@ func BenchmarkFitRegionMorePeersSplitRules(b *testing.B) { stores := getStoresByRegion(storesSet, region) b.ResetTimer() for i := 0; i < b.N; i++ { - fitRegion(stores, region, rules) + fitRegion(stores, region, rules, false) } } @@ -241,7 +241,7 @@ func BenchmarkFitRegionMoreVotersSplitRules(b *testing.B) { stores := getStoresByRegion(storesSet, region) b.ResetTimer() for i := 0; i < b.N; i++ { - fitRegion(stores, region, rules) + fitRegion(stores, region, rules, false) } } @@ -252,7 +252,7 @@ func BenchmarkFitRegionTiflash(b *testing.B) { stores := getStoresByRegion(storesSet, region) b.ResetTimer() for i := 0; i < b.N; i++ { - 
fitRegion(stores, region, rules) + fitRegion(stores, region, rules, false) } } @@ -279,7 +279,7 @@ func BenchmarkFitRegionCrossRegion(b *testing.B) { stores := getStoresByRegion(storesSet, region) b.ResetTimer() for i := 0; i < b.N; i++ { - fitRegion(stores, region, rules) + fitRegion(stores, region, rules, false) } } @@ -343,7 +343,7 @@ func BenchmarkFitRegionWithMoreRulesAndStoreLabels(b *testing.B) { stores := getStoresByRegion(storesSet, region) b.ResetTimer() for i := 0; i < b.N; i++ { - fitRegion(stores, region, rules) + fitRegion(stores, region, rules, false) } } @@ -401,6 +401,6 @@ func BenchmarkFitRegionWithLocationLabels(b *testing.B) { stores := getStoresByRegion(storesSet, region) b.ResetTimer() for i := 0; i < b.N; i++ { - fitRegion(stores, region, rules) + fitRegion(stores, region, rules, false) } } diff --git a/server/schedule/placement/fit_test.go b/server/schedule/placement/fit_test.go index 76f7096551be..ca8ea30d083a 100644 --- a/server/schedule/placement/fit_test.go +++ b/server/schedule/placement/fit_test.go @@ -145,7 +145,7 @@ func TestReplace(t *testing.T) { for _, r := range tc.rules { rules = append(rules, makeRule(r)) } - rf := fitRegion(stores.GetStores(), region, rules) + rf := fitRegion(stores.GetStores(), region, rules, false) rf.regionStores = stores.GetStores() re.Equal(rf.Replace(tc.srcStoreID, stores.GetStore(tc.dstStoreID), region), tc.ok) } @@ -186,7 +186,7 @@ func TestFitRegion(t *testing.T) { for _, r := range testCase.rules { rules = append(rules, makeRule(r)) } - rf := fitRegion(stores.GetStores(), region, rules) + rf := fitRegion(stores.GetStores(), region, rules, false) expects := strings.Split(testCase.fitPeers, "/") for i, f := range rf.RuleFits { re.True(checkPeerMatch(f.Peers, expects[i])) diff --git a/server/schedule/placement/rule_manager.go b/server/schedule/placement/rule_manager.go index 8427eb6a047b..621b072a8472 100644 --- a/server/schedule/placement/rule_manager.go +++ b/server/schedule/placement/rule_manager.go @@ -204,6 +204,9 @@ func (m *RuleManager) adjustRule(r *Rule, groupID string) (err error) { if r.Role == Leader && r.Count > 1 { return errs.ErrRuleContent.FastGenByArgs(fmt.Sprintf("define multiple leaders by count %d", r.Count)) } + if r.IsWitness && r.Count > 1 { + return errs.ErrRuleContent.FastGenByArgs(fmt.Sprintf("define multiple witness by count %d", r.Count)) + } for _, c := range r.LabelConstraints { if !validateOp(c.Op) { return errs.ErrRuleContent.FastGenByArgs(fmt.Sprintf("invalid op %s", c.Op)) @@ -327,7 +330,7 @@ func (m *RuleManager) FitRegion(storeSet StoreSet, region *core.RegionInfo) *Reg return fit } } - fit := fitRegion(regionStores, region, rules) + fit := fitRegion(regionStores, region, rules, m.opt.IsSwitchWitnessAllowed()) fit.regionStores = regionStores fit.rules = rules return fit diff --git a/server/versioninfo/feature.go b/server/versioninfo/feature.go index 8bc88f577993..cf3bdc533a5d 100644 --- a/server/versioninfo/feature.go +++ b/server/versioninfo/feature.go @@ -45,6 +45,8 @@ const ( ConfChangeV2 // HotScheduleWithQuery supports schedule hot region with query info. HotScheduleWithQuery + // SwitchWithess supports switch between witness and non-witness. + SwitchWitness ) var featuresDict = map[Feature]string{ @@ -57,6 +59,7 @@ var featuresDict = map[Feature]string{ Version5_0: "5.0.0", ConfChangeV2: "5.0.0", HotScheduleWithQuery: "5.2.0", + SwitchWitness: "6.5.0", } // MinSupportedVersion returns the minimum support version for the specified feature. 
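Everything the rule checker patch above does with witnesses is gated twice: the cluster version must support the SwitchWitness feature registered in feature.go ("6.5.0"), and enable-switch-witness must be on in the schedule config, as isWitnessEnabled earlier in the patch shows. A toy version of that double check, using a hand-rolled version comparison instead of PD's versioninfo helpers, looks like this:

package main

import "fmt"

// featureGate combines a version check with a config switch, in the spirit of
// isWitnessEnabled above. Field names and the [3]int version encoding are
// illustrative only.
type featureGate struct {
	clusterVersion  [3]int // major, minor, patch
	minWitnessVer   [3]int
	enableSwitchWit bool
}

func versionAtLeast(v, min [3]int) bool {
	for i := 0; i < 3; i++ {
		if v[i] != min[i] {
			return v[i] > min[i]
		}
	}
	return true
}

func (g featureGate) witnessEnabled() bool {
	return versionAtLeast(g.clusterVersion, g.minWitnessVer) && g.enableSwitchWit
}

func main() {
	g := featureGate{
		clusterVersion:  [3]int{6, 5, 0},
		minWitnessVer:   [3]int{6, 5, 0}, // "6.5.0", matching the SwitchWitness entry above
		enableSwitchWit: true,
	}
	fmt.Println(g.witnessEnabled()) // true

	g.clusterVersion = [3]int{6, 4, 0}
	fmt.Println(g.witnessEnabled()) // false: the cluster version does not support the feature
}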
From 2a4f9a0dd14c0c9a1fc211b3a84468c635b277a8 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Mon, 28 Nov 2022 16:28:00 +0800 Subject: [PATCH 66/67] pkg: use another `Median` (#5734) ref tikv/pd#5692 Signed-off-by: Ryan Leung Co-authored-by: Ti Chi Robot --- go.mod | 5 ++-- go.sum | 9 ++++--- pkg/movingaverage/max_filter.go | 5 ++-- pkg/movingaverage/median_filter.go | 5 ++-- pkg/movingaverage/median_filter_test.go | 32 +++++++++++++++++++++++++ tests/client/go.mod | 5 ++-- tests/client/go.sum | 9 ++++--- 7 files changed, 54 insertions(+), 16 deletions(-) create mode 100644 pkg/movingaverage/median_filter_test.go diff --git a/go.mod b/go.mod index 2bc1c3e793f6..54a1cad61862 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e github.com/coreos/go-semver v0.3.0 github.com/docker/go-units v0.4.0 + github.com/elliotchance/pie/v2 v2.1.0 github.com/gin-gonic/gin v1.7.4 github.com/go-echarts/go-echarts v1.0.0 github.com/gogo/protobuf v1.3.2 @@ -20,7 +21,6 @@ require ( github.com/joho/godotenv v1.4.0 github.com/mattn/go-shellwords v1.0.12 github.com/mgechev/revive v1.0.2 - github.com/montanaflynn/stats v0.5.0 github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d github.com/pingcap/errcode v0.3.0 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c @@ -34,7 +34,7 @@ require ( github.com/sasha-s/go-deadlock v0.2.0 github.com/spf13/cobra v1.0.0 github.com/spf13/pflag v1.0.5 - github.com/stretchr/testify v1.7.0 + github.com/stretchr/testify v1.7.1 github.com/swaggo/http-swagger v0.0.0-20200308142732-58ac5e232fba github.com/swaggo/swag v1.8.3 github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965 @@ -155,6 +155,7 @@ require ( go.uber.org/fx v1.12.0 // indirect go.uber.org/multierr v1.7.0 // indirect golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 // indirect + golang.org/x/exp v0.0.0-20220321173239-a90fa8a75705 // indirect golang.org/x/image v0.0.0-20200119044424-58c23975cae1 // indirect golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 // indirect golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4 // indirect diff --git a/go.sum b/go.sum index c29abb89978e..c37ff93b79ae 100644 --- a/go.sum +++ b/go.sum @@ -100,6 +100,8 @@ github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4 h1:qk/FSDDxo05w github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385 h1:clC1lXBpe2kTj2VHdaIu9ajZQe4kcEY9j0NsnDDBZ3o= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM= +github.com/elliotchance/pie/v2 v2.1.0 h1:KEVAAzxYxTyFs4hvebFZVzBdEo3YeMzl2HYDWn+P3F4= +github.com/elliotchance/pie/v2 v2.1.0/go.mod h1:18t0dgGFH006g4eVdDtWfgFZPQEgl10IoEO8YWEq3Og= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= @@ -377,8 +379,6 @@ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lN github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod 
h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/montanaflynn/stats v0.5.0 h1:2EkzeTSqBB4V4bJwWrt5gIIrZmpJBcoIRGS2kWLgzmk= -github.com/montanaflynn/stats v0.5.0/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/nfnt/resize v0.0.0-20160724205520-891127d8d1b5 h1:BvoENQQU+fZ9uukda/RzCAL/191HHwJA5b13R6diVlY= github.com/nfnt/resize v0.0.0-20160724205520-891127d8d1b5/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8= @@ -513,8 +513,9 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/swaggo/files v0.0.0-20190704085106-630677cd5c14 h1:PyYN9JH5jY9j6av01SpfRMb+1DWg/i3MbGOKPxJ2wjM= github.com/swaggo/files v0.0.0-20190704085106-630677cd5c14/go.mod h1:gxQT6pBGRuIGunNf/+tSOB5OHvguWi8Tbt82WOkf35E= github.com/swaggo/gin-swagger v1.2.0/go.mod h1:qlH2+W7zXGZkczuL+r2nEBR2JTT+/lX05Nn6vPhc7OI= @@ -626,6 +627,8 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 h1:kUhD7nTDoI3fVd9G4ORWrbV5NY0liEs/Jg2pv5f+bBA= golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20220321173239-a90fa8a75705 h1:ba9YlqfDGTTQ5aZ2fwOoQ1hf32QySyQkR6ODGDzHlnE= +golang.org/x/exp v0.0.0-20220321173239-a90fa8a75705/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE= golang.org/x/image v0.0.0-20200119044424-58c23975cae1 h1:5h3ngYt7+vXCDZCup/HkCQgW5XwmSvR/nA2JmJ0RErg= golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= diff --git a/pkg/movingaverage/max_filter.go b/pkg/movingaverage/max_filter.go index 0d54eb126622..70bd45f98de5 100644 --- a/pkg/movingaverage/max_filter.go +++ b/pkg/movingaverage/max_filter.go @@ -14,7 +14,7 @@ package movingaverage -import "github.com/montanaflynn/stats" +import "github.com/elliotchance/pie/v2" // MaxFilter works as a maximum filter with specified window size. // There are at most `size` data points for calculating. @@ -47,8 +47,7 @@ func (r *MaxFilter) Get() float64 { if r.count < r.size { records = r.records[:r.count] } - max, _ := stats.Max(records) - return max + return pie.Max(records) } // Reset cleans the data set. 
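Aside on the library swap above: the filters keep a fixed-size window of samples and now aggregate it with pie.Max and pie.Median instead of the montanaflynn/stats helpers. The sketch below reproduces that window-plus-aggregate pattern in a self-contained form; windowStats and its methods are invented for illustration, and only the pie.Max/pie.Median calls are taken from the diff.

```go
package main

import (
	"fmt"

	"github.com/elliotchance/pie/v2"
)

// windowStats is an illustrative ring buffer with the same shape as
// MaxFilter and MedianFilter: keep at most `size` samples and aggregate
// over the filled portion.
type windowStats struct {
	records []float64
	count   uint64
	size    uint64
}

func newWindowStats(size uint64) *windowStats {
	return &windowStats{records: make([]float64, size), size: size}
}

// add overwrites the oldest slot once the window is full.
func (w *windowStats) add(v float64) {
	w.records[w.count%w.size] = v
	w.count++
}

// maxAndMedian aggregates over the filled part of the window using the
// pie/v2 helpers that the diff switches to.
func (w *windowStats) maxAndMedian() (float64, float64) {
	records := w.records
	if w.count < w.size {
		records = w.records[:w.count]
	}
	return pie.Max(records), pie.Median(records)
}

func main() {
	w := newWindowStats(5)
	for _, v := range []float64{2, 1, 3, 4, 1, 1, 3} {
		w.add(v)
	}
	maxVal, med := w.maxAndMedian()
	fmt.Println(maxVal, med) // aggregates over the five most recent samples
}
```

The new BenchmarkMedianFilter in the next file exercises exactly this Get-over-a-filled-window path.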
diff --git a/pkg/movingaverage/median_filter.go b/pkg/movingaverage/median_filter.go index 921470c1b730..030224c465b1 100644 --- a/pkg/movingaverage/median_filter.go +++ b/pkg/movingaverage/median_filter.go @@ -14,7 +14,7 @@ package movingaverage -import "github.com/montanaflynn/stats" +import "github.com/elliotchance/pie/v2" // MedianFilter works as a median filter with specified window size. // There are at most `size` data points for calculating. @@ -50,8 +50,7 @@ func (r *MedianFilter) Get() float64 { if r.count < r.size { records = r.records[:r.count] } - median, _ := stats.Median(records) - return median + return pie.Median(records) } // Reset cleans the data set. diff --git a/pkg/movingaverage/median_filter_test.go b/pkg/movingaverage/median_filter_test.go new file mode 100644 index 000000000000..59d6738b80b0 --- /dev/null +++ b/pkg/movingaverage/median_filter_test.go @@ -0,0 +1,32 @@ +// Copyright 2022 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package movingaverage + +import ( + "testing" +) + +func BenchmarkMedianFilter(b *testing.B) { + data := []float64{2, 1, 3, 4, 1, 1, 3, 3, 2, 0, 5} + + mf := NewMedianFilter(10) + for _, n := range data { + mf.Add(n) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + mf.Get() + } +} diff --git a/tests/client/go.mod b/tests/client/go.mod index dc856461c7f2..3561547b2feb 100644 --- a/tests/client/go.mod +++ b/tests/client/go.mod @@ -6,7 +6,7 @@ require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 github.com/pingcap/kvproto v0.0.0-20221104101942-09d82b914df1 - github.com/stretchr/testify v1.7.0 + github.com/stretchr/testify v1.7.1 github.com/tikv/pd v0.0.0-00010101000000-000000000000 github.com/tikv/pd/client v0.0.0-00010101000000-000000000000 go.etcd.io/etcd v0.5.0-alpha.5.0.20220915004622-85b640cee793 @@ -38,6 +38,7 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/docker/go-units v0.4.0 // indirect github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4 // indirect + github.com/elliotchance/pie/v2 v2.1.0 // indirect github.com/fogleman/gg v1.3.0 // indirect github.com/gin-contrib/gzip v0.0.1 // indirect github.com/gin-contrib/sse v0.1.0 // indirect @@ -84,7 +85,6 @@ require ( github.com/minio/sio v0.3.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect - github.com/montanaflynn/stats v0.5.0 // indirect github.com/oleiade/reflections v1.0.1 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect github.com/petermattis/goid v0.0.0-20211229010228-4d14c490ee36 // indirect @@ -131,6 +131,7 @@ require ( go.uber.org/multierr v1.7.0 // indirect go.uber.org/zap v1.20.0 // indirect golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 // indirect + golang.org/x/exp v0.0.0-20220321173239-a90fa8a75705 // indirect golang.org/x/image v0.0.0-20200119044424-58c23975cae1 // indirect golang.org/x/net 
v0.0.0-20220425223048-2871e0cb64e4 // indirect golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421 // indirect diff --git a/tests/client/go.sum b/tests/client/go.sum index c33df7a43ffa..06aae3a4d5c6 100644 --- a/tests/client/go.sum +++ b/tests/client/go.sum @@ -88,6 +88,8 @@ github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4 h1:qk/FSDDxo05w github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385 h1:clC1lXBpe2kTj2VHdaIu9ajZQe4kcEY9j0NsnDDBZ3o= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM= +github.com/elliotchance/pie/v2 v2.1.0 h1:KEVAAzxYxTyFs4hvebFZVzBdEo3YeMzl2HYDWn+P3F4= +github.com/elliotchance/pie/v2 v2.1.0/go.mod h1:18t0dgGFH006g4eVdDtWfgFZPQEgl10IoEO8YWEq3Og= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= @@ -349,8 +351,6 @@ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lN github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/montanaflynn/stats v0.5.0 h1:2EkzeTSqBB4V4bJwWrt5gIIrZmpJBcoIRGS2kWLgzmk= -github.com/montanaflynn/stats v0.5.0/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= @@ -475,8 +475,9 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/swaggo/files v0.0.0-20190704085106-630677cd5c14 h1:PyYN9JH5jY9j6av01SpfRMb+1DWg/i3MbGOKPxJ2wjM= github.com/swaggo/files v0.0.0-20190704085106-630677cd5c14/go.mod h1:gxQT6pBGRuIGunNf/+tSOB5OHvguWi8Tbt82WOkf35E= github.com/swaggo/gin-swagger v1.2.0/go.mod h1:qlH2+W7zXGZkczuL+r2nEBR2JTT+/lX05Nn6vPhc7OI= @@ -580,6 +581,8 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 h1:kUhD7nTDoI3fVd9G4ORWrbV5NY0liEs/Jg2pv5f+bBA= golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod 
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20220321173239-a90fa8a75705 h1:ba9YlqfDGTTQ5aZ2fwOoQ1hf32QySyQkR6ODGDzHlnE= +golang.org/x/exp v0.0.0-20220321173239-a90fa8a75705/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE= golang.org/x/image v0.0.0-20200119044424-58c23975cae1 h1:5h3ngYt7+vXCDZCup/HkCQgW5XwmSvR/nA2JmJ0RErg= golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= From c6537d9f496fdd819246998d2fc7cce372275211 Mon Sep 17 00:00:00 2001 From: Zwb Date: Tue, 29 Nov 2022 10:17:59 +0800 Subject: [PATCH 67/67] rule_checker: address witness pr commits (#5736) ref tikv/pd#5627, close tikv/pd#5627 rule_checker: address witness pr commits Signed-off-by: Wenbo Zhang --- server/config/config.go | 10 +++++----- server/config/persist_options.go | 12 ++++++------ server/schedule/checker/rule_checker.go | 3 ++- server/schedule/checker/rule_checker_test.go | 4 ++-- server/schedule/placement/rule_manager.go | 2 +- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/server/config/config.go b/server/config/config.go index 88ebefab9043..09ba1ab905d9 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -246,7 +246,7 @@ const ( defaultEnablePlacementRules = true defaultEnableGRPCGateway = true defaultDisableErrorVerbose = true - defaultEnableSwitchWitness = false + defaultEnableWitness = false defaultDashboardAddress = "auto" @@ -774,8 +774,8 @@ type ScheduleConfig struct { // EnableDiagnostic is the the option to enable using diagnostic EnableDiagnostic bool `toml:"enable-diagnostic" json:"enable-diagnostic,string"` - // EnableSwitchWitness is the option to enable using witness - EnableSwitchWitness bool `toml:"enable-switch-witness" json:"enable-switch-witness,string"` + // EnableWitness is the option to enable using witness + EnableWitness bool `toml:"enable-witness" json:"enable-witness,string"` } // Clone returns a cloned scheduling configuration. @@ -890,8 +890,8 @@ func (c *ScheduleConfig) adjust(meta *configMetaData, reloading bool) error { c.EnableDiagnostic = defaultEnableDiagnostic } - if !meta.IsDefined("enable-switch-witness") { - c.EnableSwitchWitness = defaultEnableSwitchWitness + if !meta.IsDefined("enable-witness") { + c.EnableWitness = defaultEnableWitness } // new cluster:v2, old cluster:v1 diff --git a/server/config/persist_options.go b/server/config/persist_options.go index 35d5f5f97c22..6264cb447b23 100644 --- a/server/config/persist_options.go +++ b/server/config/persist_options.go @@ -289,15 +289,15 @@ func (o *PersistOptions) SetEnableDiagnostic(enable bool) { o.SetScheduleConfig(v) } -// IsSwitchWitnessAllowed returns whether is enable to use witness. -func (o *PersistOptions) IsSwitchWitnessAllowed() bool { - return o.GetScheduleConfig().EnableSwitchWitness +// IsWitnessAllowed returns whether is enable to use witness. +func (o *PersistOptions) IsWitnessAllowed() bool { + return o.GetScheduleConfig().EnableWitness } -// SetEnableSwitchWitness to set the option for witness. It's only used to test. -func (o *PersistOptions) SetEnableSwitchWitness(enable bool) { +// SetEnableWitness to set the option for witness. It's only used to test. 
+func (o *PersistOptions) SetEnableWitness(enable bool) { v := o.GetScheduleConfig().Clone() - v.EnableSwitchWitness = enable + v.EnableWitness = enable o.SetScheduleConfig(v) } diff --git a/server/schedule/checker/rule_checker.go b/server/schedule/checker/rule_checker.go index 82fa6c7385c6..43fea08772e5 100644 --- a/server/schedule/checker/rule_checker.go +++ b/server/schedule/checker/rule_checker.go @@ -157,7 +157,7 @@ func (c *RuleChecker) RecordRegionPromoteToNonWitness(regionID uint64) { func (c *RuleChecker) isWitnessEnabled() bool { return versioninfo.IsFeatureSupported(c.cluster.GetOpts().GetClusterVersion(), versioninfo.SwitchWitness) && - c.cluster.GetOpts().IsSwitchWitnessAllowed() + c.cluster.GetOpts().IsWitnessAllowed() } func (c *RuleChecker) fixRulePeer(region *core.RegionInfo, fit *placement.RegionFit, rf *placement.RuleFit) (*operator.Operator, error) { @@ -172,6 +172,7 @@ func (c *RuleChecker) fixRulePeer(region *core.RegionInfo, fit *placement.Region checkerCounter.WithLabelValues("rule_checker", "replace-down").Inc() return c.replaceUnexpectRulePeer(region, rf, fit, peer, downStatus) } + // When witness placement rule is enabled, promotes the witness to voter when region has down peer. if c.isWitnessEnabled() { if witness, ok := c.hasAvailableWitness(region, peer); ok { checkerCounter.WithLabelValues("rule_checker", "promote-witness").Inc() diff --git a/server/schedule/checker/rule_checker_test.go b/server/schedule/checker/rule_checker_test.go index 3778b78e4172..cf2470c893bd 100644 --- a/server/schedule/checker/rule_checker_test.go +++ b/server/schedule/checker/rule_checker_test.go @@ -54,7 +54,7 @@ func (suite *ruleCheckerTestSuite) SetupTest() { suite.cluster = mockcluster.NewCluster(suite.ctx, cfg) suite.cluster.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.SwitchWitness)) suite.cluster.SetEnablePlacementRules(true) - suite.cluster.SetEnableSwitchWitness(true) + suite.cluster.SetEnableWitness(true) suite.cluster.SetEnableUseJointConsensus(false) suite.ruleManager = suite.cluster.RuleManager suite.rc = NewRuleChecker(suite.ctx, suite.cluster, suite.ruleManager, cache.NewDefaultCache(10)) @@ -510,7 +510,7 @@ func (suite *ruleCheckerTestSuite) TestDisableWitness() { op := suite.rc.Check(r) suite.Nil(op) - suite.cluster.SetEnableSwitchWitness(false) + suite.cluster.SetEnableWitness(false) op = suite.rc.Check(r) suite.NotNil(op) } diff --git a/server/schedule/placement/rule_manager.go b/server/schedule/placement/rule_manager.go index 621b072a8472..8d12fd11af3d 100644 --- a/server/schedule/placement/rule_manager.go +++ b/server/schedule/placement/rule_manager.go @@ -330,7 +330,7 @@ func (m *RuleManager) FitRegion(storeSet StoreSet, region *core.RegionInfo) *Reg return fit } } - fit := fitRegion(regionStores, region, rules, m.opt.IsSwitchWitnessAllowed()) + fit := fitRegion(regionStores, region, rules, m.opt.IsWitnessAllowed()) fit.regionStores = regionStores fit.rules = rules return fit
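Aside on the witness switch: after this patch the rule checker enables witness handling only when both the SwitchWitness feature gate passes and the renamed enable-witness option is on. Below is a minimal sketch of that combined check; the type and field names are illustrative, not the repository's API.

```go
package main

import "fmt"

// ruleCheckerDeps models only the two switches this patch threads through
// RuleChecker.isWitnessEnabled.
type ruleCheckerDeps struct {
	switchWitnessSupported bool // stands in for versioninfo.IsFeatureSupported(clusterVersion, SwitchWitness)
	witnessAllowed         bool // stands in for PersistOptions.IsWitnessAllowed(), i.e. enable-witness
}

// isWitnessEnabled mirrors the logic in rule_checker.go: witness handling is
// active only when the cluster version supports it and the option is on.
func (d ruleCheckerDeps) isWitnessEnabled() bool {
	return d.switchWitnessSupported && d.witnessAllowed
}

func main() {
	fmt.Println(ruleCheckerDeps{switchWitnessSupported: true, witnessAllowed: true}.isWitnessEnabled())  // true
	fmt.Println(ruleCheckerDeps{switchWitnessSupported: true, witnessAllowed: false}.isWitnessEnabled()) // false
}
```

The updated TestDisableWitness shows the same behavior at the test level: with enable-witness on, Check returns no operator for the witness region, and turning the option off makes it return one.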