From 75190c9967418f2f90c4f1d5701e7fd0155e6830 Mon Sep 17 00:00:00 2001 From: Cabinfever_B Date: Thu, 29 Dec 2022 21:12:47 +0800 Subject: [PATCH 01/32] impl resouce manager tenant side client Signed-off-by: Cabinfever_B --- .../resource_manager/tenant_client/client.go | 864 ++++++++++++++++++ .../resource_manager/tenant_client/config.go | 92 ++ .../resource_manager/tenant_client/limiter.go | 527 +++++++++++ .../resource_manager/tenant_client/model.go | 110 +++ 4 files changed, 1593 insertions(+) create mode 100644 pkg/mcs/resource_manager/tenant_client/client.go create mode 100644 pkg/mcs/resource_manager/tenant_client/config.go create mode 100644 pkg/mcs/resource_manager/tenant_client/limiter.go create mode 100644 pkg/mcs/resource_manager/tenant_client/model.go diff --git a/pkg/mcs/resource_manager/tenant_client/client.go b/pkg/mcs/resource_manager/tenant_client/client.go new file mode 100644 index 00000000000..8bf89726c4d --- /dev/null +++ b/pkg/mcs/resource_manager/tenant_client/client.go @@ -0,0 +1,864 @@ +// Copyright 2022 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS,g +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tenantclient + +import ( + "context" + "sync" + "time" + + "github.com/pingcap/errors" + rmpb "github.com/pingcap/kvproto/pkg/resource_manager" + "github.com/pingcap/log" + "go.uber.org/zap" +) + +type TenantSideKVInterceptor interface { + OnRequestWait(ctx context.Context, resourceGroupName string, info RequestInfo) error + OnResponse(ctx context.Context, resourceGroupName string, req RequestInfo, resp ResponseInfo) error +} + +type ResourceGroupProvider interface { + ListResourceGroups(ctx context.Context) ([]*rmpb.ResourceGroup, error) + GetResourceGroup(ctx context.Context, resourceGroupName string) (*rmpb.ResourceGroup, error) + AddResourceGroup(ctx context.Context, resourceGroupName string, settings *rmpb.GroupSettings) (string, error) + ModifyResourceGroup(ctx context.Context, resourceGroupName string, settings *rmpb.GroupSettings) (string, error) + DeleteResourceGroup(ctx context.Context, resourceGroupName string) (string, error) + AcquireTokenBuckets(ctx context.Context, request *rmpb.TokenBucketsRequest) ([]*rmpb.TokenBucketResponse, error) +} + +func NewResourceGroupController( + provider ResourceGroupProvider, +) (*resourceGroupsController, error) { + return newTenantSideCostController(provider) +} + +var _ TenantSideKVInterceptor = (*resourceGroupsController)(nil) + +type resourceGroupsController struct { + instanceFingerprint string + provider ResourceGroupProvider + groupsController sync.Map + config *Config + + // responseChan is used to receive results from token bucket requests, which + // are run in a separate goroutine. A nil response indicates an error. + responseChan chan []*rmpb.TokenBucketResponse + + // lowRUNotifyChan is used when the number of available resource is running low and + // we need to send an early token bucket request. 
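+	// The channel has capacity 1 and is written to with a non-blocking send,
+	// so at most one early-report signal is pending at a time; the main loop
+	// drains it and issues a token bucket request with source "low_ru".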
+ lowRUNotifyChan chan struct{} + + run struct { + now time.Time + lastRequestTime time.Time + // requestInProgress is true if we are in the process of sending a request; + // it gets set to false when we process the response (in the main loop), + // even in error cases. + requestInProgress bool + + // requestNeedsRetry is set if the last token bucket request encountered an + // error. This triggers a retry attempt on the next tick. + // + // Note: requestNeedsRetry and requestInProgress are never true at the same + // time. + requestNeedsRetry bool + + // targetPeriod stores the value of the TargetPeriodSetting setting at the + // last update. + targetPeriod time.Duration + } +} + +func newTenantSideCostController(provider ResourceGroupProvider) (*resourceGroupsController, error) { + return &resourceGroupsController{ + provider: provider, + config: DefaultConfig(), + lowRUNotifyChan: make(chan struct{}, 1), + responseChan: make(chan []*rmpb.TokenBucketResponse, 1), + }, nil +} + +func (c *resourceGroupsController) Start(ctx context.Context, instanceFingerprint string) error { + if len(instanceFingerprint) == 0 { + return errors.New("invalid SQLInstanceID") + } + c.instanceFingerprint = instanceFingerprint + // just for demo + if err := c.addDemoResourceGroup(ctx); err != nil { + log.Error("add Demo ResourceGroup failed", zap.Error(err)) + } + if err := c.updateAllResourceGroups(ctx); err != nil { + log.Error("update ResourceGroup failed", zap.Error(err)) + } + + c.initRunState(ctx) + go c.mainLoop(ctx) + return nil +} + +func (c *resourceGroupsController) updateAllResourceGroups(ctx context.Context) error { + groups, err := c.provider.ListResourceGroups(ctx) + if err != nil { + return err + } + lastedGroups := make(map[string]struct{}) + for _, group := range groups { + // todo: check add or modify + log.Info("create resource group cost controller", zap.String("name", group.GetName())) + gc := newGroupCostController(ctx, group, c.config, c.lowRUNotifyChan) + c.groupsController.Store(group.GetName(), gc) + lastedGroups[group.GetName()] = struct{}{} + } + c.groupsController.Range(func(key, value any) bool { + resourceGroupName := key.(string) + if _, ok := lastedGroups[resourceGroupName]; !ok { + c.groupsController.Delete(key) + } + return true + }) + return nil +} + +func (c *resourceGroupsController) initRunState(ctx context.Context) { + now := time.Now() + c.run.now = now + c.run.lastRequestTime = now + c.run.targetPeriod = c.config.targetPeriod + c.groupsController.Range(func(name, value any) bool { + gc := value.(*groupCostController) + gc.initRunState(ctx) + return true + }) +} + +func (c *resourceGroupsController) updateRunState(ctx context.Context) { + c.run.now = time.Now() + c.groupsController.Range(func(name, value any) bool { + gc := value.(*groupCostController) + gc.updateRunState(ctx) + return true + }) +} + +func (c *resourceGroupsController) shouldReportConsumption() bool { + if c.run.requestInProgress { + return false + } + timeSinceLastRequest := c.run.now.Sub(c.run.lastRequestTime) + if timeSinceLastRequest >= c.run.targetPeriod { + if timeSinceLastRequest >= extendedReportingPeriodFactor*c.run.targetPeriod { + return true + } + ret := false + c.groupsController.Range(func(name, value any) bool { + gc := value.(*groupCostController) + ret = ret || gc.shouldReportConsumption() + return !ret + }) + } + return false +} + +func (c *resourceGroupsController) updateAvgRequestResourcePerSec(ctx context.Context) { + c.groupsController.Range(func(name, value any) bool { + gc := 
value.(*groupCostController) + gc.updateAvgRequestResourcePerSec(ctx) + return true + }) +} + +func (c *resourceGroupsController) handleTokenBucketResponse(ctx context.Context, resp []*rmpb.TokenBucketResponse) { + for _, res := range resp { + name := res.GetResourceGroupName() + v, ok := c.groupsController.Load(name) + if !ok { + log.Warn("A non-existent resource group was found when handle token response.", zap.String("name", name)) + } + gc := v.(*groupCostController) + gc.handleTokenBucketResponse(ctx, res) + } +} + +func (c *resourceGroupsController) collectTokenBucketRequests(ctx context.Context, source string, low bool) { + requests := make([]*rmpb.TokenBucketRequst, 0) + c.groupsController.Range(func(name, value any) bool { + gc := value.(*groupCostController) + request := gc.collectRequestAndConsumption(low) + if request != nil { + requests = append(requests, request) + } + return true + }) + if len(requests) > 0 { + c.sendTokenBucketRequests(ctx, requests, source) + } +} + +func (c *resourceGroupsController) sendTokenBucketRequests(ctx context.Context, requests []*rmpb.TokenBucketRequst, source string) { + c.run.requestInProgress = true + req := &rmpb.TokenBucketsRequest{ + Requests: requests, + TargetRequestPeriodMs: uint64(c.config.targetPeriod / time.Millisecond), + } + go func() { + now := time.Now() + log.Info("[tenant controllor] send token bucket request", zap.Time("now", now), zap.Any("req", req.Requests), zap.String("source", source)) + resp, err := c.provider.AcquireTokenBuckets(ctx, req) + if err != nil { + // Don't log any errors caused by the stopper canceling the context. + if !errors.ErrorEqual(err, context.Canceled) { + log.L().Sugar().Infof("TokenBucket RPC error: %v", err) + } + resp = nil + } + log.Info("[tenant controllor] token bucket response", zap.Time("now", time.Now()), zap.Any("resp", resp), zap.String("source", source), zap.Duration("latency", time.Since(now))) + c.responseChan <- resp + }() +} + +func (c *resourceGroupsController) handleTokenBucketTrickEvent(ctx context.Context) { + c.groupsController.Range(func(name, value any) bool { + gc := value.(*groupCostController) + gc.handleTokenBucketTrickEvent(ctx) + return true + }) +} + +func (c *resourceGroupsController) mainLoop(ctx context.Context) { + interval := c.config.groupLoopUpdateInterval + ticker := time.NewTicker(interval) + defer ticker.Stop() + + c.updateRunState(ctx) + c.collectTokenBucketRequests(ctx, "init", false /* select all */) + + for { + select { + case <-ctx.Done(): + return + case resp := <-c.responseChan: + c.run.requestInProgress = false + if resp != nil { + c.updateRunState(ctx) + c.handleTokenBucketResponse(ctx, resp) + } else { + // A nil response indicates a failure (which would have been logged). 
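+				// The retry is picked up by the next ticker tick below rather
+				// than re-sent immediately, which keeps a failing provider
+				// from being hammered in a tight loop.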
+				c.run.requestNeedsRetry = true
+			}
+		case <-ticker.C:
+			c.updateRunState(ctx)
+			c.updateAvgRequestResourcePerSec(ctx)
+			if c.run.requestNeedsRetry || c.shouldReportConsumption() {
+				c.run.requestNeedsRetry = false
+				c.collectTokenBucketRequests(ctx, "report", false /* select all */)
+			}
+		case <-c.lowRUNotifyChan:
+			c.updateRunState(ctx)
+			if !c.run.requestInProgress {
+				c.collectTokenBucketRequests(ctx, "low_ru", true /* only select low tokens resource group */)
+			}
+		default:
+			c.handleTokenBucketTrickEvent(ctx)
+		}
+
+	}
+}
+
+func (c *resourceGroupsController) OnRequestWait(
+	ctx context.Context, resourceGroupName string, info RequestInfo,
+) error {
+	tmp, ok := c.groupsController.Load(resourceGroupName)
+	if !ok {
+		return errors.Errorf("[resource group] resource group %s does not exist", resourceGroupName)
+	}
+	gc := tmp.(*groupCostController)
+	err := gc.OnRequestWait(ctx, info)
+	return err
+}
+
+func (c *resourceGroupsController) OnResponse(ctx context.Context, resourceGroupName string, req RequestInfo, resp ResponseInfo) error {
+	tmp, ok := c.groupsController.Load(resourceGroupName)
+	if !ok {
+		log.Warn("[resource group] resource group does not exist", zap.String("resourceGroupName", resourceGroupName))
+		// Return early to avoid a nil type assertion panic on an unknown group.
+		return nil
+	}
+	gc := tmp.(*groupCostController)
+	gc.OnResponse(ctx, req, resp)
+	return nil
+}
+
+type groupCostController struct {
+	resourceGroupName string
+	mainCfg           *Config
+	groupSettings     *rmpb.GroupSettings
+	calculators       []ResourceCalculator
+	mode              rmpb.GroupMode
+
+	handleRespFunc func(*rmpb.TokenBucketResponse)
+
+	mu struct {
+		sync.Mutex
+		requestUnitConsumptions []*rmpb.RequestUnitItem
+		resourceConsumptions    []*rmpb.ResourceItem
+	}
+
+	lowRUNotifyChan chan struct{}
+	// run contains the state that is updated by the main loop.
+	run struct {
+		now time.Time
+
+		// targetPeriod stores the value of the TargetPeriodSetting setting at the
+		// last update.
+		targetPeriod time.Duration
+
+		// requestUnitConsumptions and resourceConsumptions store the last
+		// values copied from their mu counterparts.
+		requestUnitConsumptions []*rmpb.RequestUnitItem
+		resourceConsumptions    []*rmpb.ResourceItem
+
+		lastRequestUnitConsumptions []*rmpb.RequestUnitItem
+		lastResourceConsumptions    []*rmpb.ResourceItem
+
+		// initialRequestCompleted is set to true when the first token bucket
+		// request completes successfully.
+		initialRequestCompleted bool
+
+		resourceTokens    map[rmpb.ResourceType]*tokenCounter
+		requestUnitTokens map[rmpb.RequestUnitType]*tokenCounter
+	}
+}
+
+type tokenCounter struct {
+	// avgRUPerSec is an exponentially-weighted moving average of the RU
+	// consumption per second; used to estimate the RU requirements for the next
+	// request.
+	avgRUPerSec float64
+	// avgRUPerSecLastRU is the consumption RU value when avgRUPerSec was last updated.
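+	// calcAvg updates the moving average from this baseline as:
+	//
+	//	delta       = (newRU - avgRUPerSecLastRU) / elapsedSeconds
+	//	avgRUPerSec = movingAvgFactor*avgRUPerSec + (1-movingAvgFactor)*delta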
+ avgRUPerSecLastRU float64 + avgLastTime time.Time + + setupNotificationCh <-chan time.Time + setupNotificationThreshold float64 + setupNotificationTimer *time.Timer + + lastDeadline time.Time + lastRate float64 + + limiter *Limiter +} + +func newGroupCostController(ctx context.Context, group *rmpb.ResourceGroup, mainCfg *Config, lowRUNotifyChan chan struct{}) *groupCostController { + gc := &groupCostController{ + resourceGroupName: group.GetName(), + mainCfg: mainCfg, + groupSettings: group.Settings, + calculators: []ResourceCalculator{newKVCalculator(mainCfg), newSQLLayerCPUCalculateor(mainCfg)}, + mode: group.Settings.GetMode(), + lowRUNotifyChan: lowRUNotifyChan, + } + + switch gc.mode { + case rmpb.GroupMode_RUMode: + gc.handleRespFunc = gc.handleRUTokenResponse + case rmpb.GroupMode_NativeMode: + gc.handleRespFunc = gc.handleResourceTokenResponse + } + + gc.mu.requestUnitConsumptions = make([]*rmpb.RequestUnitItem, ruLen) + for typ := range gc.mu.requestUnitConsumptions { + gc.mu.requestUnitConsumptions[typ] = &rmpb.RequestUnitItem{ + Type: rmpb.RequestUnitType(typ), + } + } + gc.mu.resourceConsumptions = make([]*rmpb.ResourceItem, resourceLen) + for typ := range gc.mu.resourceConsumptions { + gc.mu.resourceConsumptions[typ] = &rmpb.ResourceItem{ + Type: rmpb.ResourceType(typ), + } + } + return gc +} + +func (gc *groupCostController) initRunState(ctx context.Context) { + now := time.Now() + gc.run.now = now + gc.run.targetPeriod = gc.mainCfg.targetPeriod + + gc.run.requestUnitConsumptions = make([]*rmpb.RequestUnitItem, ruLen) + for typ := range gc.run.requestUnitConsumptions { + gc.run.requestUnitConsumptions[typ] = &rmpb.RequestUnitItem{ + Type: rmpb.RequestUnitType(typ), + } + } + gc.run.resourceConsumptions = make([]*rmpb.ResourceItem, resourceLen) + for typ := range gc.run.resourceConsumptions { + gc.run.resourceConsumptions[typ] = &rmpb.ResourceItem{ + Type: rmpb.ResourceType(typ), + } + } + + gc.run.lastRequestUnitConsumptions = make([]*rmpb.RequestUnitItem, ruLen) + for typ := range gc.run.lastRequestUnitConsumptions { + gc.run.lastRequestUnitConsumptions[typ] = &rmpb.RequestUnitItem{ + Type: rmpb.RequestUnitType(typ), + } + } + gc.run.lastResourceConsumptions = make([]*rmpb.ResourceItem, resourceLen) + for typ := range gc.run.lastResourceConsumptions { + gc.run.lastResourceConsumptions[typ] = &rmpb.ResourceItem{ + Type: rmpb.ResourceType(typ), + } + } + + switch gc.mode { + case rmpb.GroupMode_RUMode: + gc.run.requestUnitTokens = make(map[rmpb.RequestUnitType]*tokenCounter) + for typ := range requestUnitList { + counter := &tokenCounter{ + limiter: NewLimiter(0, initialRquestUnits, gc.lowRUNotifyChan), + avgRUPerSec: initialRquestUnits / gc.run.targetPeriod.Seconds(), + avgLastTime: now, + } + gc.run.requestUnitTokens[typ] = counter + } + case rmpb.GroupMode_NativeMode: + gc.run.resourceTokens = make(map[rmpb.ResourceType]*tokenCounter) + for typ := range requestResourceList { + counter := &tokenCounter{ + limiter: NewLimiter(0, initialRquestUnits, gc.lowRUNotifyChan), + avgRUPerSec: initialRquestUnits / gc.run.targetPeriod.Seconds(), + avgLastTime: now, + } + gc.run.resourceTokens[typ] = counter + } + } +} + +func (gc *groupCostController) updateRunState(ctx context.Context) { + newTime := time.Now() + deltaResource := make(map[rmpb.ResourceType]float64) + deltaRequestUnit := make(map[rmpb.RequestUnitType]float64) + for _, calc := range gc.calculators { + calc.Trickle(deltaResource, deltaRequestUnit, ctx) + } + gc.mu.Lock() + for typ, detail := range deltaRequestUnit { + 
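+		// Fold the per-tick deltas into the shared counters under gc.mu;
+		// they are snapshotted into gc.run below so the main loop can read
+		// them without holding the lock.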
gc.mu.requestUnitConsumptions[typ].Value += detail + } + copy(gc.run.requestUnitConsumptions, gc.mu.requestUnitConsumptions) + copy(gc.run.resourceConsumptions, gc.mu.resourceConsumptions) + gc.mu.Unlock() + + // remove tokens + switch gc.mode { + case rmpb.GroupMode_RUMode: + for typ, counter := range gc.run.requestUnitTokens { + v, ok := deltaRequestUnit[typ] + if ok { + value := v + counter.limiter.RemoveTokens(newTime, value) + } + } + case rmpb.GroupMode_NativeMode: + for typ, counter := range gc.run.resourceTokens { + v, ok := deltaResource[typ] + if ok { + value := v + counter.limiter.RemoveTokens(newTime, value) + } + } + } + + log.Info("update run state", zap.Any("request unit comsumption", gc.run.requestUnitConsumptions), zap.Any("resource comsumption", gc.run.resourceConsumptions)) + gc.run.now = newTime +} + +func (gc *groupCostController) updateAvgRequestResourcePerSec(ctx context.Context) { + switch gc.mode { + case rmpb.GroupMode_NativeMode: + gc.updateAvgResourcePerSec(ctx) + case rmpb.GroupMode_RUMode: + gc.updateAvgRUPerSec(ctx) + } +} + +func (gc *groupCostController) handleTokenBucketTrickEvent(ctx context.Context) { + switch gc.mode { + case rmpb.GroupMode_NativeMode: + for _, counter := range gc.run.resourceTokens { + select { + case <-counter.setupNotificationCh: + counter.setupNotificationTimer = nil + counter.setupNotificationCh = nil + counter.limiter.SetupNotification(gc.run.now, float64(counter.setupNotificationThreshold)) + gc.updateRunState(ctx) + default: + } + } + case rmpb.GroupMode_RUMode: + for _, counter := range gc.run.requestUnitTokens { + select { + case <-counter.setupNotificationCh: + counter.setupNotificationTimer = nil + counter.setupNotificationCh = nil + counter.limiter.SetupNotification(gc.run.now, float64(counter.setupNotificationThreshold)) + gc.updateRunState(ctx) + default: + } + } + } +} + +func (gc *groupCostController) updateAvgResourcePerSec(ctx context.Context) { + for typ, counter := range gc.run.resourceTokens { + if !gc.calcAvg(counter, gc.run.resourceConsumptions[typ].Value) { + continue + } + log.Info("[resource group controllor] update avg ru per sec", zap.String("name", gc.resourceGroupName), zap.String("type", rmpb.ResourceType_name[int32(typ)]), zap.Float64("avgRUPerSec", counter.avgRUPerSec)) + } +} + +func (gc *groupCostController) updateAvgRUPerSec(ctx context.Context) { + for typ, counter := range gc.run.requestUnitTokens { + if !gc.calcAvg(counter, gc.run.resourceConsumptions[typ].Value) { + continue + } + log.Info("[resource group controllor] update avg ru per sec", zap.String("name", gc.resourceGroupName), zap.String("type", rmpb.RequestUnitType_name[int32(typ)]), zap.Float64("avgRUPerSec", counter.avgRUPerSec)) + } +} + +func (gc *groupCostController) calcAvg(counter *tokenCounter, new float64) bool { + deltaDuration := gc.run.now.Sub(counter.avgLastTime) + if deltaDuration <= 10*time.Millisecond { + return false + } + delta := (new - counter.avgRUPerSecLastRU) / deltaDuration.Seconds() + counter.avgRUPerSec = movingAvgFactor*counter.avgRUPerSec + (1-movingAvgFactor)*delta + counter.avgLastTime = gc.run.now + counter.avgRUPerSecLastRU = new + return true +} + +func (gc *groupCostController) shouldReportConsumption() bool { + for typ := range requestUnitList { + if gc.run.requestUnitConsumptions[typ].Value-gc.run.lastRequestUnitConsumptions[typ].Value >= consumptionsReportingThreshold { + return true + } + } + for typ := range requestResourceList { + if 
gc.run.resourceConsumptions[typ].Value-gc.run.lastResourceConsumptions[typ].Value >= consumptionsReportingThreshold { + return true + } + } + return false +} + +func (gc *groupCostController) handleTokenBucketResponse(ctx context.Context, resp *rmpb.TokenBucketResponse) { + gc.handleRespFunc(resp) + if !gc.run.initialRequestCompleted { + gc.run.initialRequestCompleted = true + // This is the first successful request. Take back the initial RUs that we + // used to pre-fill the bucket. + for _, counter := range gc.run.resourceTokens { + counter.limiter.RemoveTokens(gc.run.now, initialRquestUnits) + } + } +} + +func (gc *groupCostController) handleResourceTokenResponse(resp *rmpb.TokenBucketResponse) { + for _, grantedTB := range resp.GetGrantedResourceTokens() { + typ := grantedTB.GetType() + // todo: check whether grant = 0 + counter, ok := gc.run.resourceTokens[typ] + if !ok { + log.Warn("not support this resource type", zap.String("type", rmpb.ResourceType_name[int32(typ)])) + continue + } + gc.modifyTokenCounter(counter, grantedTB.GetGrantedTokens(), grantedTB.GetTrickleTimeMs()) + } +} + +func (gc *groupCostController) handleRUTokenResponse(resp *rmpb.TokenBucketResponse) { + for _, grantedTB := range resp.GetGrantedRUTokens() { + typ := grantedTB.GetType() + // todo: check whether grant = 0 + counter, ok := gc.run.requestUnitTokens[typ] + if !ok { + log.Warn("not support this resource type", zap.String("type", rmpb.ResourceType_name[int32(typ)])) + continue + } + gc.modifyTokenCounter(counter, grantedTB.GetGrantedTokens(), grantedTB.GetTrickleTimeMs()) + } +} + +func (gc *groupCostController) modifyTokenCounter(counter *tokenCounter, bucket *rmpb.TokenBucket, trickleTimeMs int64) { + granted := bucket.Tokens + remainder := 0. + if !counter.lastDeadline.IsZero() { + // If last request came with a trickle duration, we may have RUs that were + // not made available to the bucket yet; throw them together with the newly + // granted RUs. 
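+	// For example, with illustrative numbers: if the previous grant trickled
+	// at lastRate = 100 RU/s and its deadline is still 2s away, remainder =
+	// 100 * 2 = 200 RUs are carried over into this reconfiguration.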
+		if since := counter.lastDeadline.Sub(gc.run.now); since > 0 {
+			remainder = counter.lastRate * since.Seconds()
+		}
+	}
+	if counter.setupNotificationTimer != nil {
+		counter.setupNotificationTimer.Stop()
+		counter.setupNotificationTimer = nil
+		counter.setupNotificationCh = nil
+	}
+	notifyThreshold := granted * notifyFraction
+	if notifyThreshold < bufferRUs {
+		notifyThreshold = bufferRUs
+	}
+
+	var cfg tokenBucketReconfigureArgs
+	if trickleTimeMs == 0 {
+		cfg.NewTokens = granted
+		cfg.NewRate = float64(bucket.GetSettings().Fillrate)
+		cfg.NewBrust = int(granted + 1)
+		cfg.NotifyThreshold = notifyThreshold
+		counter.lastDeadline = time.Time{}
+	} else {
+		cfg.NewTokens = remainder
+		trickleDuration := time.Duration(trickleTimeMs) * time.Millisecond
+		deadline := gc.run.now.Add(trickleDuration)
+		cfg.NewRate = float64(bucket.GetSettings().Fillrate) + bucket.Tokens/trickleDuration.Seconds()
+
+		timerDuration := trickleDuration - time.Second
+		if timerDuration <= 0 {
+			timerDuration = (trickleDuration + time.Second) / 2
+		}
+		log.Info("[tenant controllor] set up trickle notification timer", zap.Duration("timerDuration", timerDuration), zap.Float64("newRate", cfg.NewRate))
+		counter.setupNotificationTimer = time.NewTimer(timerDuration)
+		counter.setupNotificationCh = counter.setupNotificationTimer.C
+		counter.setupNotificationThreshold = notifyThreshold
+
+		counter.lastDeadline = deadline
+	}
+	counter.lastRate = cfg.NewRate
+	counter.limiter.Reconfigure(gc.run.now, cfg)
+}
+
+func (gc *groupCostController) collectRequestAndConsumption(low bool) *rmpb.TokenBucketRequst {
+	req := &rmpb.TokenBucketRequst{
+		ResourceGroupName: gc.resourceGroupName,
+	}
+	// collect request resource
+	selected := !low
+	switch gc.mode {
+	case rmpb.GroupMode_NativeMode:
+		requests := make([]*rmpb.ResourceItem, 0, len(requestResourceList))
+		for typ, counter := range gc.run.resourceTokens {
+			if low && counter.limiter.IsLowTokens() {
+				selected = true
+			}
+			request := &rmpb.ResourceItem{
+				Type:  typ,
+				Value: gc.calcRequest(counter),
+			}
+			requests = append(requests, request)
+		}
+		req.Request = &rmpb.TokenBucketRequst_ResourceItems{
+			ResourceItems: &rmpb.TokenBucketRequst_RequestResource{
+				RequestResource: requests,
+			},
+		}
+	case rmpb.GroupMode_RUMode:
+		requests := make([]*rmpb.RequestUnitItem, 0, len(requestUnitList))
+		for typ, counter := range gc.run.requestUnitTokens {
+			if low && counter.limiter.IsLowTokens() {
+				selected = true
+			}
+			request := &rmpb.RequestUnitItem{
+				Type:  typ,
+				Value: gc.calcRequest(counter),
+			}
+			requests = append(requests, request)
+		}
+		req.Request = &rmpb.TokenBucketRequst_RuItems{
+			RuItems: &rmpb.TokenBucketRequst_RequestRU{
+				RequestRU: requests,
+			},
+		}
+	}
+	if !selected {
+		return nil
+	}
+
+	// collect resource consumption
+	deltaResourceConsumption := make([]*rmpb.ResourceItem, resourceLen)
+	for typ, cons := range gc.run.resourceConsumptions {
+		deltaResourceConsumption[typ] = &rmpb.ResourceItem{
+			Type:  rmpb.ResourceType(typ),
+			Value: Sub(cons.Value, gc.run.lastResourceConsumptions[typ].Value),
+		}
+	}
+	// collect request unit consumption
+	deltaRequestUnitConsumption := make([]*rmpb.RequestUnitItem, ruLen)
+	for typ, cons := range gc.run.requestUnitConsumptions {
+		deltaRequestUnitConsumption[typ] = &rmpb.RequestUnitItem{
+			Type:  rmpb.RequestUnitType(typ),
+			Value: Sub(cons.Value, gc.run.lastRequestUnitConsumptions[typ].Value),
+		}
+	}
+	req.ConsumptionResourceSinceLastRequest = deltaResourceConsumption
+	req.ConsumptionRUSinceLastRequest = deltaRequestUnitConsumption
+
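+	// Advance the reporting baseline: the copies below make the next delta
+	// cover only consumption accumulated after this request.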
copy(gc.run.lastRequestUnitConsumptions, gc.run.requestUnitConsumptions) + copy(gc.run.lastResourceConsumptions, gc.run.resourceConsumptions) + return req +} + +func (gc *groupCostController) calcRequest(counter *tokenCounter) float64 { + value := counter.avgRUPerSec*gc.run.targetPeriod.Seconds() + bufferRUs + value -= float64(counter.limiter.AvailableTokens(gc.run.now)) + if value < 0 { + value = 0 + } + return value +} + +func (gc *groupCostController) OnRequestWait( + ctx context.Context, info RequestInfo, +) error { + deltaResource := make(map[rmpb.ResourceType]float64) + deltaRequestUnit := make(map[rmpb.RequestUnitType]float64) + for _, calc := range gc.calculators { + calc.BeforeKVRequest(deltaResource, deltaRequestUnit, info) + } + var wg sync.WaitGroup + var errReturn error + switch gc.mode { + case rmpb.GroupMode_NativeMode: + wg.Add(len(requestResourceList)) + for typ, counter := range gc.run.resourceTokens { + v, ok := deltaResource[typ] + if ok { + go func(value float64, counter *tokenCounter) { + if ok { + err := counter.limiter.WaitN(ctx, int(v)) + if err != nil { + errReturn = err + } + } + wg.Done() + }(v, counter) + } else { + wg.Done() + } + } + wg.Wait() + if errReturn != nil { + return errReturn + } + gc.mu.Lock() + for typ, detail := range deltaResource { + gc.mu.requestUnitConsumptions[typ].Value += detail + } + gc.mu.Unlock() + case rmpb.GroupMode_RUMode: + wg.Add(len(requestUnitList)) + for typ, counter := range gc.run.requestUnitTokens { + v, ok := deltaRequestUnit[typ] + if ok { + go func(value float64, counter *tokenCounter) { + if ok { + err := counter.limiter.WaitN(ctx, int(v)) + if err != nil { + errReturn = err + } + } + wg.Done() + }(v, counter) + } else { + wg.Done() + } + } + wg.Wait() + if errReturn != nil { + return errReturn + } + gc.mu.Lock() + for typ, detail := range deltaRequestUnit { + gc.mu.resourceConsumptions[typ].Value += detail + } + gc.mu.Unlock() + } + + return nil +} + +func (gc *groupCostController) OnResponse(ctx context.Context, req RequestInfo, resp ResponseInfo) { + deltaResource := make(map[rmpb.ResourceType]float64) + deltaRequestUnit := make(map[rmpb.RequestUnitType]float64) + for _, calc := range gc.calculators { + calc.AfterKVRequest(deltaResource, deltaRequestUnit, req, resp) + } + + switch gc.mode { + case rmpb.GroupMode_NativeMode: + for typ, counter := range gc.run.resourceTokens { + v, ok := deltaResource[typ] + if ok { + counter.limiter.RemoveTokens(time.Now(), float64(v)) + } + } + gc.mu.Lock() + for typ, detail := range deltaResource { + gc.mu.requestUnitConsumptions[typ].Value += detail + } + gc.mu.Unlock() + case rmpb.GroupMode_RUMode: + for typ, counter := range gc.run.requestUnitTokens { + v, ok := deltaRequestUnit[typ] + if ok { + counter.limiter.RemoveTokens(time.Now(), float64(v)) + } + } + gc.mu.Lock() + for typ, detail := range deltaRequestUnit { + gc.mu.resourceConsumptions[typ].Value += detail + } + gc.mu.Unlock() + } +} + +func (c *resourceGroupsController) addDemoResourceGroup(ctx context.Context) error { + setting := &rmpb.GroupSettings{ + Mode: rmpb.GroupMode_RUMode, + RUSettings: &rmpb.GroupRequestUnitSettings{ + RRU: &rmpb.TokenBucket{ + Tokens: 200000, + Settings: &rmpb.TokenLimitSettings{ + Fillrate: 2000, + BurstLimit: 20000000, + }, + }, + WRU: &rmpb.TokenBucket{ + Tokens: 200000, + Settings: &rmpb.TokenLimitSettings{ + Fillrate: 20000, + BurstLimit: 2000000, + }, + }, + }, + } + context, err := c.provider.AddResourceGroup(ctx, "demo", setting) + if err != nil { + return err + } + log.Info("add 
resource group", zap.String("resp", string(context)), zap.Any("setting", setting)) + return err +} diff --git a/pkg/mcs/resource_manager/tenant_client/config.go b/pkg/mcs/resource_manager/tenant_client/config.go new file mode 100644 index 00000000000..67b84c63e36 --- /dev/null +++ b/pkg/mcs/resource_manager/tenant_client/config.go @@ -0,0 +1,92 @@ +// Copyright 2022 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS,g +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tenantclient + +import ( + "time" + + "github.com/pingcap/kvproto/pkg/resource_manager" +) + +var ruLen = len(resource_manager.RequestUnitType_name) +var resourceLen = len(resource_manager.ResourceType_name) +var requestUnitList map[resource_manager.RequestUnitType]struct{} = map[resource_manager.RequestUnitType]struct{}{ + resource_manager.RequestUnitType_RRU: {}, + resource_manager.RequestUnitType_WRU: {}, +} + +var requestResourceList map[resource_manager.ResourceType]struct{} = map[resource_manager.ResourceType]struct{}{ + resource_manager.ResourceType_ReadBytes: {}, + resource_manager.ResourceType_WriteBytes: {}, + resource_manager.ResourceType_TotalCPUTimeMs: {}, +} + +const initialRquestUnits = 10000 + +const bufferRUs = 5000 + +// movingAvgFactor is the weight applied to a new "sample" of RU usage (with one +// sample per mainLoopUpdateInterval). +// +// If we want a factor of 0.5 per second, this should be: +// +// 0.5^(1 second / mainLoopUpdateInterval) +const movingAvgFactor = 0.5 + +const notifyFraction = 0.1 + +const consumptionsReportingThreshold = 100 + +const extendedReportingPeriodFactor = 4 + +const defaultGroupLoopUpdateInterval = 1 * time.Second +const defaultTargetPeriod = 10 * time.Second +const ( + readRequestCost = 1 + readCostPerByte = 0.5 / 1024 / 1024 + writeRequestCost = 5 + writeCostPerByte = 200. / 1024 / 1024 + readCPUMsCost = 1 + writeCPUMsCost = 1 + sqlCPUSecondCost = 0 +) + +type Config struct { + groupLoopUpdateInterval time.Duration + targetPeriod time.Duration + + ReadRequestCost RequestUnit + ReadBytesCost RequestUnit + ReadCPUMsCost RequestUnit + WriteRequestCost RequestUnit + WriteBytesCost RequestUnit + WriteCPUMsCost RequestUnit + SQLCPUSecondCost RequestUnit +} + +func DefaultConfig() *Config { + cfg := &Config{ + groupLoopUpdateInterval: defaultGroupLoopUpdateInterval, + targetPeriod: defaultTargetPeriod, + ReadRequestCost: RequestUnit(readRequestCost), + ReadBytesCost: RequestUnit(readCostPerByte), + ReadCPUMsCost: RequestUnit(readCPUMsCost), + WriteRequestCost: RequestUnit(writeRequestCost), + WriteBytesCost: RequestUnit(writeCostPerByte), + WriteCPUMsCost: RequestUnit(writeCPUMsCost), + SQLCPUSecondCost: RequestUnit(sqlCPUSecondCost), + } + return cfg +} diff --git a/pkg/mcs/resource_manager/tenant_client/limiter.go b/pkg/mcs/resource_manager/tenant_client/limiter.go new file mode 100644 index 00000000000..22a20ada818 --- /dev/null +++ b/pkg/mcs/resource_manager/tenant_client/limiter.go @@ -0,0 +1,527 @@ +// Copyright 2015 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Copyright 2022 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS,g +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tenantclient + +import ( + "context" + "fmt" + "math" + "sync" + "time" + + "github.com/pingcap/log" + "go.uber.org/zap" +) + +// Limit defines the maximum frequency of some events. +// Limit is represented as number of events per second. +// A zero Limit allows no events. +type Limit float64 + +// Inf is the infinite rate limit; it allows all events (even if burst is zero). +const Inf = Limit(math.MaxFloat64) + +// Every converts a minimum time interval between events to a Limit. +func Every(interval time.Duration) Limit { + if interval <= 0 { + return Inf + } + return 1 / Limit(interval.Seconds()) +} + +const burst = 1e8 + +// A Limiter controls how frequently events are allowed to happen. +// It implements a "token bucket" of size b, initially full and refilled +// at rate r tokens per second. +// Informally, in any large enough time interval, the Limiter limits the +// rate to r tokens per second, with a maximum burst size of b events. +// As a special case, if r == Inf (the infinite rate), b is ignored. +// See https://en.wikipedia.org/wiki/Token_bucket for more about token buckets. +// +// The zero value is a valid Limiter, but it will reject all events. +// Use NewLimiter to create non-zero Limiters. +// +// Limiter has three main methods, Allow, Reserve, and Wait. +// Most callers should use Wait. +// +// Each of the three methods consumes a single token. +// They differ in their behavior when no token is available. +// If no token is available, Allow returns false. +// If no token is available, Reserve returns a reservation for a future token +// and the amount of time the caller must wait before using it. +// If no token is available, Wait blocks until one can be obtained +// or its associated context.Context is canceled. +// +// The methods AllowN, ReserveN, and WaitN consume n tokens. +type Limiter struct { + mu sync.Mutex + limit Limit + burst int + tokens float64 + // last is the last time the limiter's tokens field was updated + last time.Time + // lastEvent is the latest time of a rate-limited event (past or future) + lastEvent time.Time + notifyThreshold float64 + lowTokensNotifyChan chan struct{} +} + +// Limit returns the maximum overall event rate. +func (lim *Limiter) Limit() Limit { + lim.mu.Lock() + defer lim.mu.Unlock() + return lim.limit +} + +// Burst returns the maximum burst size. Burst is the maximum number of tokens +// that can be consumed in a single call to Allow, Reserve, or Wait, so higher +// Burst values allow more events to happen at once. +// A zero Burst allows no events, unless limit == Inf. +func (lim *Limiter) Burst() int { + lim.mu.Lock() + defer lim.mu.Unlock() + return lim.burst +} + +// NewLimiter returns a new Limiter that allows events up to rate r and permits +// bursts of at most b tokens. 
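+//
+// Note that in this variant the second parameter is the initial token count
+// rather than the burst size; the burst is fixed to the package-level
+// constant, and lowTokensNotifyChan is signaled (non-blocking) when the
+// available tokens fall below the configured notify threshold.
+//
+// Minimal usage sketch (values are illustrative):
+//
+//	notifyCh := make(chan struct{}, 1)
+//	lim := NewLimiter(Limit(100), 1000, notifyCh) // 100 tokens/s, 1000 initial tokens
+//	if err := lim.WaitN(context.Background(), 50); err != nil {
+//		// context canceled or wait limit exceeded
+//	}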
+func NewLimiter(r Limit, tokens float64, lowTokensNotifyChan chan struct{}) *Limiter { + lim := &Limiter{ + limit: r, + last: time.Now(), + tokens: tokens, + burst: burst, + lowTokensNotifyChan: lowTokensNotifyChan, + } + log.Info("new limiter", zap.String("limiter", fmt.Sprintf("%+v", lim))) + return lim +} + +// Allow is shorthand for AllowN(time.Now(), 1). +func (lim *Limiter) Allow() bool { + return lim.AllowN(time.Now(), 1) +} + +// AllowN reports whether n events may happen at time now. +// Use this method if you intend to drop / skip events that exceed the rate limit. +// Otherwise use Reserve or Wait. +func (lim *Limiter) AllowN(now time.Time, n int) bool { + return lim.reserveN(now, n, 0).ok +} + +// A Reservation holds information about events that are permitted by a Limiter to happen after a delay. +// A Reservation may be canceled, which may enable the Limiter to permit additional events. +type Reservation struct { + ok bool + lim *Limiter + tokens int + timeToAct time.Time + // This is the Limit at reservation time, it can change later. + limit Limit +} + +// OK returns whether the limiter can provide the requested number of tokens +// within the maximum wait time. If OK is false, Delay returns InfDuration, and +// Cancel does nothing. +func (r *Reservation) OK() bool { + return r.ok +} + +// Delay is shorthand for DelayFrom(time.Now()). +func (r *Reservation) Delay() time.Duration { + return r.DelayFrom(time.Now()) +} + +// InfDuration is the duration returned by Delay when a Reservation is not OK. +const InfDuration = time.Duration(1<<63 - 1) + +// DelayFrom returns the duration for which the reservation holder must wait +// before taking the reserved action. Zero duration means act immediately. +// InfDuration means the limiter cannot grant the tokens requested in this +// Reservation within the maximum wait time. +func (r *Reservation) DelayFrom(now time.Time) time.Duration { + if !r.ok { + return InfDuration + } + delay := r.timeToAct.Sub(now) + if delay < 0 { + return 0 + } + return delay +} + +// Cancel is shorthand for CancelAt(time.Now()). +func (r *Reservation) Cancel() { + r.CancelAt(time.Now()) +} + +// CancelAt indicates that the reservation holder will not perform the reserved action +// and reverses the effects of this Reservation on the rate limit as much as possible, +// considering that other reservations may have already been made. +func (r *Reservation) CancelAt(now time.Time) { + if !r.ok { + return + } + + r.lim.mu.Lock() + defer r.lim.mu.Unlock() + + if r.lim.limit == Inf || r.tokens == 0 || r.timeToAct.Before(now) { + return + } + + // calculate tokens to restore + // The duration between lim.lastEvent and r.timeToAct tells us how many tokens were reserved + // after r was obtained. These tokens should not be restored. + restoreTokens := float64(r.tokens) - r.limit.tokensFromDuration(r.lim.lastEvent.Sub(r.timeToAct)) + if restoreTokens <= 0 { + return + } + // advance time to now + now, _, tokens := r.lim.advance(now) + // calculate new number of tokens + tokens += restoreTokens + + // update state + r.lim.last = now + r.lim.tokens = tokens + if r.timeToAct == r.lim.lastEvent { + prevEvent := r.timeToAct.Add(r.limit.durationFromTokens(float64(-r.tokens))) + if !prevEvent.Before(now) { + r.lim.lastEvent = prevEvent + } + } +} + +// Reserve is shorthand for ReserveN(time.Now(), 1). 
+func (lim *Limiter) Reserve() *Reservation { + return lim.ReserveN(time.Now(), 1) +} + +// ReserveN returns a Reservation that indicates how long the caller must wait before n events happen. +// The Limiter takes this Reservation into account when allowing future events. +// The returned Reservation’s OK() method returns false if n exceeds the Limiter's burst size. +// Usage example: +// +// r := lim.ReserveN(time.Now(), 1) +// if !r.OK() { +// // Not allowed to act! Did you remember to set lim.burst to be > 0 ? +// return +// } +// time.Sleep(r.Delay()) +// Act() +// +// Use this method if you wish to wait and slow down in accordance with the rate limit without dropping events. +// If you need to respect a deadline or cancel the delay, use Wait instead. +// To drop or skip events exceeding rate limit, use Allow instead. +func (lim *Limiter) ReserveN(now time.Time, n int) *Reservation { + r := lim.reserveN(now, n, InfDuration) + return &r +} + +// Wait is shorthand for WaitN(ctx, 1). +func (lim *Limiter) Wait(ctx context.Context) (err error) { + return lim.WaitN(ctx, 1) +} + +// WaitN blocks until lim permits n events to happen. +// It returns an error if n exceeds the Limiter's burst size, the Context is +// canceled, or the expected wait time exceeds the Context's Deadline. +// The burst limit is ignored if the rate limit is Inf. + +// Todo: support float64 n +func (lim *Limiter) WaitN(ctx context.Context, n int) (err error) { + lim.mu.Lock() + burst := lim.burst + limit := lim.limit + lim.mu.Unlock() + + if n > burst && limit != Inf { + return fmt.Errorf("rate: Wait(n=%d) exceeds limiter's burst %d", n, burst) + } + // Check if ctx is already cancelled + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + // Determine wait limit + now := time.Now() + waitLimit := InfDuration + if deadline, ok := ctx.Deadline(); ok { + waitLimit = deadline.Sub(now) + } + // Reserve + r := lim.reserveN(now, n, waitLimit) + if !r.ok { + return fmt.Errorf("rate: Wait(n=%d) Burst(b=%d) tokens(t=%f) rate(r=%f) would exceed context deadline", n, burst, lim.tokens, limit) + } + // Wait if necessary + delay := r.DelayFrom(now) + if delay == 0 { + return nil + } + t := time.NewTimer(delay) + defer t.Stop() + if delay > 1000*time.Millisecond { + log.Warn("[tenant controllor] Need wait N", zap.Time("now", now), zap.Duration("delay", delay), zap.Int("n", n)) + } + select { + case <-t.C: + // We can proceed. + return nil + case <-ctx.Done(): + // Context was canceled before we could proceed. Cancel the + // reservation, which may permit other events to proceed sooner. + r.Cancel() + return ctx.Err() + } +} + +// SetLimit is shorthand for SetLimitAt(time.Now(), newLimit). +func (lim *Limiter) SetLimit(newLimit Limit) { + lim.SetLimitAt(time.Now(), newLimit) +} + +// SetLimitAt sets a new Limit for the limiter. The new Limit, and Burst, may be violated +// or underutilized by those which reserved (using Reserve or Wait) but did not yet act +// before SetLimitAt was called. +func (lim *Limiter) SetLimitAt(now time.Time, newLimit Limit) { + select { + case <-lim.lowTokensNotifyChan: + default: + } + lim.mu.Lock() + defer lim.mu.Unlock() + + now, _, tokens := lim.advance(now) + + lim.last = now + lim.tokens = tokens + lim.limit = newLimit + lim.maybeNotify(now) +} + +// SetupNotification enables the notification at the given threshold. 
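+//
+// In this client it is armed by handleTokenBucketTrickEvent once the trickle
+// timer created in modifyTokenCounter fires, so low-token notifications only
+// start near the end of a trickle window.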
+func (lim *Limiter) SetupNotification(now time.Time, threshold float64) { + lim.advance(now) + lim.notifyThreshold = threshold +} + +// notify tries to send a non-blocking notification on notifyCh and disables +// further notifications (until the next Reconfigure or StartNotification). +func (lim *Limiter) notify() { + lim.notifyThreshold = 0 + select { + case lim.lowTokensNotifyChan <- struct{}{}: + default: + } +} + +// maybeNotify checks if it's time to send the notification and if so, performs +// the notification. +func (lim *Limiter) maybeNotify(now time.Time) { + if lim.IsLowTokens() { + lim.notify() + } +} + +func (lim *Limiter) IsLowTokens() bool { + if lim.notifyThreshold > 0 && lim.tokens < lim.notifyThreshold { + return true + } + return false +} + +// SetBurst is shorthand for SetBurstAt(time.Now(), newBurst). +func (lim *Limiter) SetBurst(newBurst int) { + lim.SetBurstAt(time.Now(), newBurst) +} + +// SetBurstAt sets a new burst size for the limiter. +func (lim *Limiter) SetBurstAt(now time.Time, newBurst int) { + lim.mu.Lock() + defer lim.mu.Unlock() + + now, _, tokens := lim.advance(now) + + lim.last = now + lim.tokens = tokens + lim.burst = newBurst +} + +// RemoveTokens decreases the amount of tokens currently available. +func (lim *Limiter) RemoveTokens(now time.Time, amount float64) { + lim.mu.Lock() + defer lim.mu.Unlock() + now, _, tokens := lim.advance(now) + lim.last = now + lim.tokens = tokens - amount + lim.maybeNotify(now) +} + +type tokenBucketReconfigureArgs struct { + NewTokens float64 + + NewRate float64 + + NewBrust int + + NotifyThreshold float64 +} + +func (lim *Limiter) Reconfigure(now time.Time, args tokenBucketReconfigureArgs) { + select { + case <-lim.lowTokensNotifyChan: + default: + } + lim.mu.Lock() + defer lim.mu.Unlock() + log.Debug("[tenant controllor] before reconfigure", zap.Float64("NewTokens", lim.tokens), zap.Float64("NewRate", float64(lim.limit)), zap.Float64("NotifyThreshold", args.NotifyThreshold)) + now, _, tokens := lim.advance(now) + lim.last = now + lim.tokens = tokens + args.NewTokens + lim.limit = Limit(args.NewRate) + lim.notifyThreshold = args.NotifyThreshold + lim.maybeNotify(now) + log.Debug("[tenant controllor] after reconfigure", zap.Float64("NewTokens", lim.tokens), zap.Float64("NewRate", float64(lim.limit)), zap.Float64("NotifyThreshold", args.NotifyThreshold)) +} + +// SetTokens decreases the amount of tokens currently available. +func (lim *Limiter) SetTokens(now time.Time, amount float64) { + select { + case <-lim.lowTokensNotifyChan: + default: + } + lim.mu.Lock() + defer lim.mu.Unlock() + now, _, _ = lim.advance(now) + lim.last = now + lim.tokens = amount +} + +// AvailableTokens decreases the amount of tokens currently available. +func (lim *Limiter) AvailableTokens(now time.Time) float64 { + lim.mu.Lock() + defer lim.mu.Unlock() + _, _, tokens := lim.advance(now) + return tokens +} + +// reserveN is a helper method for AllowN, ReserveN, and WaitN. +// maxFutureReserve specifies the maximum reservation wait duration allowed. +// reserveN returns Reservation, not *Reservation, to avoid allocation in AllowN and WaitN. 
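+//
+// For limit > 0 the accounting below reduces to (sketch):
+//
+//	tokens = advance(now) - n                            // may go negative
+//	waitDuration = -tokens / limit                       // zero when tokens >= 0
+//	ok = n <= burst && waitDuration <= maxFutureReserve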
+func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duration) Reservation { + lim.mu.Lock() + defer lim.mu.Unlock() + + if lim.limit == Inf { + return Reservation{ + ok: true, + lim: lim, + tokens: n, + timeToAct: now, + } + } else if lim.limit == 0 { + // TODO(nolouch), remove burst, just use tokens + var ok bool + if lim.tokens >= float64(n) { + ok = true + lim.tokens -= float64(n) + } + return Reservation{ + ok: ok, + lim: lim, + tokens: int(lim.tokens), + timeToAct: now, + } + } + + now, last, tokens := lim.advance(now) + + // Calculate the remaining number of tokens resulting from the request. + //log.Info("advance token", zap.Float64("tokens", tokens), zap.Float64("new tokens", tokens-float64(n))) + tokens -= float64(n) + lim.maybeNotify(now) + // Calculate the wait duration + var waitDuration time.Duration + if tokens < 0 { + waitDuration = lim.limit.durationFromTokens(-tokens) + } + + // Decide result + ok := n <= lim.burst && waitDuration <= maxFutureReserve + + // Prepare reservation + r := Reservation{ + ok: ok, + lim: lim, + limit: lim.limit, + } + if ok { + r.tokens = n + r.timeToAct = now.Add(waitDuration) + } + // Update state + if ok { + lim.last = now + lim.tokens = tokens + lim.lastEvent = r.timeToAct + } else { + lim.last = last + } + + return r +} + +// advance calculates and returns an updated state for lim resulting from the passage of time. +// lim is not changed. +// advance requires that lim.mu is held. +func (lim *Limiter) advance(now time.Time) (newNow time.Time, newLast time.Time, newTokens float64) { + last := lim.last + if now.Before(last) { + last = now + } + + // Calculate the new number of tokens, due to time that passed. + elapsed := now.Sub(last) + delta := lim.limit.tokensFromDuration(elapsed) + tokens := lim.tokens + delta + return now, last, tokens +} + +// durationFromTokens is a unit conversion function from the number of tokens to the duration +// of time it takes to accumulate them at a rate of limit tokens per second. +func (limit Limit) durationFromTokens(tokens float64) time.Duration { + if limit <= 0 { + return InfDuration + } + seconds := tokens / float64(limit) + return time.Duration(float64(time.Second) * seconds) +} + +// tokensFromDuration is a unit conversion function from a time duration to the number of tokens +// which could be accumulated during that duration at a rate of limit tokens per second. +func (limit Limit) tokensFromDuration(d time.Duration) float64 { + if limit <= 0 { + return 0 + } + return d.Seconds() * float64(limit) +} diff --git a/pkg/mcs/resource_manager/tenant_client/model.go b/pkg/mcs/resource_manager/tenant_client/model.go new file mode 100644 index 00000000000..cf8b4e4ac9f --- /dev/null +++ b/pkg/mcs/resource_manager/tenant_client/model.go @@ -0,0 +1,110 @@ +// Copyright 2022 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS,g +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tenantclient + +import ( + "context" + + rmpb "github.com/pingcap/kvproto/pkg/resource_manager" +) + +type RequestUnit float64 + +type RequestInfo interface { + IsWrite() bool + WriteBytes() uint64 +} + +type ResponseInfo interface { + ReadBytes() uint64 + KVCPUms() uint64 +} + +func Sub(c float64, other float64) float64 { + if c < other { + return 0 + } else { + return c - other + } +} + +type ResourceCalculator interface { + Trickle(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, context.Context) + BeforeKVRequest(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, RequestInfo) + AfterKVRequest(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, RequestInfo, ResponseInfo) +} + +type KVCalculator struct { + *Config +} + +func newKVCalculator(cfg *Config) *KVCalculator { + return &KVCalculator{Config: cfg} +} + +func (dwc *KVCalculator) Trickle(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, ctx context.Context) { +} + +func (dwc *KVCalculator) BeforeKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, req RequestInfo) { + if req.IsWrite() { + resource[rmpb.ResourceType_KVWriteRPCCount] += 1 + + writeBytes := req.WriteBytes() + resource[rmpb.ResourceType_WriteBytes] += float64(writeBytes) + + ru[rmpb.RequestUnitType_WRU] += float64(dwc.WriteRequestCost) + ru[rmpb.RequestUnitType_WRU] += float64(dwc.WriteBytesCost) * float64(writeBytes) + } else { + resource[rmpb.ResourceType_KVReadRPCCount] += 1 + ru[rmpb.RequestUnitType_RRU] += float64(dwc.ReadRequestCost) + } +} +func (dwc *KVCalculator) AfterKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, req RequestInfo, res ResponseInfo) { + readBytes := res.ReadBytes() + resource[rmpb.ResourceType_ReadBytes] += float64(readBytes) + + ru[rmpb.RequestUnitType_RRU] += float64(readBytes) * float64(dwc.ReadBytesCost) + + kvCPUms := float64(res.KVCPUms()) + resource[rmpb.ResourceType_TotalCPUTimeMs] += kvCPUms + if req.IsWrite() { + ru[rmpb.RequestUnitType_WRU] += kvCPUms * float64(dwc.WriteCPUMsCost) + } else { + ru[rmpb.RequestUnitType_RRU] += kvCPUms * float64(dwc.ReadCPUMsCost) + } +} + +type SQLLayerCPUCalculateor struct { + *Config +} + +func newSQLLayerCPUCalculateor(cfg *Config) *SQLLayerCPUCalculateor { + return &SQLLayerCPUCalculateor{Config: cfg} +} + +func (dsc *SQLLayerCPUCalculateor) Trickle(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, ctx context.Context) { + // TODO: SQL Layer RU/resource custom + cpuFunc := func(ctx context.Context) float64 { + return 0. 
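+		// Placeholder sampler: always reports zero SQL-layer CPU until a
+		// real implementation is wired in (see the TODO above).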
+ } + cpu := cpuFunc(ctx) + resource[rmpb.ResourceType_TotalCPUTimeMs] += cpu + resource[rmpb.ResourceType_SQLLayerCPUTimeMs] += cpu +} + +func (dsc *SQLLayerCPUCalculateor) BeforeKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, req RequestInfo) { +} +func (dsc *SQLLayerCPUCalculateor) AfterKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, req RequestInfo, res ResponseInfo) { +} From cf6b0d07e8c1bf44d204673f9563c8464bf56cc7 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Wed, 11 Jan 2023 11:03:26 +0800 Subject: [PATCH 02/32] Introduce the RU coefficient config and refine some code Signed-off-by: JmPotato --- .../resource_manager/tenant_client/client.go | 17 +-- .../resource_manager/tenant_client/config.go | 144 +++++++++++------- .../resource_manager/tenant_client/limiter.go | 1 - .../resource_manager/tenant_client/model.go | 38 +++-- 4 files changed, 119 insertions(+), 81 deletions(-) diff --git a/pkg/mcs/resource_manager/tenant_client/client.go b/pkg/mcs/resource_manager/tenant_client/client.go index 8bf89726c4d..86432eadffa 100644 --- a/pkg/mcs/resource_manager/tenant_client/client.go +++ b/pkg/mcs/resource_manager/tenant_client/client.go @@ -273,7 +273,6 @@ func (c *resourceGroupsController) mainLoop(ctx context.Context) { default: c.handleTokenBucketTrickEvent(ctx) } - } } @@ -426,8 +425,8 @@ func (gc *groupCostController) initRunState(ctx context.Context) { gc.run.requestUnitTokens = make(map[rmpb.RequestUnitType]*tokenCounter) for typ := range requestUnitList { counter := &tokenCounter{ - limiter: NewLimiter(0, initialRquestUnits, gc.lowRUNotifyChan), - avgRUPerSec: initialRquestUnits / gc.run.targetPeriod.Seconds(), + limiter: NewLimiter(0, initialRequestUnits, gc.lowRUNotifyChan), + avgRUPerSec: initialRequestUnits / gc.run.targetPeriod.Seconds(), avgLastTime: now, } gc.run.requestUnitTokens[typ] = counter @@ -436,8 +435,8 @@ func (gc *groupCostController) initRunState(ctx context.Context) { gc.run.resourceTokens = make(map[rmpb.ResourceType]*tokenCounter) for typ := range requestResourceList { counter := &tokenCounter{ - limiter: NewLimiter(0, initialRquestUnits, gc.lowRUNotifyChan), - avgRUPerSec: initialRquestUnits / gc.run.targetPeriod.Seconds(), + limiter: NewLimiter(0, initialRequestUnits, gc.lowRUNotifyChan), + avgRUPerSec: initialRequestUnits / gc.run.targetPeriod.Seconds(), avgLastTime: now, } gc.run.resourceTokens[typ] = counter @@ -501,7 +500,7 @@ func (gc *groupCostController) handleTokenBucketTrickEvent(ctx context.Context) case <-counter.setupNotificationCh: counter.setupNotificationTimer = nil counter.setupNotificationCh = nil - counter.limiter.SetupNotification(gc.run.now, float64(counter.setupNotificationThreshold)) + counter.limiter.SetupNotification(gc.run.now, counter.setupNotificationThreshold) gc.updateRunState(ctx) default: } @@ -512,7 +511,7 @@ func (gc *groupCostController) handleTokenBucketTrickEvent(ctx context.Context) case <-counter.setupNotificationCh: counter.setupNotificationTimer = nil counter.setupNotificationCh = nil - counter.limiter.SetupNotification(gc.run.now, float64(counter.setupNotificationThreshold)) + counter.limiter.SetupNotification(gc.run.now, counter.setupNotificationThreshold) gc.updateRunState(ctx) default: } @@ -571,7 +570,7 @@ func (gc *groupCostController) handleTokenBucketResponse(ctx context.Context, re // This is the first successful request. Take back the initial RUs that we // used to pre-fill the bucket. 
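+	// Note that only the resourceTokens counters are drained here; in RU
+	// mode the pre-filled tokens live in requestUnitTokens instead.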
for _, counter := range gc.run.resourceTokens { - counter.limiter.RemoveTokens(gc.run.now, initialRquestUnits) + counter.limiter.RemoveTokens(gc.run.now, initialRequestUnits) } } } @@ -723,7 +722,7 @@ func (gc *groupCostController) collectRequestAndConsumption(low bool) *rmpb.Toke func (gc *groupCostController) calcRequest(counter *tokenCounter) float64 { value := counter.avgRUPerSec*gc.run.targetPeriod.Seconds() + bufferRUs - value -= float64(counter.limiter.AvailableTokens(gc.run.now)) + value -= counter.limiter.AvailableTokens(gc.run.now) if value < 0 { value = 0 } diff --git a/pkg/mcs/resource_manager/tenant_client/config.go b/pkg/mcs/resource_manager/tenant_client/config.go index 67b84c63e36..777abf4bc02 100644 --- a/pkg/mcs/resource_manager/tenant_client/config.go +++ b/pkg/mcs/resource_manager/tenant_client/config.go @@ -20,73 +20,105 @@ import ( "github.com/pingcap/kvproto/pkg/resource_manager" ) -var ruLen = len(resource_manager.RequestUnitType_name) -var resourceLen = len(resource_manager.ResourceType_name) -var requestUnitList map[resource_manager.RequestUnitType]struct{} = map[resource_manager.RequestUnitType]struct{}{ - resource_manager.RequestUnitType_RRU: {}, - resource_manager.RequestUnitType_WRU: {}, -} - -var requestResourceList map[resource_manager.ResourceType]struct{} = map[resource_manager.ResourceType]struct{}{ - resource_manager.ResourceType_ReadBytes: {}, - resource_manager.ResourceType_WriteBytes: {}, - resource_manager.ResourceType_TotalCPUTimeMs: {}, -} - -const initialRquestUnits = 10000 - -const bufferRUs = 5000 - -// movingAvgFactor is the weight applied to a new "sample" of RU usage (with one -// sample per mainLoopUpdateInterval). -// -// If we want a factor of 0.5 per second, this should be: -// -// 0.5^(1 second / mainLoopUpdateInterval) -const movingAvgFactor = 0.5 - -const notifyFraction = 0.1 - -const consumptionsReportingThreshold = 100 +var ( + ruLen = len(resource_manager.RequestUnitType_name) + resourceLen = len(resource_manager.ResourceType_name) + requestUnitList map[resource_manager.RequestUnitType]struct{} = map[resource_manager.RequestUnitType]struct{}{ + resource_manager.RequestUnitType_RRU: {}, + resource_manager.RequestUnitType_WRU: {}, + } + requestResourceList map[resource_manager.ResourceType]struct{} = map[resource_manager.ResourceType]struct{}{ + resource_manager.ResourceType_ReadBytes: {}, + resource_manager.ResourceType_WriteBytes: {}, + resource_manager.ResourceType_TotalCPUTimeMs: {}, + } +) -const extendedReportingPeriodFactor = 4 +const ( + initialRequestUnits = 10000 + bufferRUs = 5000 + // movingAvgFactor is the weight applied to a new "sample" of RU usage (with one + // sample per mainLoopUpdateInterval). + // + // If we want a factor of 0.5 per second, this should be: + // + // 0.5^(1 second / mainLoopUpdateInterval) + movingAvgFactor = 0.5 + notifyFraction = 0.1 + consumptionsReportingThreshold = 100 + extendedReportingPeriodFactor = 4 + defaultGroupLoopUpdateInterval = 1 * time.Second + defaultTargetPeriod = 10 * time.Second +) -const defaultGroupLoopUpdateInterval = 1 * time.Second -const defaultTargetPeriod = 10 * time.Second const ( - readRequestCost = 1 - readCostPerByte = 0.5 / 1024 / 1024 - writeRequestCost = 5 - writeCostPerByte = 200. / 1024 / 1024 - readCPUMsCost = 1 - writeCPUMsCost = 1 - sqlCPUSecondCost = 0 + defaultReadBaseCost = 1 + defaultReadCostPerByte = 1. / 1024 / 1024 + defaultWriteBaseCost = 5 + defaultWriteCostPerByte = 10. 
/ 1024 / 1024
+	defaultWriteCPUMsCost   = 1
 )
 
+// RequestUnitConfig is the configuration of the request units, which determines the coefficients of
+// the RRU and WRU cost.
+type RequestUnitConfig struct {
+	// ReadBaseCost is the base cost for a read request. It is always incurred, regardless of
+	// how many bytes the request reads or writes and how much CPU time it takes.
+	ReadBaseCost float64 `toml:"read-base-cost" json:"read-base-cost"`
+	// ReadCostPerByte is the cost for each byte read. It's 1 MiB = 1 RRU by default.
+	ReadCostPerByte float64 `toml:"read-cost-per-byte" json:"read-cost-per-byte"`
+	// WriteBaseCost is the base cost for a write request. It is always incurred, regardless of
+	// how many bytes the request reads or writes and how much CPU time it takes.
+	WriteBaseCost float64 `toml:"write-base-cost" json:"write-base-cost"`
+	// WriteCostPerByte is the cost for each byte written. It's 1 MiB = 10 WRU by default.
+	WriteCostPerByte float64 `toml:"write-cost-per-byte" json:"write-cost-per-byte"`
+	// WriteCPUMsCost is the cost for each millisecond of CPU time taken by a write request.
+	// It's 1 millisecond = 1 WRU by default.
+	WriteCPUMsCost float64 `toml:"write-cpu-ms-cost" json:"write-cpu-ms-cost"`
+}
+
+// DefaultRequestUnitConfig returns the default request unit configuration.
+func DefaultRequestUnitConfig() *RequestUnitConfig {
+	return &RequestUnitConfig{
+		ReadBaseCost:     defaultReadBaseCost,
+		ReadCostPerByte:  defaultReadCostPerByte,
+		WriteBaseCost:    defaultWriteBaseCost,
+		WriteCostPerByte: defaultWriteCostPerByte,
+		WriteCPUMsCost:   defaultWriteCPUMsCost,
+	}
+}
+
+// Config is the configuration of the resource units, which gives the read/write request
+// units or request resource cost standards. It should be derived from a given `RequestUnitConfig`
+// or `RequestResourceConfig`.
 type Config struct {
 	groupLoopUpdateInterval time.Duration
 	targetPeriod            time.Duration
 
-	ReadRequestCost  RequestUnit
-	ReadBytesCost    RequestUnit
-	ReadCPUMsCost    RequestUnit
-	WriteRequestCost RequestUnit
-	WriteBytesCost   RequestUnit
-	WriteCPUMsCost   RequestUnit
-	SQLCPUSecondCost RequestUnit
+	ReadBaseCost   RequestUnit
+	ReadBytesCost  RequestUnit
+	WriteBaseCost  RequestUnit
+	WriteBytesCost RequestUnit
+	WriteCPUMsCost RequestUnit
+	// TODO: add SQL computing CPU cost.
 }
 
+// DefaultConfig returns the default configuration.
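+//
+// As a rough illustration using the default coefficients above (the request
+// sizes here are hypothetical, chosen only to make the arithmetic concrete):
+// a write request that writes 4 MiB and takes 2ms of KV CPU time would cost
+// about
+//
+//	WRU = WriteBaseCost + 4*1024*1024*WriteCostPerByte + 2*WriteCPUMsCost
+//	    = 5 + 40 + 2 = 47
+//
+// while a read request that reads 1 MiB back would cost about
+//
+//	RRU = ReadBaseCost + 1024*1024*ReadCostPerByte
+//	    = 1 + 1 = 2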
 func DefaultConfig() *Config {
-	cfg := &Config{
-		groupLoopUpdateInterval: defaultGroupLoopUpdateInterval,
-		targetPeriod:            defaultTargetPeriod,
-		ReadRequestCost:         RequestUnit(readRequestCost),
-		ReadBytesCost:           RequestUnit(readCostPerByte),
-		ReadCPUMsCost:           RequestUnit(readCPUMsCost),
-		WriteRequestCost:        RequestUnit(writeRequestCost),
-		WriteBytesCost:          RequestUnit(writeCostPerByte),
-		WriteCPUMsCost:          RequestUnit(writeCPUMsCost),
-		SQLCPUSecondCost:        RequestUnit(sqlCPUSecondCost),
-	}
+	cfg := generateConfig(
+		DefaultRequestUnitConfig(),
+	)
+	cfg.groupLoopUpdateInterval = defaultGroupLoopUpdateInterval
+	cfg.targetPeriod = defaultTargetPeriod
 	return cfg
 }
+
+func generateConfig(ruConfig *RequestUnitConfig) *Config {
+	return &Config{
+		ReadBaseCost:   RequestUnit(ruConfig.ReadBaseCost),
+		ReadBytesCost:  RequestUnit(ruConfig.ReadCostPerByte),
+		WriteBaseCost:  RequestUnit(ruConfig.WriteBaseCost),
+		WriteBytesCost: RequestUnit(ruConfig.WriteCostPerByte),
+		WriteCPUMsCost: RequestUnit(ruConfig.WriteCPUMsCost),
+	}
+}
diff --git a/pkg/mcs/resource_manager/tenant_client/limiter.go b/pkg/mcs/resource_manager/tenant_client/limiter.go
index 22a20ada818..cf02e99a6de 100644
--- a/pkg/mcs/resource_manager/tenant_client/limiter.go
+++ b/pkg/mcs/resource_manager/tenant_client/limiter.go
@@ -457,7 +457,6 @@ func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duratio
 	now, last, tokens := lim.advance(now)
 
 	// Calculate the remaining number of tokens resulting from the request.
-	//log.Info("advance token", zap.Float64("tokens", tokens), zap.Float64("new tokens", tokens-float64(n)))
 	tokens -= float64(n)
 	lim.maybeNotify(now)
 	// Calculate the wait duration
diff --git a/pkg/mcs/resource_manager/tenant_client/model.go b/pkg/mcs/resource_manager/tenant_client/model.go
index cf8b4e4ac9f..94615f9ed46 100644
--- a/pkg/mcs/resource_manager/tenant_client/model.go
+++ b/pkg/mcs/resource_manager/tenant_client/model.go
@@ -20,16 +20,23 @@ import (
 	rmpb "github.com/pingcap/kvproto/pkg/resource_manager"
 )
 
+// RequestUnit is the basic unit of the resource request management, which has two types:
+// - RRU: read request unit
+// - WRU: write request unit
 type RequestUnit float64
 
+// RequestInfo is the interface of the request information provider. A request should be
+// able to tell whether it's a write request and, if so, how many bytes it would write.
 type RequestInfo interface {
 	IsWrite() bool
 	WriteBytes() uint64
 }
 
+// ResponseInfo is the interface of the response information provider. A response should be
+// able to tell how many bytes it read and the KV CPU cost in milliseconds.
 type ResponseInfo interface {
 	ReadBytes() uint64
-	KVCPUms() uint64
+	KVCPUMs() uint64
 }
 
 func Sub(c float64, other float64) float64 {
@@ -40,12 +47,14 @@ func Sub(c float64, other float64) float64 {
 	}
 }
 
+// ResourceCalculator is used to calculate the resource consumption of a request.
 type ResourceCalculator interface {
 	Trickle(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, context.Context)
 	BeforeKVRequest(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, RequestInfo)
 	AfterKVRequest(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, RequestInfo, ResponseInfo)
 }
 
+// KVCalculator is used to calculate the KV request consumption.
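+//
+// A minimal sketch of the caller-side RequestInfo/ResponseInfo providers this
+// calculator consumes (the types below are hypothetical, for illustration only):
+//
+//	type kvRequest struct {
+//		write      bool
+//		writeBytes uint64
+//	}
+//
+//	func (r *kvRequest) IsWrite() bool      { return r.write }
+//	func (r *kvRequest) WriteBytes() uint64 { return r.writeBytes }
+//
+//	type kvResponse struct {
+//		readBytes uint64
+//		kvCPUMs   uint64
+//	}
+//
+//	func (r *kvResponse) ReadBytes() uint64 { return r.readBytes }
+//	func (r *kvResponse) KVCPUMs() uint64   { return r.kvCPUMs }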
type KVCalculator struct { *Config } @@ -61,28 +70,26 @@ func (dwc *KVCalculator) BeforeKVRequest(resource map[rmpb.ResourceType]float64, if req.IsWrite() { resource[rmpb.ResourceType_KVWriteRPCCount] += 1 - writeBytes := req.WriteBytes() - resource[rmpb.ResourceType_WriteBytes] += float64(writeBytes) + writeBytes := float64(req.WriteBytes()) + resource[rmpb.ResourceType_WriteBytes] += writeBytes - ru[rmpb.RequestUnitType_WRU] += float64(dwc.WriteRequestCost) - ru[rmpb.RequestUnitType_WRU] += float64(dwc.WriteBytesCost) * float64(writeBytes) + ru[rmpb.RequestUnitType_WRU] += float64(dwc.WriteBaseCost) + ru[rmpb.RequestUnitType_WRU] += float64(dwc.WriteBytesCost) * writeBytes } else { resource[rmpb.ResourceType_KVReadRPCCount] += 1 - ru[rmpb.RequestUnitType_RRU] += float64(dwc.ReadRequestCost) + ru[rmpb.RequestUnitType_RRU] += float64(dwc.ReadBaseCost) } } -func (dwc *KVCalculator) AfterKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, req RequestInfo, res ResponseInfo) { - readBytes := res.ReadBytes() - resource[rmpb.ResourceType_ReadBytes] += float64(readBytes) - ru[rmpb.RequestUnitType_RRU] += float64(readBytes) * float64(dwc.ReadBytesCost) +func (dwc *KVCalculator) AfterKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, req RequestInfo, res ResponseInfo) { + readBytes := float64(res.ReadBytes()) + resource[rmpb.ResourceType_ReadBytes] += readBytes + ru[rmpb.RequestUnitType_RRU] += readBytes * float64(dwc.ReadBytesCost) - kvCPUms := float64(res.KVCPUms()) - resource[rmpb.ResourceType_TotalCPUTimeMs] += kvCPUms + kvCPUMs := float64(res.KVCPUMs()) + resource[rmpb.ResourceType_TotalCPUTimeMs] += kvCPUMs if req.IsWrite() { - ru[rmpb.RequestUnitType_WRU] += kvCPUms * float64(dwc.WriteCPUMsCost) - } else { - ru[rmpb.RequestUnitType_RRU] += kvCPUms * float64(dwc.ReadCPUMsCost) + ru[rmpb.RequestUnitType_WRU] += kvCPUMs * float64(dwc.WriteCPUMsCost) } } @@ -106,5 +113,6 @@ func (dsc *SQLLayerCPUCalculateor) Trickle(resource map[rmpb.ResourceType]float6 func (dsc *SQLLayerCPUCalculateor) BeforeKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, req RequestInfo) { } + func (dsc *SQLLayerCPUCalculateor) AfterKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, req RequestInfo, res ResponseInfo) { } From 1a6271d014f5552fc293e113bd22ee0d6b6f3759 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Wed, 11 Jan 2023 11:21:24 +0800 Subject: [PATCH 03/32] Refine the code Signed-off-by: JmPotato --- .../resource_manager/tenant_client/client.go | 6 +++--- .../resource_manager/tenant_client/config.go | 20 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pkg/mcs/resource_manager/tenant_client/client.go b/pkg/mcs/resource_manager/tenant_client/client.go index 86432eadffa..d6d2b6e7b8a 100644 --- a/pkg/mcs/resource_manager/tenant_client/client.go +++ b/pkg/mcs/resource_manager/tenant_client/client.go @@ -811,7 +811,7 @@ func (gc *groupCostController) OnResponse(ctx context.Context, req RequestInfo, for typ, counter := range gc.run.resourceTokens { v, ok := deltaResource[typ] if ok { - counter.limiter.RemoveTokens(time.Now(), float64(v)) + counter.limiter.RemoveTokens(time.Now(), v) } } gc.mu.Lock() @@ -823,7 +823,7 @@ func (gc *groupCostController) OnResponse(ctx context.Context, req RequestInfo, for typ, counter := range gc.run.requestUnitTokens { v, ok := deltaRequestUnit[typ] if ok { - counter.limiter.RemoveTokens(time.Now(), float64(v)) + 
counter.limiter.RemoveTokens(time.Now(), v) } } gc.mu.Lock() @@ -858,6 +858,6 @@ func (c *resourceGroupsController) addDemoResourceGroup(ctx context.Context) err if err != nil { return err } - log.Info("add resource group", zap.String("resp", string(context)), zap.Any("setting", setting)) + log.Info("add resource group", zap.String("resp", context), zap.Any("setting", setting)) return err } diff --git a/pkg/mcs/resource_manager/tenant_client/config.go b/pkg/mcs/resource_manager/tenant_client/config.go index 777abf4bc02..b6e655794bc 100644 --- a/pkg/mcs/resource_manager/tenant_client/config.go +++ b/pkg/mcs/resource_manager/tenant_client/config.go @@ -17,20 +17,20 @@ package tenantclient import ( "time" - "github.com/pingcap/kvproto/pkg/resource_manager" + rmpb "github.com/pingcap/kvproto/pkg/resource_manager" ) var ( - ruLen = len(resource_manager.RequestUnitType_name) - resourceLen = len(resource_manager.ResourceType_name) - requestUnitList map[resource_manager.RequestUnitType]struct{} = map[resource_manager.RequestUnitType]struct{}{ - resource_manager.RequestUnitType_RRU: {}, - resource_manager.RequestUnitType_WRU: {}, + ruLen = len(rmpb.RequestUnitType_name) + resourceLen = len(rmpb.ResourceType_name) + requestUnitList map[rmpb.RequestUnitType]struct{} = map[rmpb.RequestUnitType]struct{}{ + rmpb.RequestUnitType_RRU: {}, + rmpb.RequestUnitType_WRU: {}, } - requestResourceList map[resource_manager.ResourceType]struct{} = map[resource_manager.ResourceType]struct{}{ - resource_manager.ResourceType_ReadBytes: {}, - resource_manager.ResourceType_WriteBytes: {}, - resource_manager.ResourceType_TotalCPUTimeMs: {}, + requestResourceList map[rmpb.ResourceType]struct{} = map[rmpb.ResourceType]struct{}{ + rmpb.ResourceType_ReadBytes: {}, + rmpb.ResourceType_WriteBytes: {}, + rmpb.ResourceType_TotalCPUTimeMs: {}, } ) From d1a17b1f0a0aaf56019cf1036647a55d5d7bd777 Mon Sep 17 00:00:00 2001 From: Cabinfever_B Date: Thu, 12 Jan 2023 16:56:41 +0800 Subject: [PATCH 04/32] refactor Signed-off-by: Cabinfever_B --- .../resource_manager/tenant_client/client.go | 864 ------------------ .../resource_manager/tenant_client/config.go | 92 -- .../resource_manager/tenant_client/limiter.go | 527 ----------- .../resource_manager/tenant_client/model.go | 110 --- 4 files changed, 1593 deletions(-) delete mode 100644 pkg/mcs/resource_manager/tenant_client/client.go delete mode 100644 pkg/mcs/resource_manager/tenant_client/config.go delete mode 100644 pkg/mcs/resource_manager/tenant_client/limiter.go delete mode 100644 pkg/mcs/resource_manager/tenant_client/model.go diff --git a/pkg/mcs/resource_manager/tenant_client/client.go b/pkg/mcs/resource_manager/tenant_client/client.go deleted file mode 100644 index 8bf89726c4d..00000000000 --- a/pkg/mcs/resource_manager/tenant_client/client.go +++ /dev/null @@ -1,864 +0,0 @@ -// Copyright 2022 TiKV Project Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS,g -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package tenantclient - -import ( - "context" - "sync" - "time" - - "github.com/pingcap/errors" - rmpb "github.com/pingcap/kvproto/pkg/resource_manager" - "github.com/pingcap/log" - "go.uber.org/zap" -) - -type TenantSideKVInterceptor interface { - OnRequestWait(ctx context.Context, resourceGroupName string, info RequestInfo) error - OnResponse(ctx context.Context, resourceGroupName string, req RequestInfo, resp ResponseInfo) error -} - -type ResourceGroupProvider interface { - ListResourceGroups(ctx context.Context) ([]*rmpb.ResourceGroup, error) - GetResourceGroup(ctx context.Context, resourceGroupName string) (*rmpb.ResourceGroup, error) - AddResourceGroup(ctx context.Context, resourceGroupName string, settings *rmpb.GroupSettings) (string, error) - ModifyResourceGroup(ctx context.Context, resourceGroupName string, settings *rmpb.GroupSettings) (string, error) - DeleteResourceGroup(ctx context.Context, resourceGroupName string) (string, error) - AcquireTokenBuckets(ctx context.Context, request *rmpb.TokenBucketsRequest) ([]*rmpb.TokenBucketResponse, error) -} - -func NewResourceGroupController( - provider ResourceGroupProvider, -) (*resourceGroupsController, error) { - return newTenantSideCostController(provider) -} - -var _ TenantSideKVInterceptor = (*resourceGroupsController)(nil) - -type resourceGroupsController struct { - instanceFingerprint string - provider ResourceGroupProvider - groupsController sync.Map - config *Config - - // responseChan is used to receive results from token bucket requests, which - // are run in a separate goroutine. A nil response indicates an error. - responseChan chan []*rmpb.TokenBucketResponse - - // lowRUNotifyChan is used when the number of available resource is running low and - // we need to send an early token bucket request. - lowRUNotifyChan chan struct{} - - run struct { - now time.Time - lastRequestTime time.Time - // requestInProgress is true if we are in the process of sending a request; - // it gets set to false when we process the response (in the main loop), - // even in error cases. - requestInProgress bool - - // requestNeedsRetry is set if the last token bucket request encountered an - // error. This triggers a retry attempt on the next tick. - // - // Note: requestNeedsRetry and requestInProgress are never true at the same - // time. - requestNeedsRetry bool - - // targetPeriod stores the value of the TargetPeriodSetting setting at the - // last update. 
- targetPeriod time.Duration - } -} - -func newTenantSideCostController(provider ResourceGroupProvider) (*resourceGroupsController, error) { - return &resourceGroupsController{ - provider: provider, - config: DefaultConfig(), - lowRUNotifyChan: make(chan struct{}, 1), - responseChan: make(chan []*rmpb.TokenBucketResponse, 1), - }, nil -} - -func (c *resourceGroupsController) Start(ctx context.Context, instanceFingerprint string) error { - if len(instanceFingerprint) == 0 { - return errors.New("invalid SQLInstanceID") - } - c.instanceFingerprint = instanceFingerprint - // just for demo - if err := c.addDemoResourceGroup(ctx); err != nil { - log.Error("add Demo ResourceGroup failed", zap.Error(err)) - } - if err := c.updateAllResourceGroups(ctx); err != nil { - log.Error("update ResourceGroup failed", zap.Error(err)) - } - - c.initRunState(ctx) - go c.mainLoop(ctx) - return nil -} - -func (c *resourceGroupsController) updateAllResourceGroups(ctx context.Context) error { - groups, err := c.provider.ListResourceGroups(ctx) - if err != nil { - return err - } - lastedGroups := make(map[string]struct{}) - for _, group := range groups { - // todo: check add or modify - log.Info("create resource group cost controller", zap.String("name", group.GetName())) - gc := newGroupCostController(ctx, group, c.config, c.lowRUNotifyChan) - c.groupsController.Store(group.GetName(), gc) - lastedGroups[group.GetName()] = struct{}{} - } - c.groupsController.Range(func(key, value any) bool { - resourceGroupName := key.(string) - if _, ok := lastedGroups[resourceGroupName]; !ok { - c.groupsController.Delete(key) - } - return true - }) - return nil -} - -func (c *resourceGroupsController) initRunState(ctx context.Context) { - now := time.Now() - c.run.now = now - c.run.lastRequestTime = now - c.run.targetPeriod = c.config.targetPeriod - c.groupsController.Range(func(name, value any) bool { - gc := value.(*groupCostController) - gc.initRunState(ctx) - return true - }) -} - -func (c *resourceGroupsController) updateRunState(ctx context.Context) { - c.run.now = time.Now() - c.groupsController.Range(func(name, value any) bool { - gc := value.(*groupCostController) - gc.updateRunState(ctx) - return true - }) -} - -func (c *resourceGroupsController) shouldReportConsumption() bool { - if c.run.requestInProgress { - return false - } - timeSinceLastRequest := c.run.now.Sub(c.run.lastRequestTime) - if timeSinceLastRequest >= c.run.targetPeriod { - if timeSinceLastRequest >= extendedReportingPeriodFactor*c.run.targetPeriod { - return true - } - ret := false - c.groupsController.Range(func(name, value any) bool { - gc := value.(*groupCostController) - ret = ret || gc.shouldReportConsumption() - return !ret - }) - } - return false -} - -func (c *resourceGroupsController) updateAvgRequestResourcePerSec(ctx context.Context) { - c.groupsController.Range(func(name, value any) bool { - gc := value.(*groupCostController) - gc.updateAvgRequestResourcePerSec(ctx) - return true - }) -} - -func (c *resourceGroupsController) handleTokenBucketResponse(ctx context.Context, resp []*rmpb.TokenBucketResponse) { - for _, res := range resp { - name := res.GetResourceGroupName() - v, ok := c.groupsController.Load(name) - if !ok { - log.Warn("A non-existent resource group was found when handle token response.", zap.String("name", name)) - } - gc := v.(*groupCostController) - gc.handleTokenBucketResponse(ctx, res) - } -} - -func (c *resourceGroupsController) collectTokenBucketRequests(ctx context.Context, source string, low bool) { - requests := 
make([]*rmpb.TokenBucketRequst, 0) - c.groupsController.Range(func(name, value any) bool { - gc := value.(*groupCostController) - request := gc.collectRequestAndConsumption(low) - if request != nil { - requests = append(requests, request) - } - return true - }) - if len(requests) > 0 { - c.sendTokenBucketRequests(ctx, requests, source) - } -} - -func (c *resourceGroupsController) sendTokenBucketRequests(ctx context.Context, requests []*rmpb.TokenBucketRequst, source string) { - c.run.requestInProgress = true - req := &rmpb.TokenBucketsRequest{ - Requests: requests, - TargetRequestPeriodMs: uint64(c.config.targetPeriod / time.Millisecond), - } - go func() { - now := time.Now() - log.Info("[tenant controllor] send token bucket request", zap.Time("now", now), zap.Any("req", req.Requests), zap.String("source", source)) - resp, err := c.provider.AcquireTokenBuckets(ctx, req) - if err != nil { - // Don't log any errors caused by the stopper canceling the context. - if !errors.ErrorEqual(err, context.Canceled) { - log.L().Sugar().Infof("TokenBucket RPC error: %v", err) - } - resp = nil - } - log.Info("[tenant controllor] token bucket response", zap.Time("now", time.Now()), zap.Any("resp", resp), zap.String("source", source), zap.Duration("latency", time.Since(now))) - c.responseChan <- resp - }() -} - -func (c *resourceGroupsController) handleTokenBucketTrickEvent(ctx context.Context) { - c.groupsController.Range(func(name, value any) bool { - gc := value.(*groupCostController) - gc.handleTokenBucketTrickEvent(ctx) - return true - }) -} - -func (c *resourceGroupsController) mainLoop(ctx context.Context) { - interval := c.config.groupLoopUpdateInterval - ticker := time.NewTicker(interval) - defer ticker.Stop() - - c.updateRunState(ctx) - c.collectTokenBucketRequests(ctx, "init", false /* select all */) - - for { - select { - case <-ctx.Done(): - return - case resp := <-c.responseChan: - c.run.requestInProgress = false - if resp != nil { - c.updateRunState(ctx) - c.handleTokenBucketResponse(ctx, resp) - } else { - // A nil response indicates a failure (which would have been logged). 
- c.run.requestNeedsRetry = true - } - case <-ticker.C: - c.updateRunState(ctx) - c.updateAvgRequestResourcePerSec(ctx) - if c.run.requestNeedsRetry || c.shouldReportConsumption() { - c.run.requestNeedsRetry = false - c.collectTokenBucketRequests(ctx, "report", false /* select all */) - } - case <-c.lowRUNotifyChan: - c.updateRunState(ctx) - if !c.run.requestInProgress { - c.collectTokenBucketRequests(ctx, "low_ru", true /* only select low tokens resource group */) - } - default: - c.handleTokenBucketTrickEvent(ctx) - } - - } -} - -func (c *resourceGroupsController) OnRequestWait( - ctx context.Context, resourceGroupName string, info RequestInfo, -) error { - tmp, ok := c.groupsController.Load(resourceGroupName) - if !ok { - return errors.Errorf("[resource group] resourceGroupName %s is not existed.", resourceGroupName) - } - gc := tmp.(*groupCostController) - err := gc.OnRequestWait(ctx, info) - return err -} - -func (c *resourceGroupsController) OnResponse(ctx context.Context, resourceGroupName string, req RequestInfo, resp ResponseInfo) error { - tmp, ok := c.groupsController.Load(resourceGroupName) - if !ok { - log.Warn("[resource group] resourceGroupName is not existed.", zap.String("resourceGroupName", resourceGroupName)) - } - gc := tmp.(*groupCostController) - gc.OnResponse(ctx, req, resp) - return nil -} - -type groupCostController struct { - resourceGroupName string - mainCfg *Config - groupSettings *rmpb.GroupSettings - calculators []ResourceCalculator - mode rmpb.GroupMode - - handleRespFunc func(*rmpb.TokenBucketResponse) - - mu struct { - sync.Mutex - requestUnitConsumptions []*rmpb.RequestUnitItem - resourceConsumptions []*rmpb.ResourceItem - } - - lowRUNotifyChan chan struct{} - // run contains the state that is updated by the main loop. - run struct { - now time.Time - - // targetPeriod stores the value of the TargetPeriodSetting setting at the - // last update. - targetPeriod time.Duration - - // consumptions stores the last value of mu.consumption. - requestUnitConsumptions []*rmpb.RequestUnitItem - resourceConsumptions []*rmpb.ResourceItem - - lastRequestUnitConsumptions []*rmpb.RequestUnitItem - lastResourceConsumptions []*rmpb.ResourceItem - - // initialRequestCompleted is set to true when the first token bucket - // request completes successfully. - initialRequestCompleted bool - - resourceTokens map[rmpb.ResourceType]*tokenCounter - requestUnitTokens map[rmpb.RequestUnitType]*tokenCounter - } -} - -type tokenCounter struct { - // avgRUPerSec is an exponentially-weighted moving average of the RU - // consumption per second; used to estimate the RU requirements for the next - // request. - avgRUPerSec float64 - // lastSecRU is the consumption.RU value when avgRUPerSec was last updated. 
- avgRUPerSecLastRU float64 - avgLastTime time.Time - - setupNotificationCh <-chan time.Time - setupNotificationThreshold float64 - setupNotificationTimer *time.Timer - - lastDeadline time.Time - lastRate float64 - - limiter *Limiter -} - -func newGroupCostController(ctx context.Context, group *rmpb.ResourceGroup, mainCfg *Config, lowRUNotifyChan chan struct{}) *groupCostController { - gc := &groupCostController{ - resourceGroupName: group.GetName(), - mainCfg: mainCfg, - groupSettings: group.Settings, - calculators: []ResourceCalculator{newKVCalculator(mainCfg), newSQLLayerCPUCalculateor(mainCfg)}, - mode: group.Settings.GetMode(), - lowRUNotifyChan: lowRUNotifyChan, - } - - switch gc.mode { - case rmpb.GroupMode_RUMode: - gc.handleRespFunc = gc.handleRUTokenResponse - case rmpb.GroupMode_NativeMode: - gc.handleRespFunc = gc.handleResourceTokenResponse - } - - gc.mu.requestUnitConsumptions = make([]*rmpb.RequestUnitItem, ruLen) - for typ := range gc.mu.requestUnitConsumptions { - gc.mu.requestUnitConsumptions[typ] = &rmpb.RequestUnitItem{ - Type: rmpb.RequestUnitType(typ), - } - } - gc.mu.resourceConsumptions = make([]*rmpb.ResourceItem, resourceLen) - for typ := range gc.mu.resourceConsumptions { - gc.mu.resourceConsumptions[typ] = &rmpb.ResourceItem{ - Type: rmpb.ResourceType(typ), - } - } - return gc -} - -func (gc *groupCostController) initRunState(ctx context.Context) { - now := time.Now() - gc.run.now = now - gc.run.targetPeriod = gc.mainCfg.targetPeriod - - gc.run.requestUnitConsumptions = make([]*rmpb.RequestUnitItem, ruLen) - for typ := range gc.run.requestUnitConsumptions { - gc.run.requestUnitConsumptions[typ] = &rmpb.RequestUnitItem{ - Type: rmpb.RequestUnitType(typ), - } - } - gc.run.resourceConsumptions = make([]*rmpb.ResourceItem, resourceLen) - for typ := range gc.run.resourceConsumptions { - gc.run.resourceConsumptions[typ] = &rmpb.ResourceItem{ - Type: rmpb.ResourceType(typ), - } - } - - gc.run.lastRequestUnitConsumptions = make([]*rmpb.RequestUnitItem, ruLen) - for typ := range gc.run.lastRequestUnitConsumptions { - gc.run.lastRequestUnitConsumptions[typ] = &rmpb.RequestUnitItem{ - Type: rmpb.RequestUnitType(typ), - } - } - gc.run.lastResourceConsumptions = make([]*rmpb.ResourceItem, resourceLen) - for typ := range gc.run.lastResourceConsumptions { - gc.run.lastResourceConsumptions[typ] = &rmpb.ResourceItem{ - Type: rmpb.ResourceType(typ), - } - } - - switch gc.mode { - case rmpb.GroupMode_RUMode: - gc.run.requestUnitTokens = make(map[rmpb.RequestUnitType]*tokenCounter) - for typ := range requestUnitList { - counter := &tokenCounter{ - limiter: NewLimiter(0, initialRquestUnits, gc.lowRUNotifyChan), - avgRUPerSec: initialRquestUnits / gc.run.targetPeriod.Seconds(), - avgLastTime: now, - } - gc.run.requestUnitTokens[typ] = counter - } - case rmpb.GroupMode_NativeMode: - gc.run.resourceTokens = make(map[rmpb.ResourceType]*tokenCounter) - for typ := range requestResourceList { - counter := &tokenCounter{ - limiter: NewLimiter(0, initialRquestUnits, gc.lowRUNotifyChan), - avgRUPerSec: initialRquestUnits / gc.run.targetPeriod.Seconds(), - avgLastTime: now, - } - gc.run.resourceTokens[typ] = counter - } - } -} - -func (gc *groupCostController) updateRunState(ctx context.Context) { - newTime := time.Now() - deltaResource := make(map[rmpb.ResourceType]float64) - deltaRequestUnit := make(map[rmpb.RequestUnitType]float64) - for _, calc := range gc.calculators { - calc.Trickle(deltaResource, deltaRequestUnit, ctx) - } - gc.mu.Lock() - for typ, detail := range deltaRequestUnit { - 
gc.mu.requestUnitConsumptions[typ].Value += detail - } - copy(gc.run.requestUnitConsumptions, gc.mu.requestUnitConsumptions) - copy(gc.run.resourceConsumptions, gc.mu.resourceConsumptions) - gc.mu.Unlock() - - // remove tokens - switch gc.mode { - case rmpb.GroupMode_RUMode: - for typ, counter := range gc.run.requestUnitTokens { - v, ok := deltaRequestUnit[typ] - if ok { - value := v - counter.limiter.RemoveTokens(newTime, value) - } - } - case rmpb.GroupMode_NativeMode: - for typ, counter := range gc.run.resourceTokens { - v, ok := deltaResource[typ] - if ok { - value := v - counter.limiter.RemoveTokens(newTime, value) - } - } - } - - log.Info("update run state", zap.Any("request unit comsumption", gc.run.requestUnitConsumptions), zap.Any("resource comsumption", gc.run.resourceConsumptions)) - gc.run.now = newTime -} - -func (gc *groupCostController) updateAvgRequestResourcePerSec(ctx context.Context) { - switch gc.mode { - case rmpb.GroupMode_NativeMode: - gc.updateAvgResourcePerSec(ctx) - case rmpb.GroupMode_RUMode: - gc.updateAvgRUPerSec(ctx) - } -} - -func (gc *groupCostController) handleTokenBucketTrickEvent(ctx context.Context) { - switch gc.mode { - case rmpb.GroupMode_NativeMode: - for _, counter := range gc.run.resourceTokens { - select { - case <-counter.setupNotificationCh: - counter.setupNotificationTimer = nil - counter.setupNotificationCh = nil - counter.limiter.SetupNotification(gc.run.now, float64(counter.setupNotificationThreshold)) - gc.updateRunState(ctx) - default: - } - } - case rmpb.GroupMode_RUMode: - for _, counter := range gc.run.requestUnitTokens { - select { - case <-counter.setupNotificationCh: - counter.setupNotificationTimer = nil - counter.setupNotificationCh = nil - counter.limiter.SetupNotification(gc.run.now, float64(counter.setupNotificationThreshold)) - gc.updateRunState(ctx) - default: - } - } - } -} - -func (gc *groupCostController) updateAvgResourcePerSec(ctx context.Context) { - for typ, counter := range gc.run.resourceTokens { - if !gc.calcAvg(counter, gc.run.resourceConsumptions[typ].Value) { - continue - } - log.Info("[resource group controllor] update avg ru per sec", zap.String("name", gc.resourceGroupName), zap.String("type", rmpb.ResourceType_name[int32(typ)]), zap.Float64("avgRUPerSec", counter.avgRUPerSec)) - } -} - -func (gc *groupCostController) updateAvgRUPerSec(ctx context.Context) { - for typ, counter := range gc.run.requestUnitTokens { - if !gc.calcAvg(counter, gc.run.resourceConsumptions[typ].Value) { - continue - } - log.Info("[resource group controllor] update avg ru per sec", zap.String("name", gc.resourceGroupName), zap.String("type", rmpb.RequestUnitType_name[int32(typ)]), zap.Float64("avgRUPerSec", counter.avgRUPerSec)) - } -} - -func (gc *groupCostController) calcAvg(counter *tokenCounter, new float64) bool { - deltaDuration := gc.run.now.Sub(counter.avgLastTime) - if deltaDuration <= 10*time.Millisecond { - return false - } - delta := (new - counter.avgRUPerSecLastRU) / deltaDuration.Seconds() - counter.avgRUPerSec = movingAvgFactor*counter.avgRUPerSec + (1-movingAvgFactor)*delta - counter.avgLastTime = gc.run.now - counter.avgRUPerSecLastRU = new - return true -} - -func (gc *groupCostController) shouldReportConsumption() bool { - for typ := range requestUnitList { - if gc.run.requestUnitConsumptions[typ].Value-gc.run.lastRequestUnitConsumptions[typ].Value >= consumptionsReportingThreshold { - return true - } - } - for typ := range requestResourceList { - if 
gc.run.resourceConsumptions[typ].Value-gc.run.lastResourceConsumptions[typ].Value >= consumptionsReportingThreshold { - return true - } - } - return false -} - -func (gc *groupCostController) handleTokenBucketResponse(ctx context.Context, resp *rmpb.TokenBucketResponse) { - gc.handleRespFunc(resp) - if !gc.run.initialRequestCompleted { - gc.run.initialRequestCompleted = true - // This is the first successful request. Take back the initial RUs that we - // used to pre-fill the bucket. - for _, counter := range gc.run.resourceTokens { - counter.limiter.RemoveTokens(gc.run.now, initialRquestUnits) - } - } -} - -func (gc *groupCostController) handleResourceTokenResponse(resp *rmpb.TokenBucketResponse) { - for _, grantedTB := range resp.GetGrantedResourceTokens() { - typ := grantedTB.GetType() - // todo: check whether grant = 0 - counter, ok := gc.run.resourceTokens[typ] - if !ok { - log.Warn("not support this resource type", zap.String("type", rmpb.ResourceType_name[int32(typ)])) - continue - } - gc.modifyTokenCounter(counter, grantedTB.GetGrantedTokens(), grantedTB.GetTrickleTimeMs()) - } -} - -func (gc *groupCostController) handleRUTokenResponse(resp *rmpb.TokenBucketResponse) { - for _, grantedTB := range resp.GetGrantedRUTokens() { - typ := grantedTB.GetType() - // todo: check whether grant = 0 - counter, ok := gc.run.requestUnitTokens[typ] - if !ok { - log.Warn("not support this resource type", zap.String("type", rmpb.ResourceType_name[int32(typ)])) - continue - } - gc.modifyTokenCounter(counter, grantedTB.GetGrantedTokens(), grantedTB.GetTrickleTimeMs()) - } -} - -func (gc *groupCostController) modifyTokenCounter(counter *tokenCounter, bucket *rmpb.TokenBucket, trickleTimeMs int64) { - granted := bucket.Tokens - remainder := 0. - if !counter.lastDeadline.IsZero() { - // If last request came with a trickle duration, we may have RUs that were - // not made available to the bucket yet; throw them together with the newly - // granted RUs. 
- if since := counter.lastDeadline.Sub(gc.run.now); since > 0 { - remainder = counter.lastRate * since.Seconds() - } - } - if counter.setupNotificationTimer != nil { - counter.setupNotificationTimer.Stop() - counter.setupNotificationTimer = nil - counter.setupNotificationCh = nil - } - notifyThreshold := granted * notifyFraction - if notifyThreshold < bufferRUs { - notifyThreshold = bufferRUs - } - - var cfg tokenBucketReconfigureArgs - if trickleTimeMs == 0 { - cfg.NewTokens = granted - cfg.NewRate = float64(bucket.GetSettings().Fillrate) - cfg.NewBrust = int(granted + 1) - cfg.NotifyThreshold = notifyThreshold - counter.lastDeadline = time.Time{} - } else { - cfg.NewTokens = remainder - trickleDuration := time.Duration(trickleTimeMs) * time.Millisecond - deadline := gc.run.now.Add(trickleDuration) - cfg.NewRate = float64(bucket.GetSettings().Fillrate) + bucket.Tokens/trickleDuration.Seconds() - - timerDuration := trickleDuration - time.Second - if timerDuration <= 0 { - timerDuration = (trickleDuration + time.Second) / 2 - } - log.Info("QQQ2 ", zap.Duration("timerDuration", timerDuration), zap.Float64("cfg.NewRate", cfg.NewRate)) - counter.setupNotificationTimer = time.NewTimer(timerDuration) - counter.setupNotificationCh = counter.setupNotificationTimer.C - counter.setupNotificationThreshold = notifyThreshold - - counter.lastDeadline = deadline - } - counter.lastRate = cfg.NewRate - counter.limiter.Reconfigure(gc.run.now, cfg) -} - -func (gc *groupCostController) collectRequestAndConsumption(low bool) *rmpb.TokenBucketRequst { - req := &rmpb.TokenBucketRequst{ - ResourceGroupName: gc.resourceGroupName, - } - // collect request resource - selected := !low - switch gc.mode { - case rmpb.GroupMode_NativeMode: - requests := make([]*rmpb.ResourceItem, 0, len(requestResourceList)) - for typ, counter := range gc.run.resourceTokens { - if low && counter.limiter.IsLowTokens() { - selected = true - } - request := &rmpb.ResourceItem{ - Type: typ, - Value: gc.calcRequest(counter), - } - requests = append(requests, request) - } - req.Request = &rmpb.TokenBucketRequst_ResourceItems{ - ResourceItems: &rmpb.TokenBucketRequst_RequestResource{ - RequestResource: requests, - }, - } - case rmpb.GroupMode_RUMode: - requests := make([]*rmpb.RequestUnitItem, 0, len(requestUnitList)) - for typ, counter := range gc.run.requestUnitTokens { - if low && counter.limiter.IsLowTokens() { - selected = true - } - request := &rmpb.RequestUnitItem{ - Type: typ, - Value: gc.calcRequest(counter), - } - requests = append(requests, request) - } - req.Request = &rmpb.TokenBucketRequst_RuItems{ - RuItems: &rmpb.TokenBucketRequst_RequestRU{ - RequestRU: requests, - }, - } - } - if !selected { - return nil - } - - // collect resource consumption - deltaResourceConsumption := make([]*rmpb.ResourceItem, resourceLen) - for typ, cons := range gc.run.resourceConsumptions { - deltaResourceConsumption[typ] = &rmpb.ResourceItem{ - Type: rmpb.ResourceType(typ), - Value: Sub(cons.Value, gc.run.lastResourceConsumptions[typ].Value), - } - } - // collect request unit consumption - deltaRequestUnitConsumption := make([]*rmpb.RequestUnitItem, ruLen) - for typ, cons := range gc.run.requestUnitConsumptions { - deltaRequestUnitConsumption[typ] = &rmpb.RequestUnitItem{ - Type: rmpb.RequestUnitType(typ), - Value: Sub(cons.Value, gc.run.lastRequestUnitConsumptions[typ].Value), - } - } - req.ConsumptionResourceSinceLastRequest = deltaResourceConsumption - req.ConsumptionRUSinceLastRequest = deltaRequestUnitConsumption - - 
copy(gc.run.lastRequestUnitConsumptions, gc.run.requestUnitConsumptions) - copy(gc.run.lastResourceConsumptions, gc.run.resourceConsumptions) - return req -} - -func (gc *groupCostController) calcRequest(counter *tokenCounter) float64 { - value := counter.avgRUPerSec*gc.run.targetPeriod.Seconds() + bufferRUs - value -= float64(counter.limiter.AvailableTokens(gc.run.now)) - if value < 0 { - value = 0 - } - return value -} - -func (gc *groupCostController) OnRequestWait( - ctx context.Context, info RequestInfo, -) error { - deltaResource := make(map[rmpb.ResourceType]float64) - deltaRequestUnit := make(map[rmpb.RequestUnitType]float64) - for _, calc := range gc.calculators { - calc.BeforeKVRequest(deltaResource, deltaRequestUnit, info) - } - var wg sync.WaitGroup - var errReturn error - switch gc.mode { - case rmpb.GroupMode_NativeMode: - wg.Add(len(requestResourceList)) - for typ, counter := range gc.run.resourceTokens { - v, ok := deltaResource[typ] - if ok { - go func(value float64, counter *tokenCounter) { - if ok { - err := counter.limiter.WaitN(ctx, int(v)) - if err != nil { - errReturn = err - } - } - wg.Done() - }(v, counter) - } else { - wg.Done() - } - } - wg.Wait() - if errReturn != nil { - return errReturn - } - gc.mu.Lock() - for typ, detail := range deltaResource { - gc.mu.requestUnitConsumptions[typ].Value += detail - } - gc.mu.Unlock() - case rmpb.GroupMode_RUMode: - wg.Add(len(requestUnitList)) - for typ, counter := range gc.run.requestUnitTokens { - v, ok := deltaRequestUnit[typ] - if ok { - go func(value float64, counter *tokenCounter) { - if ok { - err := counter.limiter.WaitN(ctx, int(v)) - if err != nil { - errReturn = err - } - } - wg.Done() - }(v, counter) - } else { - wg.Done() - } - } - wg.Wait() - if errReturn != nil { - return errReturn - } - gc.mu.Lock() - for typ, detail := range deltaRequestUnit { - gc.mu.resourceConsumptions[typ].Value += detail - } - gc.mu.Unlock() - } - - return nil -} - -func (gc *groupCostController) OnResponse(ctx context.Context, req RequestInfo, resp ResponseInfo) { - deltaResource := make(map[rmpb.ResourceType]float64) - deltaRequestUnit := make(map[rmpb.RequestUnitType]float64) - for _, calc := range gc.calculators { - calc.AfterKVRequest(deltaResource, deltaRequestUnit, req, resp) - } - - switch gc.mode { - case rmpb.GroupMode_NativeMode: - for typ, counter := range gc.run.resourceTokens { - v, ok := deltaResource[typ] - if ok { - counter.limiter.RemoveTokens(time.Now(), float64(v)) - } - } - gc.mu.Lock() - for typ, detail := range deltaResource { - gc.mu.requestUnitConsumptions[typ].Value += detail - } - gc.mu.Unlock() - case rmpb.GroupMode_RUMode: - for typ, counter := range gc.run.requestUnitTokens { - v, ok := deltaRequestUnit[typ] - if ok { - counter.limiter.RemoveTokens(time.Now(), float64(v)) - } - } - gc.mu.Lock() - for typ, detail := range deltaRequestUnit { - gc.mu.resourceConsumptions[typ].Value += detail - } - gc.mu.Unlock() - } -} - -func (c *resourceGroupsController) addDemoResourceGroup(ctx context.Context) error { - setting := &rmpb.GroupSettings{ - Mode: rmpb.GroupMode_RUMode, - RUSettings: &rmpb.GroupRequestUnitSettings{ - RRU: &rmpb.TokenBucket{ - Tokens: 200000, - Settings: &rmpb.TokenLimitSettings{ - Fillrate: 2000, - BurstLimit: 20000000, - }, - }, - WRU: &rmpb.TokenBucket{ - Tokens: 200000, - Settings: &rmpb.TokenLimitSettings{ - Fillrate: 20000, - BurstLimit: 2000000, - }, - }, - }, - } - context, err := c.provider.AddResourceGroup(ctx, "demo", setting) - if err != nil { - return err - } - log.Info("add 
resource group", zap.String("resp", string(context)), zap.Any("setting", setting)) - return err -} diff --git a/pkg/mcs/resource_manager/tenant_client/config.go b/pkg/mcs/resource_manager/tenant_client/config.go deleted file mode 100644 index 67b84c63e36..00000000000 --- a/pkg/mcs/resource_manager/tenant_client/config.go +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright 2022 TiKV Project Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS,g -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tenantclient - -import ( - "time" - - "github.com/pingcap/kvproto/pkg/resource_manager" -) - -var ruLen = len(resource_manager.RequestUnitType_name) -var resourceLen = len(resource_manager.ResourceType_name) -var requestUnitList map[resource_manager.RequestUnitType]struct{} = map[resource_manager.RequestUnitType]struct{}{ - resource_manager.RequestUnitType_RRU: {}, - resource_manager.RequestUnitType_WRU: {}, -} - -var requestResourceList map[resource_manager.ResourceType]struct{} = map[resource_manager.ResourceType]struct{}{ - resource_manager.ResourceType_ReadBytes: {}, - resource_manager.ResourceType_WriteBytes: {}, - resource_manager.ResourceType_TotalCPUTimeMs: {}, -} - -const initialRquestUnits = 10000 - -const bufferRUs = 5000 - -// movingAvgFactor is the weight applied to a new "sample" of RU usage (with one -// sample per mainLoopUpdateInterval). -// -// If we want a factor of 0.5 per second, this should be: -// -// 0.5^(1 second / mainLoopUpdateInterval) -const movingAvgFactor = 0.5 - -const notifyFraction = 0.1 - -const consumptionsReportingThreshold = 100 - -const extendedReportingPeriodFactor = 4 - -const defaultGroupLoopUpdateInterval = 1 * time.Second -const defaultTargetPeriod = 10 * time.Second -const ( - readRequestCost = 1 - readCostPerByte = 0.5 / 1024 / 1024 - writeRequestCost = 5 - writeCostPerByte = 200. / 1024 / 1024 - readCPUMsCost = 1 - writeCPUMsCost = 1 - sqlCPUSecondCost = 0 -) - -type Config struct { - groupLoopUpdateInterval time.Duration - targetPeriod time.Duration - - ReadRequestCost RequestUnit - ReadBytesCost RequestUnit - ReadCPUMsCost RequestUnit - WriteRequestCost RequestUnit - WriteBytesCost RequestUnit - WriteCPUMsCost RequestUnit - SQLCPUSecondCost RequestUnit -} - -func DefaultConfig() *Config { - cfg := &Config{ - groupLoopUpdateInterval: defaultGroupLoopUpdateInterval, - targetPeriod: defaultTargetPeriod, - ReadRequestCost: RequestUnit(readRequestCost), - ReadBytesCost: RequestUnit(readCostPerByte), - ReadCPUMsCost: RequestUnit(readCPUMsCost), - WriteRequestCost: RequestUnit(writeRequestCost), - WriteBytesCost: RequestUnit(writeCostPerByte), - WriteCPUMsCost: RequestUnit(writeCPUMsCost), - SQLCPUSecondCost: RequestUnit(sqlCPUSecondCost), - } - return cfg -} diff --git a/pkg/mcs/resource_manager/tenant_client/limiter.go b/pkg/mcs/resource_manager/tenant_client/limiter.go deleted file mode 100644 index 22a20ada818..00000000000 --- a/pkg/mcs/resource_manager/tenant_client/limiter.go +++ /dev/null @@ -1,527 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Copyright 2022 TiKV Project Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS,g -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tenantclient - -import ( - "context" - "fmt" - "math" - "sync" - "time" - - "github.com/pingcap/log" - "go.uber.org/zap" -) - -// Limit defines the maximum frequency of some events. -// Limit is represented as number of events per second. -// A zero Limit allows no events. -type Limit float64 - -// Inf is the infinite rate limit; it allows all events (even if burst is zero). -const Inf = Limit(math.MaxFloat64) - -// Every converts a minimum time interval between events to a Limit. -func Every(interval time.Duration) Limit { - if interval <= 0 { - return Inf - } - return 1 / Limit(interval.Seconds()) -} - -const burst = 1e8 - -// A Limiter controls how frequently events are allowed to happen. -// It implements a "token bucket" of size b, initially full and refilled -// at rate r tokens per second. -// Informally, in any large enough time interval, the Limiter limits the -// rate to r tokens per second, with a maximum burst size of b events. -// As a special case, if r == Inf (the infinite rate), b is ignored. -// See https://en.wikipedia.org/wiki/Token_bucket for more about token buckets. -// -// The zero value is a valid Limiter, but it will reject all events. -// Use NewLimiter to create non-zero Limiters. -// -// Limiter has three main methods, Allow, Reserve, and Wait. -// Most callers should use Wait. -// -// Each of the three methods consumes a single token. -// They differ in their behavior when no token is available. -// If no token is available, Allow returns false. -// If no token is available, Reserve returns a reservation for a future token -// and the amount of time the caller must wait before using it. -// If no token is available, Wait blocks until one can be obtained -// or its associated context.Context is canceled. -// -// The methods AllowN, ReserveN, and WaitN consume n tokens. -type Limiter struct { - mu sync.Mutex - limit Limit - burst int - tokens float64 - // last is the last time the limiter's tokens field was updated - last time.Time - // lastEvent is the latest time of a rate-limited event (past or future) - lastEvent time.Time - notifyThreshold float64 - lowTokensNotifyChan chan struct{} -} - -// Limit returns the maximum overall event rate. -func (lim *Limiter) Limit() Limit { - lim.mu.Lock() - defer lim.mu.Unlock() - return lim.limit -} - -// Burst returns the maximum burst size. Burst is the maximum number of tokens -// that can be consumed in a single call to Allow, Reserve, or Wait, so higher -// Burst values allow more events to happen at once. -// A zero Burst allows no events, unless limit == Inf. -func (lim *Limiter) Burst() int { - lim.mu.Lock() - defer lim.mu.Unlock() - return lim.burst -} - -// NewLimiter returns a new Limiter that allows events up to rate r and permits -// bursts of at most b tokens. 
-func NewLimiter(r Limit, tokens float64, lowTokensNotifyChan chan struct{}) *Limiter { - lim := &Limiter{ - limit: r, - last: time.Now(), - tokens: tokens, - burst: burst, - lowTokensNotifyChan: lowTokensNotifyChan, - } - log.Info("new limiter", zap.String("limiter", fmt.Sprintf("%+v", lim))) - return lim -} - -// Allow is shorthand for AllowN(time.Now(), 1). -func (lim *Limiter) Allow() bool { - return lim.AllowN(time.Now(), 1) -} - -// AllowN reports whether n events may happen at time now. -// Use this method if you intend to drop / skip events that exceed the rate limit. -// Otherwise use Reserve or Wait. -func (lim *Limiter) AllowN(now time.Time, n int) bool { - return lim.reserveN(now, n, 0).ok -} - -// A Reservation holds information about events that are permitted by a Limiter to happen after a delay. -// A Reservation may be canceled, which may enable the Limiter to permit additional events. -type Reservation struct { - ok bool - lim *Limiter - tokens int - timeToAct time.Time - // This is the Limit at reservation time, it can change later. - limit Limit -} - -// OK returns whether the limiter can provide the requested number of tokens -// within the maximum wait time. If OK is false, Delay returns InfDuration, and -// Cancel does nothing. -func (r *Reservation) OK() bool { - return r.ok -} - -// Delay is shorthand for DelayFrom(time.Now()). -func (r *Reservation) Delay() time.Duration { - return r.DelayFrom(time.Now()) -} - -// InfDuration is the duration returned by Delay when a Reservation is not OK. -const InfDuration = time.Duration(1<<63 - 1) - -// DelayFrom returns the duration for which the reservation holder must wait -// before taking the reserved action. Zero duration means act immediately. -// InfDuration means the limiter cannot grant the tokens requested in this -// Reservation within the maximum wait time. -func (r *Reservation) DelayFrom(now time.Time) time.Duration { - if !r.ok { - return InfDuration - } - delay := r.timeToAct.Sub(now) - if delay < 0 { - return 0 - } - return delay -} - -// Cancel is shorthand for CancelAt(time.Now()). -func (r *Reservation) Cancel() { - r.CancelAt(time.Now()) -} - -// CancelAt indicates that the reservation holder will not perform the reserved action -// and reverses the effects of this Reservation on the rate limit as much as possible, -// considering that other reservations may have already been made. -func (r *Reservation) CancelAt(now time.Time) { - if !r.ok { - return - } - - r.lim.mu.Lock() - defer r.lim.mu.Unlock() - - if r.lim.limit == Inf || r.tokens == 0 || r.timeToAct.Before(now) { - return - } - - // calculate tokens to restore - // The duration between lim.lastEvent and r.timeToAct tells us how many tokens were reserved - // after r was obtained. These tokens should not be restored. - restoreTokens := float64(r.tokens) - r.limit.tokensFromDuration(r.lim.lastEvent.Sub(r.timeToAct)) - if restoreTokens <= 0 { - return - } - // advance time to now - now, _, tokens := r.lim.advance(now) - // calculate new number of tokens - tokens += restoreTokens - - // update state - r.lim.last = now - r.lim.tokens = tokens - if r.timeToAct == r.lim.lastEvent { - prevEvent := r.timeToAct.Add(r.limit.durationFromTokens(float64(-r.tokens))) - if !prevEvent.Before(now) { - r.lim.lastEvent = prevEvent - } - } -} - -// Reserve is shorthand for ReserveN(time.Now(), 1). 
-func (lim *Limiter) Reserve() *Reservation { - return lim.ReserveN(time.Now(), 1) -} - -// ReserveN returns a Reservation that indicates how long the caller must wait before n events happen. -// The Limiter takes this Reservation into account when allowing future events. -// The returned Reservation’s OK() method returns false if n exceeds the Limiter's burst size. -// Usage example: -// -// r := lim.ReserveN(time.Now(), 1) -// if !r.OK() { -// // Not allowed to act! Did you remember to set lim.burst to be > 0 ? -// return -// } -// time.Sleep(r.Delay()) -// Act() -// -// Use this method if you wish to wait and slow down in accordance with the rate limit without dropping events. -// If you need to respect a deadline or cancel the delay, use Wait instead. -// To drop or skip events exceeding rate limit, use Allow instead. -func (lim *Limiter) ReserveN(now time.Time, n int) *Reservation { - r := lim.reserveN(now, n, InfDuration) - return &r -} - -// Wait is shorthand for WaitN(ctx, 1). -func (lim *Limiter) Wait(ctx context.Context) (err error) { - return lim.WaitN(ctx, 1) -} - -// WaitN blocks until lim permits n events to happen. -// It returns an error if n exceeds the Limiter's burst size, the Context is -// canceled, or the expected wait time exceeds the Context's Deadline. -// The burst limit is ignored if the rate limit is Inf. - -// Todo: support float64 n -func (lim *Limiter) WaitN(ctx context.Context, n int) (err error) { - lim.mu.Lock() - burst := lim.burst - limit := lim.limit - lim.mu.Unlock() - - if n > burst && limit != Inf { - return fmt.Errorf("rate: Wait(n=%d) exceeds limiter's burst %d", n, burst) - } - // Check if ctx is already cancelled - select { - case <-ctx.Done(): - return ctx.Err() - default: - } - // Determine wait limit - now := time.Now() - waitLimit := InfDuration - if deadline, ok := ctx.Deadline(); ok { - waitLimit = deadline.Sub(now) - } - // Reserve - r := lim.reserveN(now, n, waitLimit) - if !r.ok { - return fmt.Errorf("rate: Wait(n=%d) Burst(b=%d) tokens(t=%f) rate(r=%f) would exceed context deadline", n, burst, lim.tokens, limit) - } - // Wait if necessary - delay := r.DelayFrom(now) - if delay == 0 { - return nil - } - t := time.NewTimer(delay) - defer t.Stop() - if delay > 1000*time.Millisecond { - log.Warn("[tenant controllor] Need wait N", zap.Time("now", now), zap.Duration("delay", delay), zap.Int("n", n)) - } - select { - case <-t.C: - // We can proceed. - return nil - case <-ctx.Done(): - // Context was canceled before we could proceed. Cancel the - // reservation, which may permit other events to proceed sooner. - r.Cancel() - return ctx.Err() - } -} - -// SetLimit is shorthand for SetLimitAt(time.Now(), newLimit). -func (lim *Limiter) SetLimit(newLimit Limit) { - lim.SetLimitAt(time.Now(), newLimit) -} - -// SetLimitAt sets a new Limit for the limiter. The new Limit, and Burst, may be violated -// or underutilized by those which reserved (using Reserve or Wait) but did not yet act -// before SetLimitAt was called. -func (lim *Limiter) SetLimitAt(now time.Time, newLimit Limit) { - select { - case <-lim.lowTokensNotifyChan: - default: - } - lim.mu.Lock() - defer lim.mu.Unlock() - - now, _, tokens := lim.advance(now) - - lim.last = now - lim.tokens = tokens - lim.limit = newLimit - lim.maybeNotify(now) -} - -// SetupNotification enables the notification at the given threshold. 
-func (lim *Limiter) SetupNotification(now time.Time, threshold float64) { - lim.advance(now) - lim.notifyThreshold = threshold -} - -// notify tries to send a non-blocking notification on notifyCh and disables -// further notifications (until the next Reconfigure or StartNotification). -func (lim *Limiter) notify() { - lim.notifyThreshold = 0 - select { - case lim.lowTokensNotifyChan <- struct{}{}: - default: - } -} - -// maybeNotify checks if it's time to send the notification and if so, performs -// the notification. -func (lim *Limiter) maybeNotify(now time.Time) { - if lim.IsLowTokens() { - lim.notify() - } -} - -func (lim *Limiter) IsLowTokens() bool { - if lim.notifyThreshold > 0 && lim.tokens < lim.notifyThreshold { - return true - } - return false -} - -// SetBurst is shorthand for SetBurstAt(time.Now(), newBurst). -func (lim *Limiter) SetBurst(newBurst int) { - lim.SetBurstAt(time.Now(), newBurst) -} - -// SetBurstAt sets a new burst size for the limiter. -func (lim *Limiter) SetBurstAt(now time.Time, newBurst int) { - lim.mu.Lock() - defer lim.mu.Unlock() - - now, _, tokens := lim.advance(now) - - lim.last = now - lim.tokens = tokens - lim.burst = newBurst -} - -// RemoveTokens decreases the amount of tokens currently available. -func (lim *Limiter) RemoveTokens(now time.Time, amount float64) { - lim.mu.Lock() - defer lim.mu.Unlock() - now, _, tokens := lim.advance(now) - lim.last = now - lim.tokens = tokens - amount - lim.maybeNotify(now) -} - -type tokenBucketReconfigureArgs struct { - NewTokens float64 - - NewRate float64 - - NewBrust int - - NotifyThreshold float64 -} - -func (lim *Limiter) Reconfigure(now time.Time, args tokenBucketReconfigureArgs) { - select { - case <-lim.lowTokensNotifyChan: - default: - } - lim.mu.Lock() - defer lim.mu.Unlock() - log.Debug("[tenant controllor] before reconfigure", zap.Float64("NewTokens", lim.tokens), zap.Float64("NewRate", float64(lim.limit)), zap.Float64("NotifyThreshold", args.NotifyThreshold)) - now, _, tokens := lim.advance(now) - lim.last = now - lim.tokens = tokens + args.NewTokens - lim.limit = Limit(args.NewRate) - lim.notifyThreshold = args.NotifyThreshold - lim.maybeNotify(now) - log.Debug("[tenant controllor] after reconfigure", zap.Float64("NewTokens", lim.tokens), zap.Float64("NewRate", float64(lim.limit)), zap.Float64("NotifyThreshold", args.NotifyThreshold)) -} - -// SetTokens decreases the amount of tokens currently available. -func (lim *Limiter) SetTokens(now time.Time, amount float64) { - select { - case <-lim.lowTokensNotifyChan: - default: - } - lim.mu.Lock() - defer lim.mu.Unlock() - now, _, _ = lim.advance(now) - lim.last = now - lim.tokens = amount -} - -// AvailableTokens decreases the amount of tokens currently available. -func (lim *Limiter) AvailableTokens(now time.Time) float64 { - lim.mu.Lock() - defer lim.mu.Unlock() - _, _, tokens := lim.advance(now) - return tokens -} - -// reserveN is a helper method for AllowN, ReserveN, and WaitN. -// maxFutureReserve specifies the maximum reservation wait duration allowed. -// reserveN returns Reservation, not *Reservation, to avoid allocation in AllowN and WaitN. 
-func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duration) Reservation { - lim.mu.Lock() - defer lim.mu.Unlock() - - if lim.limit == Inf { - return Reservation{ - ok: true, - lim: lim, - tokens: n, - timeToAct: now, - } - } else if lim.limit == 0 { - // TODO(nolouch), remove burst, just use tokens - var ok bool - if lim.tokens >= float64(n) { - ok = true - lim.tokens -= float64(n) - } - return Reservation{ - ok: ok, - lim: lim, - tokens: int(lim.tokens), - timeToAct: now, - } - } - - now, last, tokens := lim.advance(now) - - // Calculate the remaining number of tokens resulting from the request. - //log.Info("advance token", zap.Float64("tokens", tokens), zap.Float64("new tokens", tokens-float64(n))) - tokens -= float64(n) - lim.maybeNotify(now) - // Calculate the wait duration - var waitDuration time.Duration - if tokens < 0 { - waitDuration = lim.limit.durationFromTokens(-tokens) - } - - // Decide result - ok := n <= lim.burst && waitDuration <= maxFutureReserve - - // Prepare reservation - r := Reservation{ - ok: ok, - lim: lim, - limit: lim.limit, - } - if ok { - r.tokens = n - r.timeToAct = now.Add(waitDuration) - } - // Update state - if ok { - lim.last = now - lim.tokens = tokens - lim.lastEvent = r.timeToAct - } else { - lim.last = last - } - - return r -} - -// advance calculates and returns an updated state for lim resulting from the passage of time. -// lim is not changed. -// advance requires that lim.mu is held. -func (lim *Limiter) advance(now time.Time) (newNow time.Time, newLast time.Time, newTokens float64) { - last := lim.last - if now.Before(last) { - last = now - } - - // Calculate the new number of tokens, due to time that passed. - elapsed := now.Sub(last) - delta := lim.limit.tokensFromDuration(elapsed) - tokens := lim.tokens + delta - return now, last, tokens -} - -// durationFromTokens is a unit conversion function from the number of tokens to the duration -// of time it takes to accumulate them at a rate of limit tokens per second. -func (limit Limit) durationFromTokens(tokens float64) time.Duration { - if limit <= 0 { - return InfDuration - } - seconds := tokens / float64(limit) - return time.Duration(float64(time.Second) * seconds) -} - -// tokensFromDuration is a unit conversion function from a time duration to the number of tokens -// which could be accumulated during that duration at a rate of limit tokens per second. -func (limit Limit) tokensFromDuration(d time.Duration) float64 { - if limit <= 0 { - return 0 - } - return d.Seconds() * float64(limit) -} diff --git a/pkg/mcs/resource_manager/tenant_client/model.go b/pkg/mcs/resource_manager/tenant_client/model.go deleted file mode 100644 index cf8b4e4ac9f..00000000000 --- a/pkg/mcs/resource_manager/tenant_client/model.go +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright 2022 TiKV Project Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS,g -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package tenantclient - -import ( - "context" - - rmpb "github.com/pingcap/kvproto/pkg/resource_manager" -) - -type RequestUnit float64 - -type RequestInfo interface { - IsWrite() bool - WriteBytes() uint64 -} - -type ResponseInfo interface { - ReadBytes() uint64 - KVCPUms() uint64 -} - -func Sub(c float64, other float64) float64 { - if c < other { - return 0 - } else { - return c - other - } -} - -type ResourceCalculator interface { - Trickle(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, context.Context) - BeforeKVRequest(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, RequestInfo) - AfterKVRequest(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, RequestInfo, ResponseInfo) -} - -type KVCalculator struct { - *Config -} - -func newKVCalculator(cfg *Config) *KVCalculator { - return &KVCalculator{Config: cfg} -} - -func (dwc *KVCalculator) Trickle(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, ctx context.Context) { -} - -func (dwc *KVCalculator) BeforeKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, req RequestInfo) { - if req.IsWrite() { - resource[rmpb.ResourceType_KVWriteRPCCount] += 1 - - writeBytes := req.WriteBytes() - resource[rmpb.ResourceType_WriteBytes] += float64(writeBytes) - - ru[rmpb.RequestUnitType_WRU] += float64(dwc.WriteRequestCost) - ru[rmpb.RequestUnitType_WRU] += float64(dwc.WriteBytesCost) * float64(writeBytes) - } else { - resource[rmpb.ResourceType_KVReadRPCCount] += 1 - ru[rmpb.RequestUnitType_RRU] += float64(dwc.ReadRequestCost) - } -} -func (dwc *KVCalculator) AfterKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, req RequestInfo, res ResponseInfo) { - readBytes := res.ReadBytes() - resource[rmpb.ResourceType_ReadBytes] += float64(readBytes) - - ru[rmpb.RequestUnitType_RRU] += float64(readBytes) * float64(dwc.ReadBytesCost) - - kvCPUms := float64(res.KVCPUms()) - resource[rmpb.ResourceType_TotalCPUTimeMs] += kvCPUms - if req.IsWrite() { - ru[rmpb.RequestUnitType_WRU] += kvCPUms * float64(dwc.WriteCPUMsCost) - } else { - ru[rmpb.RequestUnitType_RRU] += kvCPUms * float64(dwc.ReadCPUMsCost) - } -} - -type SQLLayerCPUCalculateor struct { - *Config -} - -func newSQLLayerCPUCalculateor(cfg *Config) *SQLLayerCPUCalculateor { - return &SQLLayerCPUCalculateor{Config: cfg} -} - -func (dsc *SQLLayerCPUCalculateor) Trickle(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, ctx context.Context) { - // TODO: SQL Layer RU/resource custom - cpuFunc := func(ctx context.Context) float64 { - return 0. 
-	}
-	cpu := cpuFunc(ctx)
-	resource[rmpb.ResourceType_TotalCPUTimeMs] += cpu
-	resource[rmpb.ResourceType_SQLLayerCPUTimeMs] += cpu
-}
-
-func (dsc *SQLLayerCPUCalculateor) BeforeKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, req RequestInfo) {
-}
-func (dsc *SQLLayerCPUCalculateor) AfterKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, req RequestInfo, res ResponseInfo) {
-}

From 14228c16b0d6594402e7680ab6adfab1599030c7 Mon Sep 17 00:00:00 2001
From: Cabinfever_B
Date: Thu, 12 Jan 2023 16:56:50 +0800
Subject: [PATCH 05/32] refactor

Signed-off-by: Cabinfever_B
---
 pkg/mcs/resource_manager/client/client.go  | 775 +++++++++++++++++++++
 pkg/mcs/resource_manager/client/config.go  |  87 +++
 pkg/mcs/resource_manager/client/limiter.go | 473 +++++++++++++
 pkg/mcs/resource_manager/client/model.go   | 231 ++++++
 4 files changed, 1566 insertions(+)
 create mode 100644 pkg/mcs/resource_manager/client/client.go
 create mode 100644 pkg/mcs/resource_manager/client/config.go
 create mode 100644 pkg/mcs/resource_manager/client/limiter.go
 create mode 100644 pkg/mcs/resource_manager/client/model.go

diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go
new file mode 100644
index 00000000000..fd9a3cfdf8b
--- /dev/null
+++ b/pkg/mcs/resource_manager/client/client.go
@@ -0,0 +1,775 @@
+// Copyright 2022 TiKV Project Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package client
+
+import (
+	"context"
+	"sync"
+	"time"
+
+	"github.com/pingcap/errors"
+	rmpb "github.com/pingcap/kvproto/pkg/resource_manager"
+	"github.com/pingcap/log"
+	"go.uber.org/zap"
+)
+
+// defaultWhiteList lists the resource groups that bypass interception.
+var defaultWhiteList = map[string]struct{}{"default": {}}
+
+// ResourceGroupKVInterceptor is called by the KV client around each request
+// to charge the request against its resource group.
+type ResourceGroupKVInterceptor interface {
+	OnRequestWait(ctx context.Context, resourceGroupName string, info RequestInfo) error
+	OnResponse(ctx context.Context, resourceGroupName string, req RequestInfo, resp ResponseInfo) error
+}
+
+// ResourceGroupProvider supplies the resource group definitions and grants
+// token buckets; it is backed by the resource manager server.
+type ResourceGroupProvider interface {
+	ListResourceGroups(ctx context.Context) ([]*rmpb.ResourceGroup, error)
+	GetResourceGroup(ctx context.Context, resourceGroupName string) (*rmpb.ResourceGroup, error)
+	AddResourceGroup(ctx context.Context, metaGroup *rmpb.ResourceGroup) (string, error)
+	ModifyResourceGroup(ctx context.Context, metaGroup *rmpb.ResourceGroup) (string, error)
+	DeleteResourceGroup(ctx context.Context, resourceGroupName string) (string, error)
+	AcquireTokenBuckets(ctx context.Context, request *rmpb.TokenBucketsRequest) ([]*rmpb.TokenBucketResponse, error)
+}
+
+// NewResourceGroupController returns a controller that implements
+// ResourceGroupKVInterceptor.
+func NewResourceGroupController(
+	clientUniqueId uint64,
+	provider ResourceGroupProvider,
+) (*resourceGroupsController, error) {
+	return newResourceGroupController(clientUniqueId, provider)
+}
+
+var _ ResourceGroupKVInterceptor = (*resourceGroupsController)(nil)
+
+type resourceGroupsController struct {
+	clientUniqueId   uint64
+	provider         ResourceGroupProvider
+	groupsController sync.Map
+	config           *Config
+
+	calculators []ResourceCalculator
+
+	// tokenResponseChan receives the token bucket responses from the server.
+	// It is consumed by the main loop, which dispatches each response to the
+	// matching resource group.
+	tokenResponseChan chan []*rmpb.TokenBucketResponse
+
+	// lowTokenNotifyChan is signaled when the available tokens of some
+	// resource group are running low and an early token bucket request
+	// should be sent.
+	lowTokenNotifyChan chan struct{}
+
+	run struct {
+		now             time.Time
+		lastRequestTime time.Time
+
+		// requestInProgress is true if we are in the process of sending a request.
+		// It gets set to false when we receive the response in the main loop,
+		// even in error cases.
+		requestInProgress bool
+
+		// requestNeedsRetry is set if the last token bucket request encountered an
+		// error. This triggers a retry attempt on the next tick.
+		//
+		// Note: requestNeedsRetry and requestInProgress are never true at the same time.
+		requestNeedsRetry bool
+
+		// targetPeriod is the duration that the tokens acquired by one request
+		// are expected to last; it stores the value of the setting at the last
+		// update.
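+		// It is initialized from config.targetPeriod in initRunState.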
+		targetPeriod time.Duration
+	}
+}
+
+func newResourceGroupController(clientUniqueId uint64, provider ResourceGroupProvider) (*resourceGroupsController, error) {
+	config := DefaultConfig()
+	return &resourceGroupsController{
+		clientUniqueId:     clientUniqueId,
+		provider:           provider,
+		config:             config,
+		lowTokenNotifyChan: make(chan struct{}, 1),
+		tokenResponseChan:  make(chan []*rmpb.TokenBucketResponse, 1),
+		calculators:        []ResourceCalculator{newKVCalculator(config), newSQLLayerCPUCalculateor(config)},
+	}, nil
+}
+
+func (c *resourceGroupsController) Start(ctx context.Context) error {
+	if err := c.updateAllResourceGroups(ctx); err != nil {
+		log.Error("update ResourceGroup failed", zap.Error(err))
+	}
+	c.initRunState(ctx)
+	go c.mainLoop(ctx)
+	return nil
+}
+
+func (c *resourceGroupsController) putResourceGroup(ctx context.Context, name string) (*groupCostController, error) {
+	group, err := c.provider.GetResourceGroup(ctx, name)
+	if err != nil {
+		return nil, err
+	}
+	log.Info("create resource group cost controller", zap.String("name", group.GetName()))
+	gc := newGroupCostController(ctx, group, c.config, c.lowTokenNotifyChan)
+	c.groupsController.Store(group.GetName(), gc)
+	gc.initRunState(ctx)
+	return gc, nil
+}
+
+func (c *resourceGroupsController) updateAllResourceGroups(ctx context.Context) error {
+	groups, err := c.provider.ListResourceGroups(ctx)
+	if err != nil {
+		return err
+	}
+	latestGroups := make(map[string]struct{})
+	for _, group := range groups {
+		log.Info("create resource group cost controller", zap.String("name", group.GetName()))
+		gc := newGroupCostController(ctx, group, c.config, c.lowTokenNotifyChan)
+		c.groupsController.Store(group.GetName(), gc)
+		latestGroups[group.GetName()] = struct{}{}
+	}
+	c.groupsController.Range(func(key, value any) bool {
+		resourceGroupName := key.(string)
+		if _, ok := latestGroups[resourceGroupName]; !ok {
+			c.groupsController.Delete(key)
+		}
+		return true
+	})
+	return nil
+}
+
+func (c *resourceGroupsController) initRunState(ctx context.Context) {
+	now := time.Now()
+	c.run.now = now
+	c.run.lastRequestTime = now
+	c.run.targetPeriod = c.config.targetPeriod
+	c.groupsController.Range(func(name, value any) bool {
+		gc := value.(*groupCostController)
+		gc.initRunState(ctx)
+		return true
+	})
+}
+
+func (c *resourceGroupsController) updateRunState(ctx context.Context) {
+	c.run.now = time.Now()
+	c.groupsController.Range(func(name, value any) bool {
+		gc := value.(*groupCostController)
+		gc.updateRunState(ctx)
+		return true
+	})
+}
+
+func (c *resourceGroupsController) shouldReportConsumption() bool {
+	if c.run.requestInProgress {
+		return false
+	}
+	timeSinceLastRequest := c.run.now.Sub(c.run.lastRequestTime)
+	if timeSinceLastRequest >= c.run.targetPeriod {
+		if timeSinceLastRequest >= extendedReportingPeriodFactor*c.run.targetPeriod {
+			return true
+		}
+		ret := false
+		c.groupsController.Range(func(name, value any) bool {
+			gc := value.(*groupCostController)
+			ret = ret || gc.shouldReportConsumption()
+			return !ret
+		})
+		return ret
+	}
+	return false
+}
+
+func (c *resourceGroupsController) updateAvgRequestResourcePerSec(ctx context.Context) {
+	c.groupsController.Range(func(name, value any) bool {
+		gc := value.(*groupCostController)
+		gc.updateAvgRequestResourcePerSec(ctx)
+		return true
+	})
+}
+
+func (c *resourceGroupsController) handleTokenBucketResponse(ctx context.Context, resp []*rmpb.TokenBucketResponse) {
+	for _, res := range resp {
+		name := res.GetResourceGroupName()
+		v, ok := c.groupsController.Load(name)
+		if !ok {
+			log.Warn("a non-existent resource group was found when handling the token response", zap.String("name", name))
+			continue
+		}
+		gc := v.(*groupCostController)
+		gc.handleTokenBucketResponse(ctx, res)
+	}
+}
+
+func (c *resourceGroupsController) collectTokenBucketRequests(ctx context.Context, source string, low bool) {
+	requests := make([]*rmpb.TokenBucketRequest, 0)
+	c.groupsController.Range(func(name, value any) bool {
+		gc := value.(*groupCostController)
+		request := gc.collectRequestAndConsumption(low)
+		if request != nil {
+			requests = append(requests, request)
+		}
+		return true
+	})
+	if len(requests) > 0 {
+		c.sendTokenBucketRequests(ctx, requests, source)
+	}
+}
+
+func (c *resourceGroupsController) sendTokenBucketRequests(ctx context.Context, requests []*rmpb.TokenBucketRequest, source string) {
+	now := time.Now()
+	c.run.lastRequestTime = now
+	c.run.requestInProgress = true
+	req := &rmpb.TokenBucketsRequest{
+		Requests:              requests,
+		TargetRequestPeriodMs: uint64(c.config.targetPeriod / time.Millisecond),
+	}
+	go func() {
+		log.Info("[resource group controller] send token bucket request", zap.Time("now", now), zap.Any("req", req.Requests), zap.String("source", source))
+		resp, err := c.provider.AcquireTokenBuckets(ctx, req)
+		if err != nil {
+			// Don't log any errors caused by the stopper canceling the context.
+			if !errors.ErrorEqual(err, context.Canceled) {
+				log.L().Sugar().Infof("TokenBucket RPC error: %v", err)
+			}
+			resp = nil
+		}
+		log.Info("[resource group controller] token bucket response", zap.Time("now", time.Now()), zap.Any("resp", resp), zap.String("source", source), zap.Duration("latency", time.Since(now)))
+		c.tokenResponseChan <- resp
+	}()
+}
+
+func (c *resourceGroupsController) handleTokenBucketTrickEvent(ctx context.Context) {
+	c.groupsController.Range(func(name, value any) bool {
+		gc := value.(*groupCostController)
+		gc.handleTokenBucketTrickEvent(ctx)
+		return true
+	})
+}
+
+func (c *resourceGroupsController) mainLoop(ctx context.Context) {
+	interval := c.config.groupLoopUpdateInterval
+	ticker := time.NewTicker(interval)
+	defer ticker.Stop()
+
+	c.updateRunState(ctx)
+	c.collectTokenBucketRequests(ctx, "init", false /* select all */)
+
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case resp := <-c.tokenResponseChan:
+			c.run.requestInProgress = false
+			if resp != nil {
+				c.updateRunState(ctx)
+				c.handleTokenBucketResponse(ctx, resp)
+			} else {
+				// A nil response indicates a failure (which would have been logged).
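+				// Flag a retry; the next ticker tick will resend the request.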
+				c.run.requestNeedsRetry = true
+			}
+		case <-ticker.C:
+			c.updateRunState(ctx)
+			c.updateAvgRequestResourcePerSec(ctx)
+			if c.run.requestNeedsRetry || c.shouldReportConsumption() {
+				c.run.requestNeedsRetry = false
+				c.collectTokenBucketRequests(ctx, "report", false /* select all */)
+			}
+		case <-c.lowTokenNotifyChan:
+			c.updateRunState(ctx)
+			if !c.run.requestInProgress {
+				c.collectTokenBucketRequests(ctx, "low_ru", true /* only select low tokens resource group */)
+			}
+		default:
+			// Poll the per-counter trickle timers without blocking.
+			c.handleTokenBucketTrickEvent(ctx)
+		}
+	}
+}
+
+func (c *resourceGroupsController) OnRequestWait(
+	ctx context.Context, resourceGroupName string, info RequestInfo,
+) (err error) {
+	if _, ok := defaultWhiteList[resourceGroupName]; ok {
+		return nil
+	}
+	var gc *groupCostController
+	if tmp, ok := c.groupsController.Load(resourceGroupName); ok {
+		gc = tmp.(*groupCostController)
+	} else {
+		gc, err = c.putResourceGroup(ctx, resourceGroupName)
+		if err != nil {
+			return errors.Errorf("[resource group] resource group %s does not exist: %v", resourceGroupName, err)
+		}
+	}
+	err = gc.OnRequestWait(ctx, info)
+	return err
+}
+
+func (c *resourceGroupsController) OnResponse(ctx context.Context, resourceGroupName string, req RequestInfo, resp ResponseInfo) error {
+	if _, ok := defaultWhiteList[resourceGroupName]; ok {
+		return nil
+	}
+	tmp, ok := c.groupsController.Load(resourceGroupName)
+	if !ok {
+		log.Warn("[resource group] resource group does not exist", zap.String("resourceGroupName", resourceGroupName))
+		return nil
+	}
+	gc := tmp.(*groupCostController)
+	gc.OnResponse(ctx, req, resp)
+	return nil
+}
+
+type groupCostController struct {
+	*rmpb.ResourceGroup
+	mainCfg     *Config
+	calculators []ResourceCalculator
+	mode        rmpb.GroupMode
+
+	handleRespFunc func(*rmpb.TokenBucketResponse)
+
+	mu struct {
+		sync.Mutex
+		consumption *rmpb.Consumption
+	}
+
+	lowRUNotifyChan chan struct{}
+	// run contains the state that is updated by the main loop.
+	run struct {
+		now time.Time
+
+		// targetPeriod stores the value of the TargetPeriodSetting setting at the
+		// last update.
+		targetPeriod time.Duration
+
+		// consumption stores a snapshot of mu.consumption taken at the last
+		// run-state update.
+		consumption *rmpb.Consumption
+
+		// lastRequestConsumption is the value of consumption at the last token
+		// bucket request.
+		lastRequestConsumption *rmpb.Consumption
+
+		// initialRequestCompleted is set to true when the first token bucket
+		// request completes successfully.
+		initialRequestCompleted bool
+
+		resourceTokens    map[rmpb.ResourceType]*tokenCounter
+		requestUnitTokens map[rmpb.RequestUnitType]*tokenCounter
+	}
+}
+
+type tokenCounter struct {
+	// avgRUPerSec is an exponentially-weighted moving average of the RU
+	// consumption per second; used to estimate the RU requirements for the next
+	// request.
+	avgRUPerSec float64
+	// avgRUPerSecLastRU is the consumption value at which avgRUPerSec was
+	// last updated.
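+	// The average is updated in calcAvg as an exponentially-weighted moving
+	// average:
+	//   avgRUPerSec = movingAvgFactor*avgRUPerSec + (1-movingAvgFactor)*delta
+	// where delta is the RU consumed since the last sample divided by the
+	// elapsed seconds.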
+	avgRUPerSecLastRU float64
+	avgLastTime       time.Time
+
+	setupNotificationCh        <-chan time.Time
+	setupNotificationThreshold float64
+	setupNotificationTimer     *time.Timer
+
+	lastDeadline time.Time
+	lastRate     float64
+
+	limiter *Limiter
+}
+
+func newGroupCostController(ctx context.Context, group *rmpb.ResourceGroup, mainCfg *Config, lowRUNotifyChan chan struct{}) *groupCostController {
+	gc := &groupCostController{
+		ResourceGroup:   group,
+		mainCfg:         mainCfg,
+		calculators:     []ResourceCalculator{newKVCalculator(mainCfg), newSQLLayerCPUCalculateor(mainCfg)},
+		mode:            group.GetMode(),
+		lowRUNotifyChan: lowRUNotifyChan,
+	}
+
+	switch gc.mode {
+	case rmpb.GroupMode_RUMode:
+		gc.handleRespFunc = gc.handleRUTokenResponse
+	case rmpb.GroupMode_RawMode:
+		gc.handleRespFunc = gc.handleResourceTokenResponse
+	}
+
+	gc.mu.consumption = &rmpb.Consumption{}
+	return gc
+}
+
+func (gc *groupCostController) initRunState(ctx context.Context) {
+	now := time.Now()
+	gc.run.now = now
+	gc.run.targetPeriod = gc.mainCfg.targetPeriod
+
+	gc.run.consumption = &rmpb.Consumption{}
+
+	gc.run.lastRequestConsumption = &rmpb.Consumption{}
+
+	switch gc.mode {
+	case rmpb.GroupMode_RUMode:
+		gc.run.requestUnitTokens = make(map[rmpb.RequestUnitType]*tokenCounter)
+		for typ := range requestUnitList {
+			counter := &tokenCounter{
+				limiter:     NewLimiter(0, initialRequestUnits, gc.lowRUNotifyChan),
+				avgRUPerSec: initialRequestUnits / gc.run.targetPeriod.Seconds(),
+				avgLastTime: now,
+			}
+			gc.run.requestUnitTokens[typ] = counter
+		}
+	case rmpb.GroupMode_RawMode:
+		gc.run.resourceTokens = make(map[rmpb.ResourceType]*tokenCounter)
+		for typ := range requestResourceList {
+			counter := &tokenCounter{
+				limiter:     NewLimiter(0, initialRequestUnits, gc.lowRUNotifyChan),
+				avgRUPerSec: initialRequestUnits / gc.run.targetPeriod.Seconds(),
+				avgLastTime: now,
+			}
+			gc.run.resourceTokens[typ] = counter
+		}
+	}
+}
+
+func (gc *groupCostController) updateRunState(ctx context.Context) {
+	newTime := time.Now()
+	deltaConsumption := &rmpb.Consumption{}
+	for _, calc := range gc.calculators {
+		calc.Trickle(deltaConsumption, ctx)
+	}
+	gc.mu.Lock()
+	Add(gc.mu.consumption, deltaConsumption)
+	*gc.run.consumption = *gc.mu.consumption
+	gc.mu.Unlock()
+	// remove tokens
+	switch gc.mode {
+	case rmpb.GroupMode_RUMode:
+		for typ, counter := range gc.run.requestUnitTokens {
+			if v := GetRUValueFromConsumption(deltaConsumption, typ); v > 0 {
+				counter.limiter.RemoveTokens(newTime, v)
+			}
+		}
+	case rmpb.GroupMode_RawMode:
+		for typ, counter := range gc.run.resourceTokens {
+			if v := GetResourceValueFromConsumption(deltaConsumption, typ); v > 0 {
+				counter.limiter.RemoveTokens(newTime, v)
+			}
+		}
+	}
+	log.Info("update run state", zap.Any("request unit consumption", gc.run.consumption))
+	gc.run.now = newTime
+}
+
+func (gc *groupCostController) updateAvgRequestResourcePerSec(ctx context.Context) {
+	switch gc.mode {
+	case rmpb.GroupMode_RawMode:
+		gc.updateAvgResourcePerSec(ctx)
+	case rmpb.GroupMode_RUMode:
+		gc.updateAvgRUPerSec(ctx)
+	}
+}
+
+func (gc *groupCostController) handleTokenBucketTrickEvent(ctx context.Context) {
+	switch gc.mode {
+	case rmpb.GroupMode_RawMode:
+		for _, counter := range gc.run.resourceTokens {
+			select {
+			case <-counter.setupNotificationCh:
+				counter.setupNotificationTimer = nil
+				counter.setupNotificationCh = nil
+				counter.limiter.SetupNotificationAt(gc.run.now, float64(counter.setupNotificationThreshold))
+				gc.updateRunState(ctx)
+			default:
+			}
+		}
+	case rmpb.GroupMode_RUMode:
+		for _, counter := range gc.run.requestUnitTokens {
+			select {
+			case <-counter.setupNotificationCh:
+				counter.setupNotificationTimer = nil
+				counter.setupNotificationCh = nil
+				counter.limiter.SetupNotificationAt(gc.run.now, float64(counter.setupNotificationThreshold))
+				gc.updateRunState(ctx)
+			default:
+			}
+		}
+	}
+}
+
+func (gc *groupCostController) updateAvgResourcePerSec(ctx context.Context) {
+	for typ, counter := range gc.run.resourceTokens {
+		if !gc.calcAvg(counter, GetResourceValueFromConsumption(gc.run.consumption, typ)) {
+			continue
+		}
+		log.Info("[resource group controller] update avg resource per sec", zap.String("name", gc.Name), zap.String("type", rmpb.ResourceType_name[int32(typ)]), zap.Float64("avgRUPerSec", counter.avgRUPerSec))
+	}
+}
+
+func (gc *groupCostController) updateAvgRUPerSec(ctx context.Context) {
+	for typ, counter := range gc.run.requestUnitTokens {
+		if !gc.calcAvg(counter, GetRUValueFromConsumption(gc.run.consumption, typ)) {
+			continue
+		}
+		log.Info("[resource group controller] update avg ru per sec", zap.String("name", gc.Name), zap.String("type", rmpb.RequestUnitType_name[int32(typ)]), zap.Float64("avgRUPerSec", counter.avgRUPerSec))
+	}
+}
+
+func (gc *groupCostController) calcAvg(counter *tokenCounter, newValue float64) bool {
+	deltaDuration := gc.run.now.Sub(counter.avgLastTime)
+	if deltaDuration <= 10*time.Millisecond {
+		return false
+	}
+	delta := (newValue - counter.avgRUPerSecLastRU) / deltaDuration.Seconds()
+	counter.avgRUPerSec = movingAvgFactor*counter.avgRUPerSec + (1-movingAvgFactor)*delta
+	counter.avgLastTime = gc.run.now
+	counter.avgRUPerSecLastRU = newValue
+	return true
+}
+
+func (gc *groupCostController) shouldReportConsumption() bool {
+	switch gc.Mode {
+	case rmpb.GroupMode_RUMode:
+		for typ := range requestUnitList {
+			if GetRUValueFromConsumption(gc.run.consumption, typ)-GetRUValueFromConsumption(gc.run.lastRequestConsumption, typ) >= consumptionsReportingThreshold {
+				return true
+			}
+		}
+	case rmpb.GroupMode_RawMode:
+		for typ := range requestResourceList {
+			if GetResourceValueFromConsumption(gc.run.consumption, typ)-GetResourceValueFromConsumption(gc.run.lastRequestConsumption, typ) >= consumptionsReportingThreshold {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+func (gc *groupCostController) handleTokenBucketResponse(ctx context.Context, resp *rmpb.TokenBucketResponse) {
+	gc.handleRespFunc(resp)
+	if !gc.run.initialRequestCompleted {
+		gc.run.initialRequestCompleted = true
+		// This is the first successful request. Take back the initial RUs that we
+		// used to pre-fill the bucket.
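+		// Depending on the group mode, the pre-filled tokens live in either
+		// resourceTokens or requestUnitTokens.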
+		switch gc.mode {
+		case rmpb.GroupMode_RUMode:
+			for _, counter := range gc.run.requestUnitTokens {
+				counter.limiter.RemoveTokens(gc.run.now, initialRequestUnits)
+			}
+		case rmpb.GroupMode_RawMode:
+			for _, counter := range gc.run.resourceTokens {
+				counter.limiter.RemoveTokens(gc.run.now, initialRequestUnits)
+			}
+		}
+	}
+}
+
+func (gc *groupCostController) handleResourceTokenResponse(resp *rmpb.TokenBucketResponse) {
+	for _, grantedTB := range resp.GetGrantedResourceTokens() {
+		typ := grantedTB.GetType()
+		// todo: check whether grant = 0
+		counter, ok := gc.run.resourceTokens[typ]
+		if !ok {
+			log.Warn("unsupported resource type", zap.String("type", rmpb.ResourceType_name[int32(typ)]))
+			continue
+		}
+		gc.modifyTokenCounter(counter, grantedTB.GetGrantedTokens(), grantedTB.GetTrickleTimeMs())
+	}
+}
+
+func (gc *groupCostController) handleRUTokenResponse(resp *rmpb.TokenBucketResponse) {
+	for _, grantedTB := range resp.GetGrantedRUTokens() {
+		typ := grantedTB.GetType()
+		// todo: check whether grant = 0
+		counter, ok := gc.run.requestUnitTokens[typ]
+		if !ok {
+			log.Warn("unsupported request unit type", zap.String("type", rmpb.RequestUnitType_name[int32(typ)]))
+			continue
+		}
+		gc.modifyTokenCounter(counter, grantedTB.GetGrantedTokens(), grantedTB.GetTrickleTimeMs())
+	}
+}
+
+func (gc *groupCostController) modifyTokenCounter(counter *tokenCounter, bucket *rmpb.TokenBucket, trickleTimeMs int64) {
+	granted := bucket.Tokens
+	remainder := 0.
+	if !counter.lastDeadline.IsZero() {
+		// If last request came with a trickle duration, we may have RUs that were
+		// not made available to the bucket yet; throw them together with the newly
+		// granted RUs.
+		if since := counter.lastDeadline.Sub(gc.run.now); since > 0 {
+			remainder = counter.lastRate * since.Seconds()
+		}
+	}
+	if counter.setupNotificationTimer != nil {
+		counter.setupNotificationTimer.Stop()
+		counter.setupNotificationTimer = nil
+		counter.setupNotificationCh = nil
+	}
+	notifyThreshold := granted * notifyFraction
+	if notifyThreshold < bufferRUs {
+		notifyThreshold = bufferRUs
+	}
+
+	var cfg tokenBucketReconfigureArgs
+	if trickleTimeMs == 0 {
+		cfg.NewTokens = granted
+		cfg.NewRate = float64(bucket.GetSettings().FillRate)
+		cfg.NotifyThreshold = notifyThreshold
+		counter.lastDeadline = time.Time{}
+	} else {
+		cfg.NewTokens = remainder
+		trickleDuration := time.Duration(trickleTimeMs) * time.Millisecond
+		deadline := gc.run.now.Add(trickleDuration)
+		cfg.NewRate = float64(bucket.GetSettings().FillRate) + bucket.Tokens/trickleDuration.Seconds()
+
+		timerDuration := trickleDuration - time.Second
+		if timerDuration <= 0 {
+			timerDuration = (trickleDuration + time.Second) / 2
+		}
+		log.Debug("[resource group controller] set up trickle notification timer", zap.Duration("timerDuration", timerDuration), zap.Float64("cfg.NewRate", cfg.NewRate))
+		counter.setupNotificationTimer = time.NewTimer(timerDuration)
+		counter.setupNotificationCh = counter.setupNotificationTimer.C
+		counter.setupNotificationThreshold = notifyThreshold
+
+		counter.lastDeadline = deadline
+	}
+	counter.lastRate = cfg.NewRate
+	counter.limiter.Reconfigure(gc.run.now, cfg)
+}
+
+func (gc *groupCostController) collectRequestAndConsumption(low bool) *rmpb.TokenBucketRequest {
+	req := &rmpb.TokenBucketRequest{
+		ResourceGroupName: gc.ResourceGroup.GetName(),
+	}
+	// collect request resource
+	selected := !low
+	switch gc.mode {
+	case rmpb.GroupMode_RawMode:
+		requests := make([]*rmpb.ResourceItem, 0, len(requestResourceList))
+		for typ, counter := range gc.run.resourceTokens {
+			if low && counter.limiter.IsLowTokens() {
+				selected = true
+			}
+			request := &rmpb.ResourceItem{
+				Type:  typ,
+				Value: gc.calcRequest(counter),
+			}
+			requests = append(requests, request)
+		}
+		req.Request = &rmpb.TokenBucketRequest_ResourceItems{
+			ResourceItems: &rmpb.TokenBucketRequest_RequestResource{
+				RequestResource: requests,
+			},
+		}
+	case rmpb.GroupMode_RUMode:
+		requests := make([]*rmpb.RequestUnitItem, 0, len(requestUnitList))
+		for typ, counter := range gc.run.requestUnitTokens {
+			if low && counter.limiter.IsLowTokens() {
+				selected = true
+			}
+			request := &rmpb.RequestUnitItem{
+				Type:  typ,
+				Value: gc.calcRequest(counter),
+			}
+			requests = append(requests, request)
+		}
+		req.Request = &rmpb.TokenBucketRequest_RuItems{
+			RuItems: &rmpb.TokenBucketRequest_RequestRU{
+				RequestRU: requests,
+			},
+		}
+	}
+	if !selected {
+		return nil
+	}
+
+	deltaConsumption := &rmpb.Consumption{}
+	*deltaConsumption = *gc.run.consumption
+	Sub(deltaConsumption, gc.run.lastRequestConsumption)
+	req.ConsumptionSinceLastRequest = deltaConsumption
+
+	*gc.run.lastRequestConsumption = *gc.run.consumption
+	return req
+}
+
+func (gc *groupCostController) calcRequest(counter *tokenCounter) float64 {
+	value := counter.avgRUPerSec*gc.run.targetPeriod.Seconds() + bufferRUs
+	value -= float64(counter.limiter.AvailableTokens(gc.run.now))
+	if value < 0 {
+		value = 0
+	}
+	return value
+}
+
+func (gc *groupCostController) OnRequestWait(
+	ctx context.Context, info RequestInfo,
+) error {
+	delta := &rmpb.Consumption{}
+	for _, calc := range gc.calculators {
+		calc.BeforeKVRequest(delta, info)
+	}
+	// Wait on every token counter concurrently; the first error (if any) wins.
+	// errMu guards errReturn because the waiters run in separate goroutines.
+	var (
+		wg        sync.WaitGroup
+		errMu     sync.Mutex
+		errReturn error
+	)
+	wait := func(value float64, counter *tokenCounter) {
+		defer wg.Done()
+		if err := counter.limiter.WaitN(ctx, int(value)); err != nil {
+			errMu.Lock()
+			errReturn = err
+			errMu.Unlock()
+		}
+	}
+	switch gc.mode {
+	case rmpb.GroupMode_RawMode:
+		wg.Add(len(requestResourceList))
+		for typ, counter := range gc.run.resourceTokens {
+			if v := GetResourceValueFromConsumption(delta, typ); v > 0 {
+				go wait(v, counter)
+			} else {
+				wg.Done()
+			}
+		}
+	case rmpb.GroupMode_RUMode:
+		wg.Add(len(requestUnitList))
+		for typ, counter := range gc.run.requestUnitTokens {
+			if v := GetRUValueFromConsumption(delta, typ); v > 0 {
+				go wait(v, counter)
+			} else {
+				wg.Done()
+			}
+		}
+	}
+	wg.Wait()
+	if errReturn != nil {
+		return errReturn
+	}
+	gc.mu.Lock()
+	Add(gc.mu.consumption, delta)
+	gc.mu.Unlock()
+	return nil
+}
+
+func (gc *groupCostController) OnResponse(ctx context.Context, req RequestInfo, resp ResponseInfo) {
+	delta := &rmpb.Consumption{}
+	for _, calc := range gc.calculators {
+		calc.AfterKVRequest(delta, req, resp)
+	}
+
+	switch gc.mode {
+	case rmpb.GroupMode_RawMode:
+		for typ, counter := range gc.run.resourceTokens {
+			if v := GetResourceValueFromConsumption(delta, typ); v > 0 {
+				counter.limiter.RemoveTokens(time.Now(), float64(v))
+			}
+		}
+	case rmpb.GroupMode_RUMode:
+		for typ, counter := range gc.run.requestUnitTokens {
+			if v := GetRUValueFromConsumption(delta, typ); v > 0 {
+				counter.limiter.RemoveTokens(time.Now(), float64(v))
+			}
+		}
+	}
+	gc.mu.Lock()
+	Add(gc.mu.consumption, delta)
+	gc.mu.Unlock()
+}
diff --git a/pkg/mcs/resource_manager/client/config.go b/pkg/mcs/resource_manager/client/config.go
new file mode 100644
index 00000000000..67e8f68f6d6
--- /dev/null
+++ b/pkg/mcs/resource_manager/client/config.go
@@ -0,0 +1,87 @@
+// Copyright 2022 TiKV Project Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package client
+
+import (
+	"time"
+
+	"github.com/pingcap/kvproto/pkg/resource_manager"
+)
+
+var requestUnitList map[resource_manager.RequestUnitType]struct{} = map[resource_manager.RequestUnitType]struct{}{
+	resource_manager.RequestUnitType_RRU: {},
+	resource_manager.RequestUnitType_WRU: {},
+}
+
+var requestResourceList map[resource_manager.ResourceType]struct{} = map[resource_manager.ResourceType]struct{}{
+	resource_manager.ResourceType_IOReadFlow:  {},
+	resource_manager.ResourceType_IOWriteFlow: {},
+	resource_manager.ResourceType_CPU:         {},
+}
+
+const initialRequestUnits = 10000
+
+const bufferRUs = 5000
+
+// movingAvgFactor is the weight applied to a new "sample" of RU usage (with one
+// sample per groupLoopUpdateInterval).
+//
+// If we want a factor of 0.5 per second, this should be:
+//
+//	0.5^(1 second / groupLoopUpdateInterval)
+const movingAvgFactor = 0.5
+
+const notifyFraction = 0.1
+
+const consumptionsReportingThreshold = 100
+
+const extendedReportingPeriodFactor = 4
+
+const defaultGroupLoopUpdateInterval = 1 * time.Second
+const defaultTargetPeriod = 10 * time.Second
+const (
+	readRequestCost  = 1
+	readCostPerByte  = 0.5 / 1024 / 1024
+	writeRequestCost = 5
+	writeCostPerByte = 200. / 1024 / 1024
+	kvCPUMsCost      = 1
+	sqlCPUSecondCost = 0
+)
+
+type Config struct {
+	groupLoopUpdateInterval time.Duration
+	targetPeriod            time.Duration
+
+	ReadRequestCost  RequestUnit
+	ReadBytesCost    RequestUnit
+	WriteRequestCost RequestUnit
+	WriteBytesCost   RequestUnit
+	KVCPUMsCost      RequestUnit
+	SQLCPUSecondCost RequestUnit
+}
+
+func DefaultConfig() *Config {
+	cfg := &Config{
+		groupLoopUpdateInterval: defaultGroupLoopUpdateInterval,
+		targetPeriod:            defaultTargetPeriod,
+		ReadRequestCost:         RequestUnit(readRequestCost),
+		ReadBytesCost:           RequestUnit(readCostPerByte),
+		WriteRequestCost:        RequestUnit(writeRequestCost),
+		WriteBytesCost:          RequestUnit(writeCostPerByte),
+		KVCPUMsCost:             RequestUnit(kvCPUMsCost),
+		SQLCPUSecondCost:        RequestUnit(sqlCPUSecondCost),
+	}
+	return cfg
+}
diff --git a/pkg/mcs/resource_manager/client/limiter.go b/pkg/mcs/resource_manager/client/limiter.go
new file mode 100644
index 00000000000..75d83dd26d0
--- /dev/null
+++ b/pkg/mcs/resource_manager/client/limiter.go
@@ -0,0 +1,473 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Copyright 2023 TiKV Project Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//	http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package client + +import ( + "context" + "fmt" + "math" + "sync" + "time" + + "github.com/pingcap/log" + "go.uber.org/zap" +) + +// Limit defines the maximum frequency of some events. +// Limit is represented as number of events per second. +// A zero Limit allows no events. +type Limit float64 + +// Inf is the infinite rate limit; it allows all events (even if burst is zero). +const Inf = Limit(math.MaxFloat64) + +// Every converts a minimum time interval between events to a Limit. +func Every(interval time.Duration) Limit { + if interval <= 0 { + return Inf + } + return 1 / Limit(interval.Seconds()) +} + +const maxRequestTokens = 1e8 + +// A Limiter controls how frequently events are allowed to happen. +// It implements a "token bucket" of size b, initially full and refilled +// at rate r tokens per second. +// Informally, in any large enough time interval, the Limiter limits the +// rate to r tokens per second, with a maximum burst size of b events. +// As a special case, if r == Inf (the infinite rate), b is ignored. +// See https://en.wikipedia.org/wiki/Token_bucket for more about token buckets. +// +// The zero value is a valid Limiter, but it will reject all events. +// Use NewLimiter to create non-zero Limiters. +// +// Limiter has three main methods, Allow, Reserve, and Wait. +// Most callers should use Wait. +// +// Each of the three methods consumes a single token. +// They differ in their behavior when no token is available. +// If no token is available, Allow returns false. +// If no token is available, Reserve returns a reservation for a future token +// and the amount of time the caller must wait before using it. +// If no token is available, Wait blocks until one can be obtained +// or its associated context.Context is canceled. +// +// The methods AllowN, ReserveN, and WaitN consume n tokens. +type Limiter struct { + mu sync.Mutex + limit Limit + tokens float64 + // last is the last time the limiter's tokens field was updated + last time.Time + // lastEvent is the latest time of a rate-limited event (past or future) + lastEvent time.Time + notifyThreshold float64 + lowTokensNotifyChan chan struct{} +} + +// Limit returns the maximum overall event rate. +func (lim *Limiter) Limit() Limit { + lim.mu.Lock() + defer lim.mu.Unlock() + return lim.limit +} + +// NewLimiter returns a new Limiter that allows events up to rate r and permits +// bursts of at most b tokens. +func NewLimiter(r Limit, tokens float64, lowTokensNotifyChan chan struct{}) *Limiter { + lim := &Limiter{ + limit: r, + last: time.Now(), + tokens: tokens, + lowTokensNotifyChan: lowTokensNotifyChan, + } + log.Info("new limiter", zap.String("limiter", fmt.Sprintf("%+v", lim))) + return lim +} + +// A Reservation holds information about events that are permitted by a Limiter to happen after a delay. +// A Reservation may be canceled, which may enable the Limiter to permit additional events. +type Reservation struct { + ok bool + lim *Limiter + tokens int + timeToAct time.Time + // This is the Limit at reservation time, it can change later. + limit Limit +} + +// OK returns whether the limiter can provide the requested number of tokens +// within the maximum wait time. If OK is false, Delay returns InfDuration, and +// Cancel does nothing. +func (r *Reservation) OK() bool { + return r.ok +} + +// Delay is shorthand for DelayFrom(time.Now()). +func (r *Reservation) Delay() time.Duration { + return r.DelayFrom(time.Now()) +} + +// InfDuration is the duration returned by Delay when a Reservation is not OK. 
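+// It is effectively the maximum representable time.Duration (about 292 years).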
+const InfDuration = time.Duration(1<<63 - 1) + +// DelayFrom returns the duration for which the reservation holder must wait +// before taking the reserved action. Zero duration means act immediately. +// InfDuration means the limiter cannot grant the tokens requested in this +// Reservation within the maximum wait time. +func (r *Reservation) DelayFrom(now time.Time) time.Duration { + if !r.ok { + return InfDuration + } + delay := r.timeToAct.Sub(now) + if delay < 0 { + return 0 + } + return delay +} + +// Cancel is shorthand for CancelAt(time.Now()). +func (r *Reservation) Cancel() { + r.CancelAt(time.Now()) +} + +// CancelAt indicates that the reservation holder will not perform the reserved action +// and reverses the effects of this Reservation on the rate limit as much as possible, +// considering that other reservations may have already been made. +func (r *Reservation) CancelAt(now time.Time) { + if !r.ok { + return + } + + r.lim.mu.Lock() + defer r.lim.mu.Unlock() + + if r.lim.limit == Inf || r.tokens == 0 || r.timeToAct.Before(now) { + return + } + + // calculate tokens to restore + // The duration between lim.lastEvent and r.timeToAct tells us how many tokens were reserved + // after r was obtained. These tokens should not be restored. + restoreTokens := float64(r.tokens) - r.limit.tokensFromDuration(r.lim.lastEvent.Sub(r.timeToAct)) + if restoreTokens <= 0 { + return + } + // advance time to now + now, _, tokens := r.lim.advance(now) + // calculate new number of tokens + tokens += restoreTokens + + // update state + r.lim.last = now + r.lim.tokens = tokens + if r.timeToAct == r.lim.lastEvent { + prevEvent := r.timeToAct.Add(r.limit.durationFromTokens(float64(-r.tokens))) + if !prevEvent.Before(now) { + r.lim.lastEvent = prevEvent + } + } +} + +// ReserveN returns a Reservation that indicates how long the caller must wait before n events happen. +// The Limiter takes this Reservation into account when allowing future events. +// The returned Reservation’s OK() method returns false if n exceeds the Limiter's burst size. +// Usage example: +// +// r := lim.ReserveN(time.Now(), 1) +// if !r.OK() { +// // Not allowed to act! Did you remember to set lim.burst to be > 0 ? +// return +// } +// time.Sleep(r.Delay()) +// Act() +// +// Use this method if you wish to wait and slow down in accordance with the rate limit without dropping events. +// If you need to respect a deadline or cancel the delay, use Wait instead. +// To drop or skip events exceeding rate limit, use Allow instead. +func (lim *Limiter) ReserveN(now time.Time, n int) *Reservation { + r := lim.reserveN(now, n, InfDuration) + return &r +} + +// WaitN blocks until lim permits n events to happen. +// It returns an error if n exceeds the Limiter's burst size, the Context is +// canceled, or the expected wait time exceeds the Context's Deadline. +// The burst limit is ignored if the rate limit is Inf. 
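+//
+// A typical call site looks like this (a sketch; "lim" is any *Limiter):
+//
+//	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
+//	defer cancel()
+//	if err := lim.WaitN(ctx, 10); err != nil {
+//		// the tokens could not be granted before the deadline
+//	}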
+// Todo: support float64 n
+func (lim *Limiter) WaitN(ctx context.Context, n int) (err error) {
+	lim.mu.Lock()
+	limit := lim.limit
+	lim.mu.Unlock()
+
+	if n > maxRequestTokens && limit != Inf {
+		return fmt.Errorf("rate: Wait(n=%d) exceeds limiter's max request token %f", n, maxRequestTokens)
+	}
+	// Check if ctx is already cancelled
+	select {
+	case <-ctx.Done():
+		return ctx.Err()
+	default:
+	}
+	// Determine wait limit
+	now := time.Now()
+	waitLimit := InfDuration
+	if deadline, ok := ctx.Deadline(); ok {
+		waitLimit = deadline.Sub(now)
+	}
+	// Reserve
+	r := lim.reserveN(now, n, waitLimit)
+	if !r.ok {
+		return fmt.Errorf("rate: Wait(n=%d) tokens(t=%f) rate(r=%f) would exceed context deadline", n, lim.tokens, limit)
+	}
+	// Wait if necessary
+	delay := r.DelayFrom(now)
+	if delay == 0 {
+		return nil
+	}
+	t := time.NewTimer(delay)
+	defer t.Stop()
+	if delay > 500*time.Millisecond {
+		log.Warn("[resource group controller] WaitN needs to wait", zap.Time("now", now), zap.Duration("delay", delay), zap.Int("n", n))
+	}
+	select {
+	case <-t.C:
+		// We can proceed.
+		return nil
+	case <-ctx.Done():
+		// Context was canceled before we could proceed. Cancel the
+		// reservation, which may permit other events to proceed sooner.
+		r.Cancel()
+		return ctx.Err()
+	}
+}
+
+// SetLimit is shorthand for setLimitAt(time.Now(), newLimit).
+func (lim *Limiter) SetLimit(newLimit Limit) {
+	lim.setLimitAt(time.Now(), newLimit)
+}
+
+// setLimitAt sets a new Limit for the limiter. The new Limit may be violated
+// or underutilized by those which reserved (using ReserveN or WaitN) but did
+// not yet act before setLimitAt was called.
+func (lim *Limiter) setLimitAt(now time.Time, newLimit Limit) {
+	select {
+	case <-lim.lowTokensNotifyChan:
+	default:
+	}
+	lim.mu.Lock()
+	defer lim.mu.Unlock()
+
+	now, _, tokens := lim.advance(now)
+
+	lim.last = now
+	lim.tokens = tokens
+	lim.limit = newLimit
+	lim.maybeNotify(now)
+}
+
+// SetupNotificationAt enables the notification at the given threshold.
+func (lim *Limiter) SetupNotificationAt(now time.Time, threshold float64) {
+	lim.advance(now)
+	lim.notifyThreshold = threshold
+}
+
+// notify tries to send a non-blocking notification on lowTokensNotifyChan and
+// disables further notifications (until the next Reconfigure or
+// SetupNotificationAt).
+func (lim *Limiter) notify() {
+	lim.notifyThreshold = 0
+	select {
+	case lim.lowTokensNotifyChan <- struct{}{}:
+	default:
+	}
+}
+
+// maybeNotify checks if it's time to send the notification and if so, performs
+// the notification.
+func (lim *Limiter) maybeNotify(now time.Time) {
+	if lim.IsLowTokens() {
+		lim.notify()
+	}
+}
+
+// IsLowTokens reports whether the available tokens have fallen below the
+// notification threshold.
+func (lim *Limiter) IsLowTokens() bool {
+	return lim.notifyThreshold > 0 && lim.tokens < lim.notifyThreshold
+}
+
+// RemoveTokens decreases the amount of tokens currently available.
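+// The balance may go negative; a negative balance delays future reservations
+// until the fill rate pays it back.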
+func (lim *Limiter) RemoveTokens(now time.Time, amount float64) {
+	lim.mu.Lock()
+	defer lim.mu.Unlock()
+	now, _, tokens := lim.advance(now)
+	lim.last = now
+	lim.tokens = tokens - amount
+	lim.maybeNotify(now)
+}
+
+type tokenBucketReconfigureArgs struct {
+	NewTokens       float64
+	NewRate         float64
+	NotifyThreshold float64
+}
+
+// Reconfigure applies a new token grant, fill rate, and notification threshold
+// in one atomic step.
+func (lim *Limiter) Reconfigure(now time.Time, args tokenBucketReconfigureArgs) {
+	select {
+	case <-lim.lowTokensNotifyChan:
+	default:
+	}
+	lim.mu.Lock()
+	defer lim.mu.Unlock()
+	log.Debug("[resource group controller] before reconfigure", zap.Float64("NewTokens", lim.tokens), zap.Float64("NewRate", float64(lim.limit)), zap.Float64("NotifyThreshold", args.NotifyThreshold))
+	now, _, tokens := lim.advance(now)
+	lim.last = now
+	lim.tokens = tokens + args.NewTokens
+	lim.limit = Limit(args.NewRate)
+	lim.notifyThreshold = args.NotifyThreshold
+	lim.maybeNotify(now)
+	log.Debug("[resource group controller] after reconfigure", zap.Float64("NewTokens", lim.tokens), zap.Float64("NewRate", float64(lim.limit)), zap.Float64("NotifyThreshold", args.NotifyThreshold))
+}
+
+// SetTokens sets the amount of tokens currently available.
+func (lim *Limiter) SetTokens(now time.Time, amount float64) {
+	select {
+	case <-lim.lowTokensNotifyChan:
+	default:
+	}
+	lim.mu.Lock()
+	defer lim.mu.Unlock()
+	now, _, _ = lim.advance(now)
+	lim.last = now
+	lim.tokens = amount
+}
+
+// AvailableTokens returns the amount of tokens currently available.
+func (lim *Limiter) AvailableTokens(now time.Time) float64 {
+	lim.mu.Lock()
+	defer lim.mu.Unlock()
+	_, _, tokens := lim.advance(now)
+	return tokens
+}
+
+// reserveN is a helper method for ReserveN and WaitN.
+// maxFutureReserve specifies the maximum reservation wait duration allowed.
+// reserveN returns Reservation, not *Reservation, to avoid allocation in WaitN.
+func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duration) Reservation {
+	lim.mu.Lock()
+	defer lim.mu.Unlock()
+
+	if lim.limit == Inf {
+		return Reservation{
+			ok:        true,
+			lim:       lim,
+			tokens:    n,
+			timeToAct: now,
+		}
+	} else if lim.limit == 0 {
+		// TODO(nolouch), remove burst, just use tokens
+		var ok bool
+		if lim.tokens >= float64(n) {
+			ok = true
+			lim.tokens -= float64(n)
+		}
+		return Reservation{
+			ok:        ok,
+			lim:       lim,
+			tokens:    int(lim.tokens),
+			timeToAct: now,
+		}
+	}
+
+	now, last, tokens := lim.advance(now)
+
+	// Calculate the remaining number of tokens resulting from the request.
+	tokens -= float64(n)
+	lim.maybeNotify(now)
+	// Calculate the wait duration
+	var waitDuration time.Duration
+	if tokens < 0 {
+		waitDuration = lim.limit.durationFromTokens(-tokens)
+	}
+
+	// Decide result
+	ok := n <= maxRequestTokens && waitDuration <= maxFutureReserve
+
+	// Prepare reservation
+	r := Reservation{
+		ok:    ok,
+		lim:   lim,
+		limit: lim.limit,
+	}
+	if ok {
+		r.tokens = n
+		r.timeToAct = now.Add(waitDuration)
+	}
+	// Update state
+	if ok {
+		lim.last = now
+		lim.tokens = tokens
+		lim.lastEvent = r.timeToAct
+	} else {
+		lim.last = last
+	}
+
+	return r
+}
+
+// advance calculates and returns an updated state for lim resulting from the passage of time.
+// lim is not changed.
+// advance requires that lim.mu is held.
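+//
+// For example, with limit = 100 tokens/s, lim.tokens = 2, and 50ms elapsed
+// since lim.last, advance reports 2 + 100*0.05 = 7 tokens.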
+func (lim *Limiter) advance(now time.Time) (newNow time.Time, newLast time.Time, newTokens float64) {
+	last := lim.last
+	if now.Before(last) {
+		last = now
+	}
+
+	// Calculate the new number of tokens, due to time that passed.
+	elapsed := now.Sub(last)
+	delta := lim.limit.tokensFromDuration(elapsed)
+	tokens := lim.tokens + delta
+	return now, last, tokens
+}
+
+// durationFromTokens is a unit conversion function from the number of tokens to the duration
+// of time it takes to accumulate them at a rate of limit tokens per second.
+func (limit Limit) durationFromTokens(tokens float64) time.Duration {
+	if limit <= 0 {
+		return InfDuration
+	}
+	seconds := tokens / float64(limit)
+	return time.Duration(float64(time.Second) * seconds)
+}
+
+// tokensFromDuration is a unit conversion function from a time duration to the number of tokens
+// which could be accumulated during that duration at a rate of limit tokens per second.
+func (limit Limit) tokensFromDuration(d time.Duration) float64 {
+	if limit <= 0 {
+		return 0
+	}
+	return d.Seconds() * float64(limit)
+}
diff --git a/pkg/mcs/resource_manager/client/model.go b/pkg/mcs/resource_manager/client/model.go
new file mode 100644
index 00000000000..79403406d31
--- /dev/null
+++ b/pkg/mcs/resource_manager/client/model.go
@@ -0,0 +1,231 @@
+// Copyright 2022 TiKV Project Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package client
+
+import (
+	"context"
+
+	rmpb "github.com/pingcap/kvproto/pkg/resource_manager"
+)
+
+// RequestUnit is the cost unit of a request.
+type RequestUnit float64
+
+// RequestInfo exposes the write information of a KV request.
+type RequestInfo interface {
+	IsWrite() bool
+	WriteBytes() uint64
+}
+
+// ResponseInfo exposes the read and CPU information of a KV response.
+type ResponseInfo interface {
+	ReadBytes() uint64
+	KVCPUms() uint64
+}
+
+func GetRUValueFromConsumption(custom *rmpb.Consumption, typ rmpb.RequestUnitType) float64 {
+	switch typ {
+	case 0:
+		return custom.RRU
+	case 1:
+		return custom.WRU
+	}
+	return 0
+}
+
+func GetResourceValueFromConsumption(custom *rmpb.Consumption, typ rmpb.ResourceType) float64 {
+	switch typ {
+	case 0:
+		return custom.TotalCpuTimeMs
+	case 1:
+		return custom.ReadBytes
+	case 2:
+		return custom.WriteBytes
+	}
+	return 0
+}
+
+// Add accumulates custom2 into custom1.
+func Add(custom1 *rmpb.Consumption, custom2 *rmpb.Consumption) {
+	custom1.RRU += custom2.RRU
+	custom1.WRU += custom2.WRU
+	custom1.ReadBytes += custom2.ReadBytes
+	custom1.WriteBytes += custom2.WriteBytes
+	custom1.TotalCpuTimeMs += custom2.TotalCpuTimeMs
+	custom1.SqlLayerCpuTimeMs += custom2.SqlLayerCpuTimeMs
+	custom1.KvReadRpcCount += custom2.KvReadRpcCount
+	custom1.KvWriteRpcCount += custom2.KvWriteRpcCount
+}
+
+// Sub subtracts custom2 from custom1.
+func Sub(custom1 *rmpb.Consumption, custom2 *rmpb.Consumption) {
+	custom1.RRU -= custom2.RRU
+	custom1.WRU -= custom2.WRU
+	custom1.ReadBytes -= custom2.ReadBytes
+	custom1.WriteBytes -= custom2.WriteBytes
+	custom1.TotalCpuTimeMs -= custom2.TotalCpuTimeMs
+	custom1.SqlLayerCpuTimeMs -= custom2.SqlLayerCpuTimeMs
+	custom1.KvReadRpcCount -= custom2.KvReadRpcCount
+	custom1.KvWriteRpcCount -= custom2.KvWriteRpcCount
+}
+
+// type ResourceCalculator interface {
+// 	Trickle(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, *rmpb.Consumption, context.Context)
+// 	BeforeKVRequest(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, *rmpb.Consumption, RequestInfo)
+// 	AfterKVRequest(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, *rmpb.Consumption, RequestInfo, ResponseInfo)
+// }
+
+// type KVCalculator struct {
+// 	*Config
+// }
+
+// func newKVCalculator(cfg *Config) *KVCalculator {
+// 	return &KVCalculator{Config: cfg}
+// }
+
+// func (dwc *KVCalculator) Trickle(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, consumption *rmpb.Consumption, ctx context.Context) {
+// }
+
+// func (dwc *KVCalculator) BeforeKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, consumption *rmpb.Consumption, req RequestInfo) {
+// 	if req.IsWrite() {
+// 		writeBytes := float64(req.WriteBytes())
+// 		// for resource
+// 		resource[rmpb.ResourceType_IOWriteFlow] += writeBytes
+// 		// for RU
+// 		wru := float64(dwc.WriteBytesCost) * writeBytes
+// 		ru[rmpb.RequestUnitType_WRU] += wru
+// 		// for consumption
+// 		consumption.KvWriteRpcCount += 1
+// 		consumption.WRU += wru
+// 		consumption.WriteBytes += writeBytes

+// 	} else {
+// 		// none for resource
+// 		// none for RU
+// 		// for consumption
+// 		consumption.KvReadRpcCount += 1
+// 	}
+// }
+// func (dwc *KVCalculator) AfterKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, consumption *rmpb.Consumption, req RequestInfo, res ResponseInfo) {
+// 	readBytes := float64(res.ReadBytes())
+// 	kvCPUms := float64(res.KVCPUms())
+// 	// for resource
+// 	resource[rmpb.ResourceType_IOReadFlow] += readBytes
+// 	resource[rmpb.ResourceType_CPU] += kvCPUms
+// 	// for RU
+// 	ru_io := readBytes * float64(dwc.ReadBytesCost)
+// 	ru_cpu := kvCPUms * float64(dwc.KVCPUMsCost)
+// 	ru[rmpb.RequestUnitType_RRU] += ru_cpu + ru_io
+// 	// for 
consumption +// consumption.RRU += ru_cpu + ru_io +// consumption.ReadBytes += readBytes +// consumption.TotalCpuTimeMs += kvCPUms +// } + +// type SQLLayerCPUCalculateor struct { +// *Config +// } + +// func newSQLLayerCPUCalculateor(cfg *Config) *SQLLayerCPUCalculateor { +// return &SQLLayerCPUCalculateor{Config: cfg} +// } + +// func (dsc *SQLLayerCPUCalculateor) Trickle(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, consumption *rmpb.Consumption, ctx context.Context) { +// // TODO: SQL Layer RU/resource custom +// cpuFunc := func(ctx context.Context) float64 { +// return 0. +// } +// cpu := cpuFunc(ctx) +// // for resource +// resource[rmpb.ResourceType_CPU] += cpu +// // for RU +// ru_cpu := cpu * float64(dsc.SQLCPUSecondCost) +// // TODO: SQL Layer RU/resource custom type +// ru[rmpb.RequestUnitType_RRU] += ru_cpu / 2 +// ru[rmpb.RequestUnitType_WRU] += ru_cpu / 2 +// // for consumption +// // TODO: SQL Layer RU/resource custom type +// consumption.RRU += ru_cpu / 2 +// consumption.RRU += ru_cpu / 2 +// consumption.TotalCpuTimeMs += cpu +// consumption.SqlLayerCpuTimeMs += cpu +// } + +// func (dsc *SQLLayerCPUCalculateor) BeforeKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, consumption *rmpb.Consumption, req RequestInfo) { +// } +// func (dsc *SQLLayerCPUCalculateor) AfterKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, consumption *rmpb.Consumption, req RequestInfo, res ResponseInfo) { +// } + +type ResourceCalculator interface { + Trickle(*rmpb.Consumption, context.Context) + BeforeKVRequest(*rmpb.Consumption, RequestInfo) + AfterKVRequest(*rmpb.Consumption, RequestInfo, ResponseInfo) +} + +type KVCalculator struct { + *Config +} + +func newKVCalculator(cfg *Config) *KVCalculator { + return &KVCalculator{Config: cfg} +} + +func (kc *KVCalculator) Trickle(consumption *rmpb.Consumption, ctx context.Context) { +} + +func (kc *KVCalculator) BeforeKVRequest(consumption *rmpb.Consumption, req RequestInfo) { + if req.IsWrite() { + writeBytes := float64(req.WriteBytes()) + wru := float64(kc.WriteBytesCost) * writeBytes + consumption.KvWriteRpcCount += 1 + consumption.WRU += wru + consumption.WriteBytes += writeBytes + + } else { + consumption.KvReadRpcCount += 1 + } +} +func (kc *KVCalculator) AfterKVRequest(consumption *rmpb.Consumption, req RequestInfo, res ResponseInfo) { + readBytes := float64(res.ReadBytes()) + kvCPUms := float64(res.KVCPUms()) + ru_io := readBytes * float64(kc.ReadBytesCost) + ru_cpu := kvCPUms * float64(kc.KVCPUMsCost) + // for consumption + consumption.RRU += ru_cpu + ru_io + consumption.ReadBytes += readBytes + consumption.TotalCpuTimeMs += kvCPUms +} + +type SQLLayerCPUCalculateor struct { + *Config +} + +func newSQLLayerCPUCalculateor(cfg *Config) *SQLLayerCPUCalculateor { + return &SQLLayerCPUCalculateor{Config: cfg} +} + +func (sc *SQLLayerCPUCalculateor) Trickle(consumption *rmpb.Consumption, ctx context.Context) { + // TODO: SQL Layer RU/resource custom + cpuFunc := func(ctx context.Context) float64 { + return 0. 
+ } + cpu := cpuFunc(ctx) + ru_cpu := cpu * float64(sc.SQLCPUSecondCost) + // TODO: SQL Layer RU/resource custom type + consumption.RRU += ru_cpu / 2 + consumption.RRU += ru_cpu / 2 + consumption.TotalCpuTimeMs += cpu + consumption.SqlLayerCpuTimeMs += cpu +} + +func (sc *SQLLayerCPUCalculateor) BeforeKVRequest(consumption *rmpb.Consumption, req RequestInfo) { +} +func (sc *SQLLayerCPUCalculateor) AfterKVRequest(consumption *rmpb.Consumption, req RequestInfo, res ResponseInfo) { +} From 9141699a876920dffc86bed9f20fd6a5f18ce19c Mon Sep 17 00:00:00 2001 From: Cabinfever_B Date: Thu, 12 Jan 2023 17:07:45 +0800 Subject: [PATCH 06/32] merge Signed-off-by: Cabinfever_B merge Signed-off-by: Cabinfever_B --- pkg/mcs/resource_manager/client/client.go | 65 ++-------- pkg/mcs/resource_manager/client/config.go | 9 +- pkg/mcs/resource_manager/client/model.go | 87 ------------- .../resource_manager/tenant_client/model.go | 118 ------------------ 4 files changed, 14 insertions(+), 265 deletions(-) delete mode 100644 pkg/mcs/resource_manager/tenant_client/model.go diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go index b3692bb2139..3dfa583654f 100644 --- a/pkg/mcs/resource_manager/client/client.go +++ b/pkg/mcs/resource_manager/client/client.go @@ -44,8 +44,9 @@ type ResourceGroupProvider interface { func NewResourceGroupController( clientUniqueId uint64, provider ResourceGroupProvider, + requestUnitConfig *RequestUnitConfig, ) (*resourceGroupsController, error) { - return newResourceGroupController(clientUniqueId, provider) + return newResourceGroupController(clientUniqueId, provider, requestUnitConfig) } var _ ResourceGroupKVInterceptor = (*resourceGroupsController)(nil) @@ -86,12 +87,17 @@ type resourceGroupsController struct { } } -func newResourceGroupController(clientUniqueId uint64, provider ResourceGroupProvider) (*resourceGroupsController, error) { - config := DefaultConfig() +func newResourceGroupController(clientUniqueId uint64, provider ResourceGroupProvider, requestUnitConfig *RequestUnitConfig) (*resourceGroupsController, error) { + var config *Config + if requestUnitConfig != nil { + config = generateConfig(requestUnitConfig) + } else { + config = DefaultConfig() + } return &resourceGroupsController{ clientUniqueId: clientUniqueId, provider: provider, - config: DefaultConfig(), + config: config, lowTokenNotifyChan: make(chan struct{}, 1), tokenResponseChan: make(chan []*rmpb.TokenBucketResponse, 1), calculators: []ResourceCalculator{newKVCalculator(config), newSQLLayerCPUCalculateor(config)}, @@ -476,11 +482,7 @@ func (gc *groupCostController) handleTokenBucketTrickEvent(ctx context.Context) case <-counter.setupNotificationCh: counter.setupNotificationTimer = nil counter.setupNotificationCh = nil -<<<<<<< HEAD:pkg/mcs/resource_manager/client/client.go counter.limiter.SetupNotificationAt(gc.run.now, float64(counter.setupNotificationThreshold)) -======= - counter.limiter.SetupNotification(gc.run.now, counter.setupNotificationThreshold) ->>>>>>> 49a78a80a7ba30505845094295fe5e3f2a802cf0:pkg/mcs/resource_manager/tenant_client/client.go gc.updateRunState(ctx) default: } @@ -491,11 +493,7 @@ func (gc *groupCostController) handleTokenBucketTrickEvent(ctx context.Context) case <-counter.setupNotificationCh: counter.setupNotificationTimer = nil counter.setupNotificationCh = nil -<<<<<<< HEAD:pkg/mcs/resource_manager/client/client.go counter.limiter.SetupNotificationAt(gc.run.now, float64(counter.setupNotificationThreshold)) -======= - 
counter.limiter.SetupNotification(gc.run.now, counter.setupNotificationThreshold) ->>>>>>> 49a78a80a7ba30505845094295fe5e3f2a802cf0:pkg/mcs/resource_manager/tenant_client/client.go gc.updateRunState(ctx) default: } @@ -765,61 +763,18 @@ func (gc *groupCostController) OnResponse(ctx context.Context, req RequestInfo, switch gc.mode { case rmpb.GroupMode_RawMode: for typ, counter := range gc.run.resourceTokens { -<<<<<<< HEAD:pkg/mcs/resource_manager/client/client.go if v := GetResourceValueFromConsumption(delta, typ); v > 0 { counter.limiter.RemoveTokens(time.Now(), float64(v)) -======= - v, ok := deltaResource[typ] - if ok { - counter.limiter.RemoveTokens(time.Now(), v) ->>>>>>> 49a78a80a7ba30505845094295fe5e3f2a802cf0:pkg/mcs/resource_manager/tenant_client/client.go } } case rmpb.GroupMode_RUMode: for typ, counter := range gc.run.requestUnitTokens { -<<<<<<< HEAD:pkg/mcs/resource_manager/client/client.go if v := GetRUValueFromConsumption(delta, typ); v > 0 { counter.limiter.RemoveTokens(time.Now(), float64(v)) -======= - v, ok := deltaRequestUnit[typ] - if ok { - counter.limiter.RemoveTokens(time.Now(), v) ->>>>>>> 49a78a80a7ba30505845094295fe5e3f2a802cf0:pkg/mcs/resource_manager/tenant_client/client.go } } } -<<<<<<< HEAD:pkg/mcs/resource_manager/client/client.go gc.mu.Lock() Add(gc.mu.consumption, delta) gc.mu.Unlock() -======= -} - -func (c *resourceGroupsController) addDemoResourceGroup(ctx context.Context) error { - setting := &rmpb.GroupSettings{ - Mode: rmpb.GroupMode_RUMode, - RUSettings: &rmpb.GroupRequestUnitSettings{ - RRU: &rmpb.TokenBucket{ - Tokens: 200000, - Settings: &rmpb.TokenLimitSettings{ - Fillrate: 2000, - BurstLimit: 20000000, - }, - }, - WRU: &rmpb.TokenBucket{ - Tokens: 200000, - Settings: &rmpb.TokenLimitSettings{ - Fillrate: 20000, - BurstLimit: 2000000, - }, - }, - }, - } - context, err := c.provider.AddResourceGroup(ctx, "demo", setting) - if err != nil { - return err - } - log.Info("add resource group", zap.String("resp", context), zap.Any("setting", setting)) - return err ->>>>>>> 49a78a80a7ba30505845094295fe5e3f2a802cf0:pkg/mcs/resource_manager/tenant_client/client.go } diff --git a/pkg/mcs/resource_manager/client/config.go b/pkg/mcs/resource_manager/client/config.go index eef37d30ebb..f20fcece5c7 100644 --- a/pkg/mcs/resource_manager/client/config.go +++ b/pkg/mcs/resource_manager/client/config.go @@ -21,8 +21,6 @@ import ( ) var ( - ruLen = len(rmpb.RequestUnitType_name) - resourceLen = len(rmpb.ResourceType_name) requestUnitList map[rmpb.RequestUnitType]struct{} = map[rmpb.RequestUnitType]struct{}{ rmpb.RequestUnitType_RRU: {}, rmpb.RequestUnitType_WRU: {}, @@ -108,17 +106,18 @@ func DefaultConfig() *Config { cfg := generateConfig( DefaultRequestUnitConfig(), ) - cfg.groupLoopUpdateInterval = defaultGroupLoopUpdateInterval - cfg.targetPeriod = defaultTargetPeriod return cfg } func generateConfig(ruConfig *RequestUnitConfig) *Config { - return &Config{ + cfg := &Config{ ReadBaseCost: RequestUnit(ruConfig.ReadBaseCost), ReadBytesCost: RequestUnit(ruConfig.ReadCostPerByte), WriteBaseCost: RequestUnit(ruConfig.WriteBaseCost), WriteBytesCost: RequestUnit(ruConfig.WriteCostPerByte), WriteCPUMsCost: RequestUnit(ruConfig.WriteCPUMsCost), } + cfg.groupLoopUpdateInterval = defaultGroupLoopUpdateInterval + cfg.targetPeriod = defaultTargetPeriod + return cfg } diff --git a/pkg/mcs/resource_manager/client/model.go b/pkg/mcs/resource_manager/client/model.go index bb17d3f2785..e86de8701a1 100644 --- a/pkg/mcs/resource_manager/client/model.go +++ 
b/pkg/mcs/resource_manager/client/model.go @@ -76,93 +76,6 @@ func Sub(custom1 *rmpb.Consumption, custom2 *rmpb.Consumption) { custom1.KvWriteRpcCount -= custom1.KvWriteRpcCount } -// type ResourceCalculator interface { -// Trickle(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, *rmpb.Consumption, context.Context) -// BeforeKVRequest(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, *rmpb.Consumption, RequestInfo) -// AfterKVRequest(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, *rmpb.Consumption, RequestInfo, ResponseInfo) -// } - -// type KVCalculator struct { -// *Config -// } - -// func newKVCalculator(cfg *Config) *KVCalculator { -// return &KVCalculator{Config: cfg} -// } - -// func (dwc *KVCalculator) Trickle(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, consumption *rmpb.Consumption, ctx context.Context) { -// } - -// func (dwc *KVCalculator) BeforeKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, consumption *rmpb.Consumption, req RequestInfo) { -// if req.IsWrite() { -// writeBytes := float64(req.WriteBytes()) -// // for resource -// resource[rmpb.ResourceType_IOWriteFlow] += writeBytes -// // for RU -// wru := float64(dwc.WriteBytesCost) * writeBytes -// ru[rmpb.RequestUnitType_WRU] += wru -// // for consumption -// consumption.KvWriteRpcCount += 1 -// consumption.WRU += wru -// consumption.WriteBytes += writeBytes - -// } else { -// // none for resource -// // none for RU -// // for consumption -// consumption.KvReadRpcCount += 1 -// } -// } -// func (dwc *KVCalculator) AfterKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, consumption *rmpb.Consumption, req RequestInfo, res ResponseInfo) { -// readBytes := float64(res.ReadBytes()) -// kvCPUms := float64(res.KVCPUms()) -// // for resource -// resource[rmpb.ResourceType_IOReadFlow] += readBytes -// resource[rmpb.ResourceType_CPU] += kvCPUms -// // for RU -// ru_io := readBytes * float64(dwc.ReadBytesCost) -// ru_cpu := kvCPUms * float64(dwc.KVCPUMsCost) -// ru[rmpb.RequestUnitType_RRU] += ru_cpu + ru_io -// // for consumption -// consumption.RRU += ru_cpu + ru_io -// consumption.ReadBytes += readBytes -// consumption.TotalCpuTimeMs += kvCPUms -// } - -// type SQLLayerCPUCalculateor struct { -// *Config -// } - -// func newSQLLayerCPUCalculateor(cfg *Config) *SQLLayerCPUCalculateor { -// return &SQLLayerCPUCalculateor{Config: cfg} -// } - -// func (dsc *SQLLayerCPUCalculateor) Trickle(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, consumption *rmpb.Consumption, ctx context.Context) { -// // TODO: SQL Layer RU/resource custom -// cpuFunc := func(ctx context.Context) float64 { -// return 0. 
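// Illustrative sketch, not part of the patch series: how a caller might wire a
// custom RequestUnitConfig into the controller after this change. The field
// names match the RequestUnitConfig that generateConfig reads above; the
// numeric costs are assumed values for the example only. Passing nil instead
// of ruConfig keeps DefaultConfig(), as newResourceGroupController shows.
func newControllerSketch(provider ResourceGroupProvider) (*resourceGroupsController, error) {
	ruConfig := &RequestUnitConfig{
		ReadBaseCost:     0.25,       // assumed RU per read RPC
		ReadCostPerByte:  1.0 / 64,   // assumed RU per byte read
		WriteBaseCost:    1.0,        // assumed RU per write RPC
		WriteCostPerByte: 1.0 / 1024, // assumed RU per byte written
		WriteCPUMsCost:   1.0 / 3,    // assumed RU per KV CPU millisecond
	}
	return NewResourceGroupController(1 /* clientUniqueId */, provider, ruConfig)
}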
-// } -// cpu := cpuFunc(ctx) -// // for resource -// resource[rmpb.ResourceType_CPU] += cpu -// // for RU -// ru_cpu := cpu * float64(dsc.SQLCPUSecondCost) -// // TODO: SQL Layer RU/resource custom type -// ru[rmpb.RequestUnitType_RRU] += ru_cpu / 2 -// ru[rmpb.RequestUnitType_WRU] += ru_cpu / 2 -// // for consumption -// // TODO: SQL Layer RU/resource custom type -// consumption.RRU += ru_cpu / 2 -// consumption.RRU += ru_cpu / 2 -// consumption.TotalCpuTimeMs += cpu -// consumption.SqlLayerCpuTimeMs += cpu -// } - -// func (dsc *SQLLayerCPUCalculateor) BeforeKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, consumption *rmpb.Consumption, req RequestInfo) { -// } -// func (dsc *SQLLayerCPUCalculateor) AfterKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, consumption *rmpb.Consumption, req RequestInfo, res ResponseInfo) { -// } - type ResourceCalculator interface { Trickle(*rmpb.Consumption, context.Context) BeforeKVRequest(*rmpb.Consumption, RequestInfo) diff --git a/pkg/mcs/resource_manager/tenant_client/model.go b/pkg/mcs/resource_manager/tenant_client/model.go deleted file mode 100644 index 94615f9ed46..00000000000 --- a/pkg/mcs/resource_manager/tenant_client/model.go +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright 2022 TiKV Project Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS,g -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tenantclient - -import ( - "context" - - rmpb "github.com/pingcap/kvproto/pkg/resource_manager" -) - -// RequestUnit is the basic unit of the resource request management, which has two types: -// - RRU: read request unit -// - WRU: write request unit -type RequestUnit float64 - -// RequestInfo is the interface of the request information provider. A request should be -// able tell whether it's a write request and if so, the written bytes would also be provided. -type RequestInfo interface { - IsWrite() bool - WriteBytes() uint64 -} - -// ResponseInfo is the interface of the response information provider. A response should be -// able tell how many bytes it read and KV CPU cost in milliseconds. -type ResponseInfo interface { - ReadBytes() uint64 - KVCPUMs() uint64 -} - -func Sub(c float64, other float64) float64 { - if c < other { - return 0 - } else { - return c - other - } -} - -// ResourceCalculator is used to calculate the resource consumption of a request. -type ResourceCalculator interface { - Trickle(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, context.Context) - BeforeKVRequest(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, RequestInfo) - AfterKVRequest(map[rmpb.ResourceType]float64, map[rmpb.RequestUnitType]float64, RequestInfo, ResponseInfo) -} - -// KVCalculator is used to calculate the KV request consumption. 
-type KVCalculator struct { - *Config -} - -func newKVCalculator(cfg *Config) *KVCalculator { - return &KVCalculator{Config: cfg} -} - -func (dwc *KVCalculator) Trickle(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, ctx context.Context) { -} - -func (dwc *KVCalculator) BeforeKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, req RequestInfo) { - if req.IsWrite() { - resource[rmpb.ResourceType_KVWriteRPCCount] += 1 - - writeBytes := float64(req.WriteBytes()) - resource[rmpb.ResourceType_WriteBytes] += writeBytes - - ru[rmpb.RequestUnitType_WRU] += float64(dwc.WriteBaseCost) - ru[rmpb.RequestUnitType_WRU] += float64(dwc.WriteBytesCost) * writeBytes - } else { - resource[rmpb.ResourceType_KVReadRPCCount] += 1 - ru[rmpb.RequestUnitType_RRU] += float64(dwc.ReadBaseCost) - } -} - -func (dwc *KVCalculator) AfterKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, req RequestInfo, res ResponseInfo) { - readBytes := float64(res.ReadBytes()) - resource[rmpb.ResourceType_ReadBytes] += readBytes - ru[rmpb.RequestUnitType_RRU] += readBytes * float64(dwc.ReadBytesCost) - - kvCPUMs := float64(res.KVCPUMs()) - resource[rmpb.ResourceType_TotalCPUTimeMs] += kvCPUMs - if req.IsWrite() { - ru[rmpb.RequestUnitType_WRU] += kvCPUMs * float64(dwc.WriteCPUMsCost) - } -} - -type SQLLayerCPUCalculateor struct { - *Config -} - -func newSQLLayerCPUCalculateor(cfg *Config) *SQLLayerCPUCalculateor { - return &SQLLayerCPUCalculateor{Config: cfg} -} - -func (dsc *SQLLayerCPUCalculateor) Trickle(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, ctx context.Context) { - // TODO: SQL Layer RU/resource custom - cpuFunc := func(ctx context.Context) float64 { - return 0. 
- } - cpu := cpuFunc(ctx) - resource[rmpb.ResourceType_TotalCPUTimeMs] += cpu - resource[rmpb.ResourceType_SQLLayerCPUTimeMs] += cpu -} - -func (dsc *SQLLayerCPUCalculateor) BeforeKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, req RequestInfo) { -} - -func (dsc *SQLLayerCPUCalculateor) AfterKVRequest(resource map[rmpb.ResourceType]float64, ru map[rmpb.RequestUnitType]float64, req RequestInfo, res ResponseInfo) { -} From 395a01cabb2490c3efa8f3c8c48c8bdb41eba09c Mon Sep 17 00:00:00 2001 From: Cabinfever_B Date: Thu, 12 Jan 2023 23:04:47 +0800 Subject: [PATCH 07/32] refactor limiter Signed-off-by: Cabinfever_B --- pkg/mcs/resource_manager/client/client.go | 37 ++----- pkg/mcs/resource_manager/client/limiter.go | 111 ++++++++++++--------- 2 files changed, 73 insertions(+), 75 deletions(-) diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go index 3dfa583654f..a177dc96488 100644 --- a/pkg/mcs/resource_manager/client/client.go +++ b/pkg/mcs/resource_manager/client/client.go @@ -706,46 +706,27 @@ func (gc *groupCostController) OnRequestWait( for _, calc := range gc.calculators { calc.BeforeKVRequest(delta, info) } - var wg sync.WaitGroup - var errReturn error + now := time.Now() switch gc.mode { case rmpb.GroupMode_RawMode: - wg.Add(len(requestResourceList)) + res := make([]*Reservation, 0, len(requestResourceList)) for typ, counter := range gc.run.resourceTokens { if v := GetResourceValueFromConsumption(delta, typ); v > 0 { - go func(value float64, counter *tokenCounter) { - err := counter.limiter.WaitN(ctx, int(v)) - if err != nil { - errReturn = err - } - wg.Done() - }(v, counter) - } else { - wg.Done() + res = append(res, counter.limiter.ReserveN(ctx, now, int(v))) } } - wg.Wait() - if errReturn != nil { - return errReturn + if err := waitReservations(ctx, res); err != nil { + return err } case rmpb.GroupMode_RUMode: - wg.Add(len(requestUnitList)) + res := make([]*Reservation, 0, len(requestUnitList)) for typ, counter := range gc.run.requestUnitTokens { if v := GetRUValueFromConsumption(delta, typ); v > 0 { - go func(value float64, counter *tokenCounter) { - err := counter.limiter.WaitN(ctx, int(v)) - if err != nil { - errReturn = err - } - wg.Done() - }(v, counter) - } else { - wg.Done() + res = append(res, counter.limiter.ReserveN(ctx, now, int(v))) } } - wg.Wait() - if errReturn != nil { - return errReturn + if err := waitReservations(ctx, res); err != nil { + return err } } gc.mu.Lock() diff --git a/pkg/mcs/resource_manager/client/limiter.go b/pkg/mcs/resource_manager/client/limiter.go index e98e9f02291..0457427d34f 100644 --- a/pkg/mcs/resource_manager/client/limiter.go +++ b/pkg/mcs/resource_manager/client/limiter.go @@ -202,8 +202,21 @@ func (r *Reservation) CancelAt(now time.Time) { // Use this method if you wish to wait and slow down in accordance with the rate limit without dropping events. // If you need to respect a deadline or cancel the delay, use Wait instead. // To drop or skip events exceeding rate limit, use Allow instead. 
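// Illustrative sketch, not part of the patch series: the reservation pattern
// this refactor moves to. Instead of blocking inside the limiter (the old
// WaitN), the caller obtains a Reservation, sleeps out the reported delay
// itself, and can cancel to hand the tokens back. Assumes the context, fmt,
// and time imports of the surrounding package.
func reserveOneSketch(ctx context.Context, lim *Limiter) error {
	r := lim.ReserveN(ctx, time.Now(), 1)
	if !r.OK() {
		// The tokens cannot be granted within the context deadline.
		return fmt.Errorf("reservation rejected")
	}
	time.Sleep(r.Delay()) // wait out the token shortfall, then act
	return nil
}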
-func (lim *Limiter) ReserveN(now time.Time, n int) *Reservation { - r := lim.reserveN(now, n, InfDuration) +func (lim *Limiter) ReserveN(ctx context.Context, now time.Time, n int) *Reservation { + // Check if ctx is already cancelled + select { + case <-ctx.Done(): + return &Reservation{ + ok: false, + } + default: + } + // Determine wait limit + waitLimit := InfDuration + if deadline, ok := ctx.Deadline(); ok { + waitLimit = deadline.Sub(now) + } + r := lim.reserveN(now, n, waitLimit) return &r } @@ -218,9 +231,6 @@ func (lim *Limiter) WaitN(ctx context.Context, n int) (err error) { limit := lim.limit lim.mu.Unlock() - if n > maxRequestTokens && limit != Inf { - return fmt.Errorf("rate: Wait(n=%d) exceeds limiter's max request token %f", n, maxRequestTokens) - } // Check if ctx is already cancelled select { case <-ctx.Done(): @@ -246,7 +256,7 @@ func (lim *Limiter) WaitN(ctx context.Context, n int) (err error) { t := time.NewTimer(delay) defer t.Stop() if delay > 500*time.Millisecond { - log.Warn("[tenant controllor] Need wait N", zap.Time("now", now), zap.Duration("delay", delay), zap.Int("n", n)) + log.Warn("[resource group controllor] Need wait N", zap.Time("now", now), zap.Duration("delay", delay), zap.Int("n", n)) } select { case <-t.C: @@ -260,30 +270,6 @@ func (lim *Limiter) WaitN(ctx context.Context, n int) (err error) { } } -// SetLimit is shorthand for SetLimitAt(time.Now(), newLimit). -func (lim *Limiter) SetLimit(newLimit Limit) { - lim.setLimitAt(time.Now(), newLimit) -} - -// SetLimitAt sets a new Limit for the limiter. The new Limit, and Burst, may be violated -// or underutilized by those which reserved (using Reserve or Wait) but did not yet act -// before SetLimitAt was called. -func (lim *Limiter) setLimitAt(now time.Time, newLimit Limit) { - select { - case <-lim.lowTokensNotifyChan: - default: - } - lim.mu.Lock() - defer lim.mu.Unlock() - - now, _, tokens := lim.advance(now) - - lim.last = now - lim.tokens = tokens - lim.limit = newLimit - lim.maybeNotify(now) -} - // SetupNotificationAt enables the notification at the given threshold. func (lim *Limiter) SetupNotificationAt(now time.Time, threshold float64) { lim.advance(now) @@ -340,27 +326,14 @@ func (lim *Limiter) Reconfigure(now time.Time, args tokenBucketReconfigureArgs) } lim.mu.Lock() defer lim.mu.Unlock() - log.Debug("[tenant controllor] before reconfigure", zap.Float64("NewTokens", lim.tokens), zap.Float64("NewRate", float64(lim.limit)), zap.Float64("NotifyThreshold", args.NotifyThreshold)) + log.Debug("[resource group controllor] before reconfigure", zap.Float64("NewTokens", lim.tokens), zap.Float64("NewRate", float64(lim.limit)), zap.Float64("NotifyThreshold", args.NotifyThreshold)) now, _, tokens := lim.advance(now) lim.last = now lim.tokens = tokens + args.NewTokens lim.limit = Limit(args.NewRate) lim.notifyThreshold = args.NotifyThreshold lim.maybeNotify(now) - log.Debug("[tenant controllor] after reconfigure", zap.Float64("NewTokens", lim.tokens), zap.Float64("NewRate", float64(lim.limit)), zap.Float64("NotifyThreshold", args.NotifyThreshold)) -} - -// SetTokens decreases the amount of tokens currently available. 
-func (lim *Limiter) SetTokens(now time.Time, amount float64) { - select { - case <-lim.lowTokensNotifyChan: - default: - } - lim.mu.Lock() - defer lim.mu.Unlock() - now, _, _ = lim.advance(now) - lim.last = now - lim.tokens = amount + log.Debug("[resource group controllor] after reconfigure", zap.Float64("NewTokens", lim.tokens), zap.Float64("NewRate", float64(lim.limit)), zap.Float64("NotifyThreshold", args.NotifyThreshold)) } // AvailableTokens decreases the amount of tokens currently available. @@ -386,7 +359,6 @@ func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duratio timeToAct: now, } } else if lim.limit == 0 { - // TODO(nolouch), remove burst, just use tokens var ok bool if lim.tokens >= float64(n) { ok = true @@ -398,8 +370,14 @@ func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duratio tokens: int(lim.tokens), timeToAct: now, } + } else if n > maxRequestTokens { + return Reservation{ + ok: false, + lim: lim, + tokens: int(lim.tokens), + timeToAct: now, + } } - now, last, tokens := lim.advance(now) // Calculate the remaining number of tokens resulting from the request. @@ -470,3 +448,42 @@ func (limit Limit) tokensFromDuration(d time.Duration) float64 { } return d.Seconds() * float64(limit) } + +func waitReservations(ctx context.Context, reservations []*Reservation) error { + if len(reservations) == 0 { + return nil + } + now := reservations[0].timeToAct + cancel := func() { + for _, res := range reservations { + res.CancelAt(now) + } + } + longestDelayDuration := time.Duration(0) + for _, res := range reservations { + if !res.ok { + cancel() + return fmt.Errorf("[resource group controller] limiter has no enough token") + } + delay := res.DelayFrom(now) + if delay > longestDelayDuration { + longestDelayDuration = delay + } + } + if longestDelayDuration > 500*time.Millisecond { + log.Warn("[resource group controllor] limiter needs wait ", zap.Time("now", now), zap.Duration("delay", longestDelayDuration)) + } + t := time.NewTimer(longestDelayDuration) + defer t.Stop() + + select { + case <-t.C: + // We can proceed. + return nil + case <-ctx.Done(): + // Context was canceled before we could proceed. Cancel the + // reservation, which may permit other events to proceed sooner. 
+ cancel() + return ctx.Err() + } +} From c31b6f77835b096be238f1d8b45e08bc9d0f8830 Mon Sep 17 00:00:00 2001 From: Cabinfever_B Date: Mon, 16 Jan 2023 01:51:33 +0800 Subject: [PATCH 08/32] fix bug Signed-off-by: Cabinfever_B --- pkg/mcs/resource_manager/client/client.go | 19 +++-- pkg/mcs/resource_manager/client/config.go | 2 +- pkg/mcs/resource_manager/client/limiter.go | 74 +++---------------- pkg/mcs/resource_manager/client/model.go | 42 ++++++----- .../resource_manager/server/grpc_service.go | 2 +- pkg/mcs/resource_manager/server/manager.go | 25 +++++-- 6 files changed, 68 insertions(+), 96 deletions(-) diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go index a177dc96488..2649c20f313 100644 --- a/pkg/mcs/resource_manager/client/client.go +++ b/pkg/mcs/resource_manager/client/client.go @@ -183,6 +183,7 @@ func (c *resourceGroupsController) shouldReportConsumption() bool { ret = ret || gc.shouldReportConsumption() return !ret }) + return ret } return false } @@ -283,8 +284,10 @@ func (c *resourceGroupsController) mainLoop(ctx context.Context) { } case <-c.lowTokenNotifyChan: c.updateRunState(ctx) + c.updateAvgRequestResourcePerSec(ctx) if !c.run.requestInProgress { - c.collectTokenBucketRequests(ctx, "low_ru", true /* only select low tokens resource group */) + c.collectTokenBucketRequests(ctx, "low_ru", false /* only select low tokens resource group */) + //c.collectTokenBucketRequests(ctx, "low_ru", true /* only select low tokens resource group */) } default: c.handleTokenBucketTrickEvent(ctx) @@ -418,7 +421,7 @@ func (gc *groupCostController) initRunState(ctx context.Context) { for typ := range requestUnitList { counter := &tokenCounter{ limiter: NewLimiter(0, initialRequestUnits, gc.lowRUNotifyChan), - avgRUPerSec: initialRequestUnits / gc.run.targetPeriod.Seconds(), + avgRUPerSec: initialRequestUnits / gc.run.targetPeriod.Seconds() * 2, avgLastTime: now, } gc.run.requestUnitTokens[typ] = counter @@ -428,7 +431,7 @@ func (gc *groupCostController) initRunState(ctx context.Context) { for typ := range requestResourceList { counter := &tokenCounter{ limiter: NewLimiter(0, initialRequestUnits, gc.lowRUNotifyChan), - avgRUPerSec: initialRequestUnits / gc.run.targetPeriod.Seconds(), + avgRUPerSec: initialRequestUnits / gc.run.targetPeriod.Seconds() * 2, avgLastTime: now, } gc.run.resourceTokens[typ] = counter @@ -521,7 +524,7 @@ func (gc *groupCostController) updateAvgRUPerSec(ctx context.Context) { func (gc *groupCostController) calcAvg(counter *tokenCounter, new float64) bool { deltaDuration := gc.run.now.Sub(counter.avgLastTime) - if deltaDuration <= 10*time.Millisecond { + if deltaDuration <= 500*time.Millisecond { return false } delta := (new - counter.avgRUPerSecLastRU) / deltaDuration.Seconds() @@ -558,6 +561,9 @@ func (gc *groupCostController) handleTokenBucketResponse(ctx context.Context, re for _, counter := range gc.run.resourceTokens { counter.limiter.RemoveTokens(gc.run.now, initialRequestUnits) } + for _, counter := range gc.run.requestUnitTokens { + counter.limiter.RemoveTokens(gc.run.now, initialRequestUnits) + } } } @@ -624,7 +630,6 @@ func (gc *groupCostController) modifyTokenCounter(counter *tokenCounter, bucket if timerDuration <= 0 { timerDuration = (trickleDuration + time.Second) / 2 } - log.Info("QQQ2 ", zap.Duration("timerDuration", timerDuration), zap.Float64("cfg.NewRate", cfg.NewRate)) counter.setupNotificationTimer = time.NewTimer(timerDuration) counter.setupNotificationCh = counter.setupNotificationTimer.C 
counter.setupNotificationThreshold = notifyThreshold @@ -715,7 +720,7 @@ func (gc *groupCostController) OnRequestWait( res = append(res, counter.limiter.ReserveN(ctx, now, int(v))) } } - if err := waitReservations(ctx, res); err != nil { + if err := waitReservations(now, ctx, res); err != nil { return err } case rmpb.GroupMode_RUMode: @@ -725,7 +730,7 @@ func (gc *groupCostController) OnRequestWait( res = append(res, counter.limiter.ReserveN(ctx, now, int(v))) } } - if err := waitReservations(ctx, res); err != nil { + if err := waitReservations(now, ctx, res); err != nil { return err } } diff --git a/pkg/mcs/resource_manager/client/config.go b/pkg/mcs/resource_manager/client/config.go index f20fcece5c7..0527288fa17 100644 --- a/pkg/mcs/resource_manager/client/config.go +++ b/pkg/mcs/resource_manager/client/config.go @@ -34,7 +34,7 @@ var ( const ( initialRequestUnits = 10000 - bufferRUs = 5000 + bufferRUs = 2000 // movingAvgFactor is the weight applied to a new "sample" of RU usage (with one // sample per mainLoopUpdateInterval). // diff --git a/pkg/mcs/resource_manager/client/limiter.go b/pkg/mcs/resource_manager/client/limiter.go index 0457427d34f..c27dcbc38d6 100644 --- a/pkg/mcs/resource_manager/client/limiter.go +++ b/pkg/mcs/resource_manager/client/limiter.go @@ -80,6 +80,7 @@ type Limiter struct { lastEvent time.Time notifyThreshold float64 lowTokensNotifyChan chan struct{} + isLowProcess bool } // Limit returns the maximum overall event rate. @@ -143,11 +144,6 @@ func (r *Reservation) DelayFrom(now time.Time) time.Duration { return delay } -// Cancel is shorthand for CancelAt(time.Now()). -func (r *Reservation) Cancel() { - r.CancelAt(time.Now()) -} - // CancelAt indicates that the reservation holder will not perform the reserved action // and reverses the effects of this Reservation on the rate limit as much as possible, // considering that other reservations may have already been made. @@ -220,56 +216,6 @@ func (lim *Limiter) ReserveN(ctx context.Context, now time.Time, n int) *Reserva return &r } -// WaitN blocks until lim permits n events to happen. -// It returns an error if n exceeds the Limiter's burst size, the Context is -// canceled, or the expected wait time exceeds the Context's Deadline. -// The burst limit is ignored if the rate limit is Inf. - -// Todo: support float64 n -func (lim *Limiter) WaitN(ctx context.Context, n int) (err error) { - lim.mu.Lock() - limit := lim.limit - lim.mu.Unlock() - - // Check if ctx is already cancelled - select { - case <-ctx.Done(): - return ctx.Err() - default: - } - // Determine wait limit - now := time.Now() - waitLimit := InfDuration - if deadline, ok := ctx.Deadline(); ok { - waitLimit = deadline.Sub(now) - } - // Reserve - r := lim.reserveN(now, n, waitLimit) - if !r.ok { - return fmt.Errorf("rate: Wait(n=%d) tokens(t=%f) rate(r=%f) would exceed context deadline", n, lim.tokens, limit) - } - // Wait if necessary - delay := r.DelayFrom(now) - if delay == 0 { - return nil - } - t := time.NewTimer(delay) - defer t.Stop() - if delay > 500*time.Millisecond { - log.Warn("[resource group controllor] Need wait N", zap.Time("now", now), zap.Duration("delay", delay), zap.Int("n", n)) - } - select { - case <-t.C: - // We can proceed. - return nil - case <-ctx.Done(): - // Context was canceled before we could proceed. Cancel the - // reservation, which may permit other events to proceed sooner. - r.Cancel() - return ctx.Err() - } -} - // SetupNotificationAt enables the notification at the given threshold. 
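// Illustrative sketch, not part of the patch series: the smoothing that
// calcAvg (a few hunks above) maintains over observed RU/sec, now sampled no
// more often than every 500ms. The exact weighting lives in the config's
// movingAvgFactor; the factor and samples here are assumptions chosen only to
// show the behaviour.
func ewmaSketch() float64 {
	const movingAvgFactor = 0.5 // assumed weight given to the old average
	avg := 0.0
	for _, sample := range []float64{1200, 800, 1000} { // observed RU/sec per interval
		avg = movingAvgFactor*avg + (1-movingAvgFactor)*sample
	}
	return avg // smoothed RU/sec, used to size the next token request
}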
func (lim *Limiter) SetupNotificationAt(now time.Time, threshold float64) { lim.advance(now) @@ -279,7 +225,11 @@ func (lim *Limiter) SetupNotificationAt(now time.Time, threshold float64) { // notify tries to send a non-blocking notification on notifyCh and disables // further notifications (until the next Reconfigure or StartNotification). func (lim *Limiter) notify() { + if lim.isLowProcess { + return + } lim.notifyThreshold = 0 + lim.isLowProcess = true select { case lim.lowTokensNotifyChan <- struct{}{}: default: @@ -295,7 +245,7 @@ func (lim *Limiter) maybeNotify(now time.Time) { } func (lim *Limiter) IsLowTokens() bool { - if lim.notifyThreshold > 0 && lim.tokens < lim.notifyThreshold { + if lim.isLowProcess || (lim.notifyThreshold > 0 && lim.tokens < lim.notifyThreshold) { return true } return false @@ -320,10 +270,6 @@ type tokenBucketReconfigureArgs struct { } func (lim *Limiter) Reconfigure(now time.Time, args tokenBucketReconfigureArgs) { - select { - case <-lim.lowTokensNotifyChan: - default: - } lim.mu.Lock() defer lim.mu.Unlock() log.Debug("[resource group controllor] before reconfigure", zap.Float64("NewTokens", lim.tokens), zap.Float64("NewRate", float64(lim.limit)), zap.Float64("NotifyThreshold", args.NotifyThreshold)) @@ -332,6 +278,7 @@ func (lim *Limiter) Reconfigure(now time.Time, args tokenBucketReconfigureArgs) lim.tokens = tokens + args.NewTokens lim.limit = Limit(args.NewRate) lim.notifyThreshold = args.NotifyThreshold + lim.isLowProcess = false lim.maybeNotify(now) log.Debug("[resource group controllor] after reconfigure", zap.Float64("NewTokens", lim.tokens), zap.Float64("NewRate", float64(lim.limit)), zap.Float64("NotifyThreshold", args.NotifyThreshold)) } @@ -364,6 +311,7 @@ func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duratio ok = true lim.tokens -= float64(n) } + lim.maybeNotify(now) return Reservation{ ok: ok, lim: lim, @@ -449,11 +397,10 @@ func (limit Limit) tokensFromDuration(d time.Duration) float64 { return d.Seconds() * float64(limit) } -func waitReservations(ctx context.Context, reservations []*Reservation) error { +func waitReservations(now time.Time, ctx context.Context, reservations []*Reservation) error { if len(reservations) == 0 { return nil } - now := reservations[0].timeToAct cancel := func() { for _, res := range reservations { res.CancelAt(now) @@ -470,6 +417,9 @@ func waitReservations(ctx context.Context, reservations []*Reservation) error { longestDelayDuration = delay } } + if longestDelayDuration <= 0 { + return nil + } if longestDelayDuration > 500*time.Millisecond { log.Warn("[resource group controllor] limiter needs wait ", zap.Time("now", now), zap.Duration("delay", longestDelayDuration)) } diff --git a/pkg/mcs/resource_manager/client/model.go b/pkg/mcs/resource_manager/client/model.go index e86de8701a1..28a6bb41732 100644 --- a/pkg/mcs/resource_manager/client/model.go +++ b/pkg/mcs/resource_manager/client/model.go @@ -55,25 +55,25 @@ func GetResourceValueFromConsumption(custom *rmpb.Consumption, typ rmpb.Resource } func Add(custom1 *rmpb.Consumption, custom2 *rmpb.Consumption) { - custom1.RRU += custom1.RRU - custom1.WRU += custom1.WRU - custom1.ReadBytes += custom1.ReadBytes - custom1.WriteBytes += custom1.WriteBytes - custom1.TotalCpuTimeMs += custom1.TotalCpuTimeMs - custom1.SqlLayerCpuTimeMs += custom1.SqlLayerCpuTimeMs - custom1.KvReadRpcCount += custom1.KvReadRpcCount - custom1.KvWriteRpcCount += custom1.KvWriteRpcCount + custom1.RRU += custom2.RRU + custom1.WRU += custom2.WRU + custom1.ReadBytes += 
custom2.ReadBytes + custom1.WriteBytes += custom2.WriteBytes + custom1.TotalCpuTimeMs += custom2.TotalCpuTimeMs + custom1.SqlLayerCpuTimeMs += custom2.SqlLayerCpuTimeMs + custom1.KvReadRpcCount += custom2.KvReadRpcCount + custom1.KvWriteRpcCount += custom2.KvWriteRpcCount } func Sub(custom1 *rmpb.Consumption, custom2 *rmpb.Consumption) { - custom1.RRU -= custom1.RRU - custom1.WRU -= custom1.WRU - custom1.ReadBytes -= custom1.ReadBytes - custom1.WriteBytes -= custom1.WriteBytes - custom1.TotalCpuTimeMs -= custom1.TotalCpuTimeMs - custom1.SqlLayerCpuTimeMs -= custom1.SqlLayerCpuTimeMs - custom1.KvReadRpcCount -= custom1.KvReadRpcCount - custom1.KvWriteRpcCount -= custom1.KvWriteRpcCount + custom1.RRU -= custom2.RRU + custom1.WRU -= custom2.WRU + custom1.ReadBytes -= custom2.ReadBytes + custom1.WriteBytes -= custom2.WriteBytes + custom1.TotalCpuTimeMs -= custom2.TotalCpuTimeMs + custom1.SqlLayerCpuTimeMs -= custom2.SqlLayerCpuTimeMs + custom1.KvReadRpcCount -= custom2.KvReadRpcCount + custom1.KvWriteRpcCount -= custom2.KvWriteRpcCount } type ResourceCalculator interface { @@ -96,12 +96,13 @@ func (kc *KVCalculator) Trickle(consumption *rmpb.Consumption, ctx context.Conte func (kc *KVCalculator) BeforeKVRequest(consumption *rmpb.Consumption, req RequestInfo) { if req.IsWrite() { writeBytes := float64(req.WriteBytes()) - wru := float64(kc.WriteBytesCost) * writeBytes + wru := float64(kc.WriteBytesCost)*writeBytes + float64(kc.WriteBaseCost) consumption.KvWriteRpcCount += 1 consumption.WRU += wru consumption.WriteBytes += writeBytes - } else { + rru := float64(kc.ReadBaseCost) + consumption.RRU += rru consumption.KvReadRpcCount += 1 } } @@ -109,7 +110,10 @@ func (kc *KVCalculator) AfterKVRequest(consumption *rmpb.Consumption, req Reques readBytes := float64(res.ReadBytes()) kvCPUms := float64(res.KVCPUms()) ru_io := readBytes * float64(kc.ReadBytesCost) - ru_cpu := kvCPUms * float64(kc.WriteCPUMsCost) + var ru_cpu = 0. + if req.IsWrite() { + ru_cpu = kvCPUms * float64(kc.WriteCPUMsCost) + } // for consumption consumption.RRU += ru_cpu + ru_io consumption.ReadBytes += readBytes diff --git a/pkg/mcs/resource_manager/server/grpc_service.go b/pkg/mcs/resource_manager/server/grpc_service.go index a2c827de334..5730fb16f9d 100644 --- a/pkg/mcs/resource_manager/server/grpc_service.go +++ b/pkg/mcs/resource_manager/server/grpc_service.go @@ -78,7 +78,7 @@ func (s *Service) GetManager() *Manager { // GetResourceGroup implements ResourceManagerServer.GetResourceGroup. func (s *Service) GetResourceGroup(ctx context.Context, req *rmpb.GetResourceGroupRequest) (*rmpb.GetResourceGroupResponse, error) { - rg := s.manager.GetResourceGroup(req.ResourceGroupName) + rg := s.manager.GetResourceDuplicateGroup(req.ResourceGroupName) if rg == nil { return nil, errors.New("resource group not found") } diff --git a/pkg/mcs/resource_manager/server/manager.go b/pkg/mcs/resource_manager/server/manager.go index 831e6cb734f..91353a05c12 100644 --- a/pkg/mcs/resource_manager/server/manager.go +++ b/pkg/mcs/resource_manager/server/manager.go @@ -15,14 +15,16 @@ package server import ( - "encoding/json" "sort" "sync" + "github.com/gogo/protobuf/proto" "github.com/pingcap/errors" rmpb "github.com/pingcap/kvproto/pkg/resource_manager" + "github.com/pingcap/log" "github.com/tikv/pd/server" "github.com/tikv/pd/server/storage" + "go.uber.org/zap" ) // Manager is the manager of resource group. @@ -45,11 +47,12 @@ func NewManager(srv *server.Server) *Manager { // Init initializes the resource group manager. 
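// Illustrative sketch, not part of the patch series: with the custom1/custom2
// fix above, Add now accumulates a per-request delta into the group's running
// consumption instead of doubling custom1 in place, and Sub correspondingly
// subtracts the other operand.
func addDeltaSketch() {
	total := &rmpb.Consumption{RRU: 10, WRU: 5}
	delta := &rmpb.Consumption{RRU: 2, WRU: 1, ReadBytes: 4096}
	Add(total, delta)
	// total is now {RRU: 12, WRU: 6, ReadBytes: 4096}
}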
func (m *Manager) Init() { handler := func(k, v string) { - var group ResourceGroup - if err := json.Unmarshal([]byte(v), &group); err != nil { + group := &rmpb.ResourceGroup{} + if err := proto.Unmarshal([]byte(v), group); err != nil { + log.Error("err", zap.Error(err), zap.String("k", k), zap.String("v", v)) panic(err) } - m.groups[group.Name] = &group + m.groups[group.Name] = FromProtoResourceGroup(group) } m.storage().LoadResourceGroups(handler) } @@ -109,8 +112,8 @@ func (m *Manager) DeleteResourceGroup(name string) error { return nil } -// GetResourceGroup returns a copy of a resource group. -func (m *Manager) GetResourceGroup(name string) *ResourceGroup { +// GetResourceDuplicateGroup returns a copy of a resource group. +func (m *Manager) GetResourceDuplicateGroup(name string) *ResourceGroup { m.RLock() defer m.RUnlock() if group, ok := m.groups[name]; ok { @@ -119,6 +122,16 @@ func (m *Manager) GetResourceGroup(name string) *ResourceGroup { return nil } +// GetResourceGroup returns a resource group. +func (m *Manager) GetResourceGroup(name string) *ResourceGroup { + m.RLock() + defer m.RUnlock() + if group, ok := m.groups[name]; ok { + return group + } + return nil +} + // GetResourceGroupList returns copies of resource group list. func (m *Manager) GetResourceGroupList() []*ResourceGroup { m.RLock() From f1bc87bf8b709d85ae8a02e8fac1cf524ff75fe5 Mon Sep 17 00:00:00 2001 From: Cabinfever_B Date: Mon, 16 Jan 2023 02:24:36 +0800 Subject: [PATCH 09/32] add limtier Signed-off-by: Cabinfever_B --- pkg/mcs/resource_manager/client/limiter.go | 441 +++++++++++++++++++++ 1 file changed, 441 insertions(+) create mode 100644 pkg/mcs/resource_manager/client/limiter.go diff --git a/pkg/mcs/resource_manager/client/limiter.go b/pkg/mcs/resource_manager/client/limiter.go new file mode 100644 index 00000000000..8a05d599b29 --- /dev/null +++ b/pkg/mcs/resource_manager/client/limiter.go @@ -0,0 +1,441 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS,g +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package client + +import ( + "context" + "fmt" + "math" + "sync" + "time" + + "github.com/pingcap/log" + "go.uber.org/zap" +) + +// Limit defines the maximum frequency of some events. +// Limit is represented as number of events per second. +// A zero Limit allows no events. +type Limit float64 + +// Inf is the infinite rate limit; it allows all events (even if burst is zero). +const Inf = Limit(math.MaxFloat64) + +// Every converts a minimum time interval between events to a Limit. +func Every(interval time.Duration) Limit { + if interval <= 0 { + return Inf + } + return 1 / Limit(interval.Seconds()) +} + +const maxRequestTokens = 1e8 + +// A Limiter controls how frequently events are allowed to happen. +// It implements a "token bucket" of size b, initially full and refilled +// at rate r tokens per second. 
+// Informally, in any large enough time interval, the Limiter limits the +// rate to r tokens per second, with a maximum burst size of b events. +// As a special case, if r == Inf (the infinite rate), b is ignored. +// See https://en.wikipedia.org/wiki/Token_bucket for more about token buckets. +// +// The zero value is a valid Limiter, but it will reject all events. +// Use NewLimiter to create non-zero Limiters. +// +// Limiter has three main methods, Allow, Reserve, and Wait. +// Most callers should use Wait. +// +// Each of the three methods consumes a single token. +// They differ in their behavior when no token is available. +// If no token is available, Allow returns false. +// If no token is available, Reserve returns a reservation for a future token +// and the amount of time the caller must wait before using it. +// If no token is available, Wait blocks until one can be obtained +// or its associated context.Context is canceled. +// +// The methods AllowN, ReserveN, and WaitN consume n tokens. +type Limiter struct { + mu sync.Mutex + limit Limit + tokens float64 + // last is the last time the limiter's tokens field was updated + last time.Time + // lastEvent is the latest time of a rate-limited event (past or future) + lastEvent time.Time + notifyThreshold float64 + lowTokensNotifyChan chan struct{} + isLowProcess bool +} + +// Limit returns the maximum overall event rate. +func (lim *Limiter) Limit() Limit { + lim.mu.Lock() + defer lim.mu.Unlock() + return lim.limit +} + +// NewLimiter returns a new Limiter that allows events up to rate r and permits +// bursts of at most b tokens. +func NewLimiter(r Limit, tokens float64, lowTokensNotifyChan chan struct{}) *Limiter { + lim := &Limiter{ + limit: r, + last: time.Now(), + tokens: tokens, + lowTokensNotifyChan: lowTokensNotifyChan, + } + log.Info("new limiter", zap.String("limiter", fmt.Sprintf("%+v", lim))) + return lim +} + +// A Reservation holds information about events that are permitted by a Limiter to happen after a delay. +// A Reservation may be canceled, which may enable the Limiter to permit additional events. +type Reservation struct { + ok bool + lim *Limiter + tokens int + timeToAct time.Time + // This is the Limit at reservation time, it can change later. + limit Limit +} + +// OK returns whether the limiter can provide the requested number of tokens +// within the maximum wait time. If OK is false, Delay returns InfDuration, and +// Cancel does nothing. +func (r *Reservation) OK() bool { + return r.ok +} + +// Delay is shorthand for DelayFrom(time.Now()). +func (r *Reservation) Delay() time.Duration { + return r.DelayFrom(time.Now()) +} + +// InfDuration is the duration returned by Delay when a Reservation is not OK. +const InfDuration = time.Duration(1<<63 - 1) + +// DelayFrom returns the duration for which the reservation holder must wait +// before taking the reserved action. Zero duration means act immediately. +// InfDuration means the limiter cannot grant the tokens requested in this +// Reservation within the maximum wait time. +func (r *Reservation) DelayFrom(now time.Time) time.Duration { + if !r.ok { + return InfDuration + } + delay := r.timeToAct.Sub(now) + if delay < 0 { + return 0 + } + return delay +} + +// CancelAt indicates that the reservation holder will not perform the reserved action +// and reverses the effects of this Reservation on the rate limit as much as possible, +// considering that other reservations may have already been made. 
+func (r *Reservation) CancelAt(now time.Time) { + if !r.ok { + return + } + + r.lim.mu.Lock() + defer r.lim.mu.Unlock() + + if r.lim.limit == Inf || r.tokens == 0 || r.timeToAct.Before(now) { + return + } + + // calculate tokens to restore + // The duration between lim.lastEvent and r.timeToAct tells us how many tokens were reserved + // after r was obtained. These tokens should not be restored. + restoreTokens := float64(r.tokens) - r.limit.tokensFromDuration(r.lim.lastEvent.Sub(r.timeToAct)) + if restoreTokens <= 0 { + return + } + // advance time to now + now, _, tokens := r.lim.advance(now) + // calculate new number of tokens + tokens += restoreTokens + + // update state + r.lim.last = now + r.lim.tokens = tokens + if r.timeToAct == r.lim.lastEvent { + prevEvent := r.timeToAct.Add(r.limit.durationFromTokens(float64(-r.tokens))) + if !prevEvent.Before(now) { + r.lim.lastEvent = prevEvent + } + } +} + +// ReserveN returns a Reservation that indicates how long the caller must wait before n events happen. +// The Limiter takes this Reservation into account when allowing future events. +// The returned Reservation’s OK() method returns false if n exceeds the Limiter's burst size. +// Usage example: +// +// r := lim.ReserveN(time.Now(), 1) +// if !r.OK() { +// // Not allowed to act! Did you remember to set lim.burst to be > 0 ? +// return +// } +// time.Sleep(r.Delay()) +// Act() +// +// Use this method if you wish to wait and slow down in accordance with the rate limit without dropping events. +// If you need to respect a deadline or cancel the delay, use Wait instead. +// To drop or skip events exceeding rate limit, use Allow instead. +func (lim *Limiter) ReserveN(ctx context.Context, now time.Time, n int) *Reservation { + // Check if ctx is already cancelled + select { + case <-ctx.Done(): + return &Reservation{ + ok: false, + } + default: + } + // Determine wait limit + waitLimit := InfDuration + if deadline, ok := ctx.Deadline(); ok { + waitLimit = deadline.Sub(now) + } + r := lim.reserveN(now, n, waitLimit) + return &r +} + +// SetupNotificationAt enables the notification at the given threshold. +func (lim *Limiter) SetupNotificationAt(now time.Time, threshold float64) { + lim.advance(now) + lim.notifyThreshold = threshold +} + +// notify tries to send a non-blocking notification on notifyCh and disables +// further notifications (until the next Reconfigure or StartNotification). +func (lim *Limiter) notify() { + if lim.isLowProcess { + return + } + lim.notifyThreshold = 0 + lim.isLowProcess = true + select { + case lim.lowTokensNotifyChan <- struct{}{}: + default: + } +} + +// maybeNotify checks if it's time to send the notification and if so, performs +// the notification. +func (lim *Limiter) maybeNotify(now time.Time) { + if lim.IsLowTokens() { + lim.notify() + } +} + +func (lim *Limiter) IsLowTokens() bool { + if lim.isLowProcess || (lim.notifyThreshold > 0 && lim.tokens < lim.notifyThreshold) { + return true + } + return false +} + +// RemoveTokens decreases the amount of tokens currently available. 
+func (lim *Limiter) RemoveTokens(now time.Time, amount float64) { + lim.mu.Lock() + defer lim.mu.Unlock() + now, _, tokens := lim.advance(now) + lim.last = now + lim.tokens = tokens - amount + lim.maybeNotify(now) +} + +type tokenBucketReconfigureArgs struct { + NewTokens float64 + + NewRate float64 + + NotifyThreshold float64 +} + +func (lim *Limiter) Reconfigure(now time.Time, args tokenBucketReconfigureArgs) { + lim.mu.Lock() + defer lim.mu.Unlock() + log.Debug("[resource group controllor] before reconfigure", zap.Float64("NewTokens", lim.tokens), zap.Float64("NewRate", float64(lim.limit)), zap.Float64("NotifyThreshold", args.NotifyThreshold)) + now, _, tokens := lim.advance(now) + lim.last = now + lim.tokens = tokens + args.NewTokens + lim.limit = Limit(args.NewRate) + lim.notifyThreshold = args.NotifyThreshold + lim.isLowProcess = false + lim.maybeNotify(now) + log.Debug("[resource group controllor] after reconfigure", zap.Float64("NewTokens", lim.tokens), zap.Float64("NewRate", float64(lim.limit)), zap.Float64("NotifyThreshold", args.NotifyThreshold)) +} + +// AvailableTokens decreases the amount of tokens currently available. +func (lim *Limiter) AvailableTokens(now time.Time) float64 { + lim.mu.Lock() + defer lim.mu.Unlock() + _, _, tokens := lim.advance(now) + return tokens +} + +// reserveN is a helper method for ReserveN. +// maxFutureReserve specifies the maximum reservation wait duration allowed. +// reserveN returns Reservation, not *Reservation, to avoid allocation in AllowN and WaitN. +func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duration) Reservation { + lim.mu.Lock() + defer lim.mu.Unlock() + + if lim.limit == Inf { + return Reservation{ + ok: true, + lim: lim, + tokens: n, + timeToAct: now, + } + } else if lim.limit == 0 { + var ok bool + if lim.tokens >= float64(n) { + ok = true + lim.tokens -= float64(n) + } + lim.maybeNotify(now) + return Reservation{ + ok: ok, + lim: lim, + tokens: int(lim.tokens), + timeToAct: now, + } + } else if n > maxRequestTokens { + return Reservation{ + ok: false, + lim: lim, + tokens: int(lim.tokens), + timeToAct: now, + } + } + now, last, tokens := lim.advance(now) + + // Calculate the remaining number of tokens resulting from the request. + tokens -= float64(n) + lim.maybeNotify(now) + // Calculate the wait duration + var waitDuration time.Duration + if tokens < 0 { + waitDuration = lim.limit.durationFromTokens(-tokens) + } + + // Decide result + ok := n <= maxRequestTokens && waitDuration <= maxFutureReserve + + // Prepare reservation + r := Reservation{ + ok: ok, + lim: lim, + limit: lim.limit, + } + if ok { + r.tokens = n + r.timeToAct = now.Add(waitDuration) + } + // Update state + if ok { + lim.last = now + lim.tokens = tokens + lim.lastEvent = r.timeToAct + } else { + lim.last = last + } + + return r +} + +// advance calculates and returns an updated state for lim resulting from the passage of time. +// lim is not changed. +// advance requires that lim.mu is held. +func (lim *Limiter) advance(now time.Time) (newNow time.Time, newLast time.Time, newTokens float64) { + last := lim.last + if now.Before(last) { + last = now + } + + // Calculate the new number of tokens, due to time that passed. + elapsed := now.Sub(last) + delta := lim.limit.tokensFromDuration(elapsed) + tokens := lim.tokens + delta + return now, last, tokens +} + +// durationFromTokens is a unit conversion function from the number of tokens to the duration +// of time it takes to accumulate them at a rate of limit tokens per second. 
+func (limit Limit) durationFromTokens(tokens float64) time.Duration { + if limit <= 0 { + return InfDuration + } + seconds := tokens / float64(limit) + return time.Duration(float64(time.Second) * seconds) +} + +// tokensFromDuration is a unit conversion function from a time duration to the number of tokens +// which could be accumulated during that duration at a rate of limit tokens per second. +func (limit Limit) tokensFromDuration(d time.Duration) float64 { + if limit <= 0 { + return 0 + } + return d.Seconds() * float64(limit) +} + +// WaitReservations is used to process a series of reservations +// so that all limiter tokens are returned if one reservation fails +func WaitReservations(now time.Time, ctx context.Context, reservations []*Reservation) error { + if len(reservations) == 0 { + return nil + } + cancel := func() { + for _, res := range reservations { + res.CancelAt(now) + } + } + longestDelayDuration := time.Duration(0) + for _, res := range reservations { + if !res.ok { + cancel() + return fmt.Errorf("[resource group controller] limiter has no enough token") + } + delay := res.DelayFrom(now) + if delay > longestDelayDuration { + longestDelayDuration = delay + } + } + if longestDelayDuration <= 0 { + return nil + } + if longestDelayDuration > 500*time.Millisecond { + log.Warn("[resource group controllor] limiter needs wait ", zap.Time("now", now), zap.Duration("delay", longestDelayDuration)) + } + t := time.NewTimer(longestDelayDuration) + defer t.Stop() + + select { + case <-t.C: + // We can proceed. + return nil + case <-ctx.Done(): + // Context was canceled before we could proceed. Cancel the + // reservation, which may permit other events to proceed sooner. + cancel() + return ctx.Err() + } +} From d74e60046da39ce0b42392bbb7234ad74b3a72f4 Mon Sep 17 00:00:00 2001 From: Cabinfever_B Date: Mon, 16 Jan 2023 02:25:15 +0800 Subject: [PATCH 10/32] address comment Signed-off-by: Cabinfever_B --- pkg/mcs/resource_manager/client/limiter.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/mcs/resource_manager/client/limiter.go b/pkg/mcs/resource_manager/client/limiter.go index 8a05d599b29..b79b4d401c6 100644 --- a/pkg/mcs/resource_manager/client/limiter.go +++ b/pkg/mcs/resource_manager/client/limiter.go @@ -238,7 +238,7 @@ func (lim *Limiter) notify() { // maybeNotify checks if it's time to send the notification and if so, performs // the notification. 
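// Illustrative sketch, not part of the patch series: reserving from several
// token counters at once and waiting on all of them, the pattern
// OnRequestWait uses. If any reservation is not OK, WaitReservations cancels
// every one so the tokens flow back, and the caller sees a single error. The
// costs are assumptions; a later patch in this series flips the now/ctx
// argument order.
func waitAllSketch(ctx context.Context, rruLim, wruLim *Limiter) error {
	now := time.Now()
	res := []*Reservation{
		rruLim.ReserveN(ctx, now, 3),  // assumed RRU cost of the request
		wruLim.ReserveN(ctx, now, 12), // assumed WRU cost of the request
	}
	return WaitReservations(now, ctx, res)
}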
-func (lim *Limiter) maybeNotify(now time.Time) {
+func (lim *Limiter) maybeNotify() {
 if lim.IsLowTokens() {
 lim.notify()
 }
@@ -258,7 +258,7 @@ func (lim *Limiter) RemoveTokens(now time.Time, amount float64) {
 now, _, tokens := lim.advance(now)
 lim.last = now
 lim.tokens = tokens - amount
- lim.maybeNotify(now)
+ lim.maybeNotify()
 }

 type tokenBucketReconfigureArgs struct {
@@ -279,7 +279,7 @@ func (lim *Limiter) Reconfigure(now time.Time, args tokenBucketReconfigureArgs)
 lim.limit = Limit(args.NewRate)
 lim.notifyThreshold = args.NotifyThreshold
 lim.isLowProcess = false
- lim.maybeNotify(now)
+ lim.maybeNotify()
 log.Debug("[resource group controllor] after reconfigure", zap.Float64("NewTokens", lim.tokens), zap.Float64("NewRate", float64(lim.limit)), zap.Float64("NotifyThreshold", args.NotifyThreshold))
 }

@@ -311,7 +311,7 @@ func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duratio
 ok = true
 lim.tokens -= float64(n)
 }
- lim.maybeNotify(now)
+ lim.maybeNotify()
 return Reservation{
 ok:        ok,
 lim:       lim,
@@ -330,7 +330,7 @@ func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duratio

 // Calculate the remaining number of tokens resulting from the request.
 tokens -= float64(n)
- lim.maybeNotify(now)
+ lim.maybeNotify()
 // Calculate the wait duration
 var waitDuration time.Duration
 if tokens < 0 {

From d68fd7f5e1e78511eb4715531ab0b1412de12e55 Mon Sep 17 00:00:00 2001
From: Cabinfever_B 
Date: Mon, 16 Jan 2023 17:31:19 +0800
Subject: [PATCH 11/32] remove useless code

Signed-off-by: Cabinfever_B 
---
 pkg/mcs/resource_manager/client/limiter.go | 72 +++++-----------------
 1 file changed, 16 insertions(+), 56 deletions(-)

diff --git a/pkg/mcs/resource_manager/client/limiter.go b/pkg/mcs/resource_manager/client/limiter.go
index b79b4d401c6..d629185e702 100644
--- a/pkg/mcs/resource_manager/client/limiter.go
+++ b/pkg/mcs/resource_manager/client/limiter.go
@@ -58,26 +58,16 @@ const maxRequestTokens = 1e8
 // The zero value is a valid Limiter, but it will reject all events.
 // Use NewLimiter to create non-zero Limiters.
 //
-// Limiter has three main methods, Allow, Reserve, and Wait.
-// Most callers should use Wait.
-//
-// Each of the three methods consumes a single token.
-// They differ in their behavior when no token is available.
-// If no token is available, Allow returns false.
+// Limiter has one main method, Reserve.
 // If no token is available, Reserve returns a reservation for a future token
-// and the amount of time the caller must wait before using it.
-// If no token is available, Wait blocks until one can be obtained
+// and the amount of time the caller must wait before using it,
 // or its associated context.Context is canceled.
-//
-// The methods AllowN, ReserveN, and WaitN consume n tokens.
 type Limiter struct {
 mu sync.Mutex
 limit Limit
 tokens float64
 // last is the last time the limiter's tokens field was updated
- last time.Time
- // lastEvent is the latest time of a rate-limited event (past or future)
- lastEvent time.Time
+ last time.Time
 notifyThreshold float64
 lowTokensNotifyChan chan struct{}
 isLowProcess bool
 }
@@ -108,7 +98,7 @@ func NewLimiter(r Limit, tokens float64, lowTokensNotifyChan chan struct{}) *Lim
 type Reservation struct {
 ok bool
 lim *Limiter
- tokens int
+ tokens float64
 timeToAct time.Time
 // This is the Limit at reservation time, it can change later.
 limit Limit
@@ -145,8 +135,7 @@ func (r *Reservation) DelayFrom(now time.Time) time.Duration {
 }

 // CancelAt indicates that the reservation holder will not perform the reserved action
-// and reverses the effects of this Reservation on the rate limit as much as possible,
-// considering that other reservations may have already been made.
+// and returns the reserved tokens to the limiter.
 func (r *Reservation) CancelAt(now time.Time) {
 if !r.ok {
 return
@@ -158,36 +147,22 @@ func (r *Reservation) CancelAt(now time.Time) {
 if r.lim.limit == Inf || r.tokens == 0 || r.timeToAct.Before(now) {
 return
 }
-
- // calculate tokens to restore
- // The duration between lim.lastEvent and r.timeToAct tells us how many tokens were reserved
- // after r was obtained. These tokens should not be restored.
- restoreTokens := float64(r.tokens) - r.limit.tokensFromDuration(r.lim.lastEvent.Sub(r.timeToAct))
- if restoreTokens <= 0 {
- return
- }
 // advance time to now
 now, _, tokens := r.lim.advance(now)
 // calculate new number of tokens
- tokens += restoreTokens
+ tokens += float64(r.tokens)

 // update state
 r.lim.last = now
 r.lim.tokens = tokens
- if r.timeToAct == r.lim.lastEvent {
- prevEvent := r.timeToAct.Add(r.limit.durationFromTokens(float64(-r.tokens)))
- if !prevEvent.Before(now) {
- r.lim.lastEvent = prevEvent
- }
- }
 }

-// ReserveN returns a Reservation that indicates how long the caller must wait before n events happen.
+// Reserve returns a Reservation that indicates how long the caller must wait before n events happen.
 // The Limiter takes this Reservation into account when allowing future events.
-// The returned Reservation’s OK() method returns false if n exceeds the Limiter's burst size.
+// The returned Reservation’s OK() method returns false if the waiting duration exceeds the deadline.
 // Usage example:
 //
-// r := lim.ReserveN(time.Now(), 1)
+// r := lim.Reserve(time.Now(), 1)
 // if !r.OK() {
 // // Not allowed to act! Did you remember to set lim.burst to be > 0 ?
 // return
@@ -196,9 +171,7 @@ func (r *Reservation) CancelAt(now time.Time) {
 // Act()
 //
 // Use this method if you wish to wait and slow down in accordance with the rate limit without dropping events.
-// If you need to respect a deadline or cancel the delay, use Wait instead.
-// To drop or skip events exceeding rate limit, use Allow instead.
-func (lim *Limiter) ReserveN(ctx context.Context, now time.Time, n int) *Reservation {
+func (lim *Limiter) Reserve(ctx context.Context, now time.Time, n float64) *Reservation {
 // Check if ctx is already cancelled
 select {
 case <-ctx.Done():
@@ -244,6 +217,7 @@ func (lim *Limiter) maybeNotify() {
 }
 }

+// IsLowTokens returns whether the limiter is low on tokens.
 func (lim *Limiter) IsLowTokens() bool {
 if lim.isLowProcess || (lim.notifyThreshold > 0 && lim.tokens < lim.notifyThreshold) {
 return true
@@ -269,6 +243,7 @@ type tokenBucketReconfigureArgs struct {
 NotifyThreshold float64
 }

+// Reconfigure modifies all settings for the limiter.
 func (lim *Limiter) Reconfigure(now time.Time, args tokenBucketReconfigureArgs) {
 lim.mu.Lock()
 defer lim.mu.Unlock()
@@ -291,10 +266,10 @@ func (lim *Limiter) AvailableTokens(now time.Time) float64 {
 return tokens
 }

-// reserveN is a helper method for ReserveN.
+// reserveN is a helper method for Reserve.
 // maxFutureReserve specifies the maximum reservation wait duration allowed.
 // reserveN returns Reservation, not *Reservation, to avoid allocation in AllowN and WaitN.
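// Illustrative sketch, not part of the patch series: with the simplified
// CancelAt above, cancelling an unused reservation simply puts the reserved
// tokens back, so (refill for the elapsed time aside) the bucket ends where
// it started. The cost value is an assumption.
func cancelSketch(ctx context.Context, lim *Limiter) {
	now := time.Now()
	before := lim.AvailableTokens(now)
	r := lim.Reserve(ctx, now, 100) // float64 token counts as of this patch
	r.CancelAt(now)
	_ = before == lim.AvailableTokens(now) // true under this patch's semantics
}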
-func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duration) Reservation { +func (lim *Limiter) reserveN(now time.Time, n float64, maxFutureReserve time.Duration) Reservation { lim.mu.Lock() defer lim.mu.Unlock() @@ -305,24 +280,11 @@ func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duratio tokens: n, timeToAct: now, } - } else if lim.limit == 0 { - var ok bool - if lim.tokens >= float64(n) { - ok = true - lim.tokens -= float64(n) - } - lim.maybeNotify() - return Reservation{ - ok: ok, - lim: lim, - tokens: int(lim.tokens), - timeToAct: now, - } } else if n > maxRequestTokens { return Reservation{ ok: false, lim: lim, - tokens: int(lim.tokens), + tokens: lim.tokens, timeToAct: now, } } @@ -354,11 +316,9 @@ func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duratio if ok { lim.last = now lim.tokens = tokens - lim.lastEvent = r.timeToAct } else { lim.last = last } - return r } @@ -399,7 +359,7 @@ func (limit Limit) tokensFromDuration(d time.Duration) float64 { // WaitReservations is used to process a series of reservations // so that all limiter tokens are returned if one reservation fails -func WaitReservations(now time.Time, ctx context.Context, reservations []*Reservation) error { +func WaitReservations(ctx context.Context, now time.Time, reservations []*Reservation) error { if len(reservations) == 0 { return nil } From 4b26a4b5640724edf896cd3d954c10826b6a8b56 Mon Sep 17 00:00:00 2001 From: Cabinfever_B Date: Mon, 16 Jan 2023 17:38:20 +0800 Subject: [PATCH 12/32] merge limiter Signed-off-by: Cabinfever_B --- pkg/mcs/resource_manager/client/client.go | 8 +-- pkg/mcs/resource_manager/client/limiter.go | 82 ++++++---------------- 2 files changed, 26 insertions(+), 64 deletions(-) diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go index 2649c20f313..77804ae46db 100644 --- a/pkg/mcs/resource_manager/client/client.go +++ b/pkg/mcs/resource_manager/client/client.go @@ -717,20 +717,20 @@ func (gc *groupCostController) OnRequestWait( res := make([]*Reservation, 0, len(requestResourceList)) for typ, counter := range gc.run.resourceTokens { if v := GetResourceValueFromConsumption(delta, typ); v > 0 { - res = append(res, counter.limiter.ReserveN(ctx, now, int(v))) + res = append(res, counter.limiter.Reserve(ctx, now, v)) } } - if err := waitReservations(now, ctx, res); err != nil { + if err := WaitReservations(ctx, now, res); err != nil { return err } case rmpb.GroupMode_RUMode: res := make([]*Reservation, 0, len(requestUnitList)) for typ, counter := range gc.run.requestUnitTokens { if v := GetRUValueFromConsumption(delta, typ); v > 0 { - res = append(res, counter.limiter.ReserveN(ctx, now, int(v))) + res = append(res, counter.limiter.Reserve(ctx, now, v)) } } - if err := waitReservations(now, ctx, res); err != nil { + if err := WaitReservations(ctx, now, res); err != nil { return err } } diff --git a/pkg/mcs/resource_manager/client/limiter.go b/pkg/mcs/resource_manager/client/limiter.go index c27dcbc38d6..d629185e702 100644 --- a/pkg/mcs/resource_manager/client/limiter.go +++ b/pkg/mcs/resource_manager/client/limiter.go @@ -58,26 +58,16 @@ const maxRequestTokens = 1e8 // The zero value is a valid Limiter, but it will reject all events. // Use NewLimiter to create non-zero Limiters. // -// Limiter has three main methods, Allow, Reserve, and Wait. -// Most callers should use Wait. -// -// Each of the three methods consumes a single token. 
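[Editor's note: the client.go hunks above reserve once per token type and then settle everything through WaitReservations; a condensed sketch of that pattern, with hypothetical limRead/limWrite limiters:

	res := []*Reservation{
		limRead.Reserve(ctx, now, 3.5), // costs are float64 now, so fractional RUs work
		limWrite.Reserve(ctx, now, 1.0),
	}
	if err := WaitReservations(ctx, now, res); err != nil {
		return err // on failure, every reservation is canceled and its tokens are returned
	}
]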
-// They differ in their behavior when no token is available. -// If no token is available, Allow returns false. +// Limiter has one main methods Reserve. // If no token is available, Reserve returns a reservation for a future token -// and the amount of time the caller must wait before using it. -// If no token is available, Wait blocks until one can be obtained +// and the amount of time the caller must wait before using it, // or its associated context.Context is canceled. -// -// The methods AllowN, ReserveN, and WaitN consume n tokens. type Limiter struct { mu sync.Mutex limit Limit tokens float64 // last is the last time the limiter's tokens field was updated - last time.Time - // lastEvent is the latest time of a rate-limited event (past or future) - lastEvent time.Time + last time.Time notifyThreshold float64 lowTokensNotifyChan chan struct{} isLowProcess bool @@ -108,7 +98,7 @@ func NewLimiter(r Limit, tokens float64, lowTokensNotifyChan chan struct{}) *Lim type Reservation struct { ok bool lim *Limiter - tokens int + tokens float64 timeToAct time.Time // This is the Limit at reservation time, it can change later. limit Limit @@ -145,8 +135,7 @@ func (r *Reservation) DelayFrom(now time.Time) time.Duration { } // CancelAt indicates that the reservation holder will not perform the reserved action -// and reverses the effects of this Reservation on the rate limit as much as possible, -// considering that other reservations may have already been made. +// and reverses tokens which be refilled into limiter. func (r *Reservation) CancelAt(now time.Time) { if !r.ok { return @@ -158,36 +147,22 @@ func (r *Reservation) CancelAt(now time.Time) { if r.lim.limit == Inf || r.tokens == 0 || r.timeToAct.Before(now) { return } - - // calculate tokens to restore - // The duration between lim.lastEvent and r.timeToAct tells us how many tokens were reserved - // after r was obtained. These tokens should not be restored. - restoreTokens := float64(r.tokens) - r.limit.tokensFromDuration(r.lim.lastEvent.Sub(r.timeToAct)) - if restoreTokens <= 0 { - return - } // advance time to now now, _, tokens := r.lim.advance(now) // calculate new number of tokens - tokens += restoreTokens + tokens += float64(r.tokens) // update state r.lim.last = now r.lim.tokens = tokens - if r.timeToAct == r.lim.lastEvent { - prevEvent := r.timeToAct.Add(r.limit.durationFromTokens(float64(-r.tokens))) - if !prevEvent.Before(now) { - r.lim.lastEvent = prevEvent - } - } } -// ReserveN returns a Reservation that indicates how long the caller must wait before n events happen. +// Reserve returns a Reservation that indicates how long the caller must wait before n events happen. // The Limiter takes this Reservation into account when allowing future events. -// The returned Reservation’s OK() method returns false if n exceeds the Limiter's burst size. +// The returned Reservation’s OK() method returns false if waitting duration exceeds deadline. // Usage example: // -// r := lim.ReserveN(time.Now(), 1) +// r := lim.Reserve(time.Now(), 1) // if !r.OK() { // // Not allowed to act! Did you remember to set lim.burst to be > 0 ? // return @@ -196,9 +171,7 @@ func (r *Reservation) CancelAt(now time.Time) { // Act() // // Use this method if you wish to wait and slow down in accordance with the rate limit without dropping events. -// If you need to respect a deadline or cancel the delay, use Wait instead. -// To drop or skip events exceeding rate limit, use Allow instead. 
-func (lim *Limiter) ReserveN(ctx context.Context, now time.Time, n int) *Reservation { +func (lim *Limiter) Reserve(ctx context.Context, now time.Time, n float64) *Reservation { // Check if ctx is already cancelled select { case <-ctx.Done(): @@ -238,12 +211,13 @@ func (lim *Limiter) notify() { // maybeNotify checks if it's time to send the notification and if so, performs // the notification. -func (lim *Limiter) maybeNotify(now time.Time) { +func (lim *Limiter) maybeNotify() { if lim.IsLowTokens() { lim.notify() } } +// IsLowTokens returns whether the limiter is in low tokens func (lim *Limiter) IsLowTokens() bool { if lim.isLowProcess || (lim.notifyThreshold > 0 && lim.tokens < lim.notifyThreshold) { return true @@ -258,7 +232,7 @@ func (lim *Limiter) RemoveTokens(now time.Time, amount float64) { now, _, tokens := lim.advance(now) lim.last = now lim.tokens = tokens - amount - lim.maybeNotify(now) + lim.maybeNotify() } type tokenBucketReconfigureArgs struct { @@ -269,6 +243,7 @@ type tokenBucketReconfigureArgs struct { NotifyThreshold float64 } +// Reconfigure modifies all setting for limiter func (lim *Limiter) Reconfigure(now time.Time, args tokenBucketReconfigureArgs) { lim.mu.Lock() defer lim.mu.Unlock() @@ -279,7 +254,7 @@ func (lim *Limiter) Reconfigure(now time.Time, args tokenBucketReconfigureArgs) lim.limit = Limit(args.NewRate) lim.notifyThreshold = args.NotifyThreshold lim.isLowProcess = false - lim.maybeNotify(now) + lim.maybeNotify() log.Debug("[resource group controllor] after reconfigure", zap.Float64("NewTokens", lim.tokens), zap.Float64("NewRate", float64(lim.limit)), zap.Float64("NotifyThreshold", args.NotifyThreshold)) } @@ -291,10 +266,10 @@ func (lim *Limiter) AvailableTokens(now time.Time) float64 { return tokens } -// reserveN is a helper method for ReserveN. +// reserveN is a helper method for Reserve. // maxFutureReserve specifies the maximum reservation wait duration allowed. // reserveN returns Reservation, not *Reservation, to avoid allocation in AllowN and WaitN. -func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duration) Reservation { +func (lim *Limiter) reserveN(now time.Time, n float64, maxFutureReserve time.Duration) Reservation { lim.mu.Lock() defer lim.mu.Unlock() @@ -305,24 +280,11 @@ func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duratio tokens: n, timeToAct: now, } - } else if lim.limit == 0 { - var ok bool - if lim.tokens >= float64(n) { - ok = true - lim.tokens -= float64(n) - } - lim.maybeNotify(now) - return Reservation{ - ok: ok, - lim: lim, - tokens: int(lim.tokens), - timeToAct: now, - } } else if n > maxRequestTokens { return Reservation{ ok: false, lim: lim, - tokens: int(lim.tokens), + tokens: lim.tokens, timeToAct: now, } } @@ -330,7 +292,7 @@ func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duratio // Calculate the remaining number of tokens resulting from the request. 
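[Editor's note: the wait computation that follows is simply the token deficit divided by the refill rate. For example, with limit = 2 tokens/s and tokens = -4 after subtracting n, durationFromTokens(4) = 4/2 = 2s, so the reservation's timeToAct lands two seconds in the future.]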
 	tokens -= float64(n)
-	lim.maybeNotify(now)
+	lim.maybeNotify()
 	// Calculate the wait duration
 	var waitDuration time.Duration
 	if tokens < 0 {
@@ -354,11 +316,9 @@ func (lim *Limiter) reserveN(now time.Time, n int, maxFutureReserve time.Duratio
 	if ok {
 		lim.last = now
 		lim.tokens = tokens
-		lim.lastEvent = r.timeToAct
 	} else {
 		lim.last = last
 	}
-
 	return r
 }
 
@@ -397,7 +357,9 @@ func (limit Limit) tokensFromDuration(d time.Duration) float64 {
 	return d.Seconds() * float64(limit)
 }
 
-func waitReservations(now time.Time, ctx context.Context, reservations []*Reservation) error {
+// WaitReservations is used to process a series of reservations
+// so that all limiter tokens are returned if one reservation fails.
+func WaitReservations(ctx context.Context, now time.Time, reservations []*Reservation) error {
 	if len(reservations) == 0 {
 		return nil
 	}

From ac83f2cd9cb8500a6883bdb1b13478621d615e1d Mon Sep 17 00:00:00 2001
From: Cabinfever_B
Date: Tue, 17 Jan 2023 16:18:29 +0800
Subject: [PATCH 13/32] address comment

Signed-off-by: Cabinfever_B
---
 pkg/mcs/resource_manager/client/limiter.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkg/mcs/resource_manager/client/limiter.go b/pkg/mcs/resource_manager/client/limiter.go
index d629185e702..369177fcfa9 100644
--- a/pkg/mcs/resource_manager/client/limiter.go
+++ b/pkg/mcs/resource_manager/client/limiter.go
@@ -150,7 +150,7 @@ func (r *Reservation) CancelAt(now time.Time) {
 	// advance time to now
 	now, _, tokens := r.lim.advance(now)
 	// calculate new number of tokens
-	tokens += float64(r.tokens)
+	tokens += r.tokens
 
 	// update state
 	r.lim.last = now
@@ -291,7 +291,7 @@ func (lim *Limiter) reserveN(now time.Time, n float64, maxFutureReserve time.Dur
 	now, last, tokens := lim.advance(now)
 
 	// Calculate the remaining number of tokens resulting from the request.
-	tokens -= float64(n)
+	tokens -= n
 	lim.maybeNotify()
 	// Calculate the wait duration
 	var waitDuration time.Duration

From 57d526fcede4df6e0bb36d8d42d3c3fa4eded098 Mon Sep 17 00:00:00 2001
From: Cabinfever_B
Date: Tue, 17 Jan 2023 18:20:59 +0800
Subject: [PATCH 14/32] address comment and add test

Signed-off-by: Cabinfever_B
---
 pkg/mcs/resource_manager/client/limiter.go | 37 +++++++++++-----------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/pkg/mcs/resource_manager/client/limiter.go b/pkg/mcs/resource_manager/client/limiter.go
index 369177fcfa9..512a8ddfe11 100644
--- a/pkg/mcs/resource_manager/client/limiter.go
+++ b/pkg/mcs/resource_manager/client/limiter.go
@@ -45,8 +45,6 @@ func Every(interval time.Duration) Limit {
 	return 1 / Limit(interval.Seconds())
 }
 
-const maxRequestTokens = 1e8
-
 // A Limiter controls how frequently events are allowed to happen.
 // It implements a "token bucket" of size b, initially full and refilled
 // at rate r tokens per second.
@@ -63,14 +61,18 @@ const maxRequestTokens = 1e8
 // and the amount of time the caller must wait before using it,
 // or its associated context.Context is canceled.
 type Limiter struct {
-	mu     sync.Mutex
-	limit  Limit
-	tokens float64
+	mu               sync.Mutex
+	limit            Limit
+	tokens           float64
+	maxRequestTokens float64
 	// last is the last time the limiter's tokens field was updated
 	last                time.Time
 	notifyThreshold     float64
 	lowTokensNotifyChan chan struct{}
-	isLowProcess        bool
+	// To avoid sending on the channel too many times, the notifyThreshold is set to 0 after notifying.
+	// Since the notifyThreshold can then no longer show whether the limiter is in the low-token state,
+	// isLowProcess is used to track it.
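[Editor's note: notify() itself is not shown in these hunks; a plausible shape, inferred from the fields above rather than copied from the source, would be:

	func (lim *Limiter) notify() {
		lim.notifyThreshold = 0 // disarm until the next Reconfigure/SetupNotificationThreshold
		lim.isLowProcess = true
		select {
		case lim.lowTokensNotifyChan <- struct{}{}:
		default: // a notification is already pending; dropping this one is fine
		}
	}
]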
+ isLowProcess bool } // Limit returns the maximum overall event rate. @@ -82,11 +84,12 @@ func (lim *Limiter) Limit() Limit { // NewLimiter returns a new Limiter that allows events up to rate r and permits // bursts of at most b tokens. -func NewLimiter(r Limit, tokens float64, lowTokensNotifyChan chan struct{}) *Limiter { +func NewLimiter(r Limit, tokens, maxRequestTokens float64, lowTokensNotifyChan chan struct{}) *Limiter { lim := &Limiter{ limit: r, last: time.Now(), tokens: tokens, + maxRequestTokens: maxRequestTokens, lowTokensNotifyChan: lowTokensNotifyChan, } log.Info("new limiter", zap.String("limiter", fmt.Sprintf("%+v", lim))) @@ -144,7 +147,7 @@ func (r *Reservation) CancelAt(now time.Time) { r.lim.mu.Lock() defer r.lim.mu.Unlock() - if r.lim.limit == Inf || r.tokens == 0 || r.timeToAct.Before(now) { + if r.lim.limit == Inf || r.tokens == 0 { return } // advance time to now @@ -189,8 +192,8 @@ func (lim *Limiter) Reserve(ctx context.Context, now time.Time, n float64) *Rese return &r } -// SetupNotificationAt enables the notification at the given threshold. -func (lim *Limiter) SetupNotificationAt(now time.Time, threshold float64) { +// SetupNotificationThreshold enables the notification at the given threshold. +func (lim *Limiter) SetupNotificationThreshold(now time.Time, threshold float64) { lim.advance(now) lim.notifyThreshold = threshold } @@ -268,7 +271,7 @@ func (lim *Limiter) AvailableTokens(now time.Time) float64 { // reserveN is a helper method for Reserve. // maxFutureReserve specifies the maximum reservation wait duration allowed. -// reserveN returns Reservation, not *Reservation, to avoid allocation in AllowN and WaitN. +// reserveN returns Reservation, not *Reservation. func (lim *Limiter) reserveN(now time.Time, n float64, maxFutureReserve time.Duration) Reservation { lim.mu.Lock() defer lim.mu.Unlock() @@ -280,19 +283,16 @@ func (lim *Limiter) reserveN(now time.Time, n float64, maxFutureReserve time.Dur tokens: n, timeToAct: now, } - } else if n > maxRequestTokens { + } else if n > lim.maxRequestTokens { return Reservation{ - ok: false, - lim: lim, - tokens: lim.tokens, - timeToAct: now, + ok: false, + lim: lim, } } now, last, tokens := lim.advance(now) // Calculate the remaining number of tokens resulting from the request. 
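[Editor's note: advance is the lazy-refill step; there is no background ticker topping the bucket up. Conceptually, using the field names above and the tokensFromDuration formula shown later in this patch:

	// current balance = stored balance + rate * elapsed time since the last update
	elapsed := now.Sub(lim.last)
	tokens := lim.tokens + float64(lim.limit)*elapsed.Seconds()
]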
tokens -= n - lim.maybeNotify() // Calculate the wait duration var waitDuration time.Duration if tokens < 0 { @@ -300,7 +300,7 @@ func (lim *Limiter) reserveN(now time.Time, n float64, maxFutureReserve time.Dur } // Decide result - ok := n <= maxRequestTokens && waitDuration <= maxFutureReserve + ok := n <= lim.maxRequestTokens && waitDuration <= maxFutureReserve // Prepare reservation r := Reservation{ @@ -316,6 +316,7 @@ func (lim *Limiter) reserveN(now time.Time, n float64, maxFutureReserve time.Dur if ok { lim.last = now lim.tokens = tokens + lim.maybeNotify() } else { lim.last = last } From 0c46eafcb86fe02ff74f6a0aea96472e2fc07f65 Mon Sep 17 00:00:00 2001 From: Cabinfever_B Date: Tue, 17 Jan 2023 18:21:28 +0800 Subject: [PATCH 15/32] address comment and add test Signed-off-by: Cabinfever_B --- .../resource_manager/client/limiter_test.go | 170 ++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 pkg/mcs/resource_manager/client/limiter_test.go diff --git a/pkg/mcs/resource_manager/client/limiter_test.go b/pkg/mcs/resource_manager/client/limiter_test.go new file mode 100644 index 00000000000..11e3e1eb2a2 --- /dev/null +++ b/pkg/mcs/resource_manager/client/limiter_test.go @@ -0,0 +1,170 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS,g +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package client + +import ( + "context" + "math" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +const ( + d = 1 * time.Second +) + +var ( + t0 = time.Now() + t1 = t0.Add(time.Duration(1) * d) + t2 = t0.Add(time.Duration(2) * d) + t3 = t0.Add(time.Duration(3) * d) + t4 = t0.Add(time.Duration(4) * d) + t5 = t0.Add(time.Duration(5) * d) + t6 = t0.Add(time.Duration(6) * d) + t7 = t0.Add(time.Duration(7) * d) + t8 = t0.Add(time.Duration(8) * d) +) + +type request struct { + t time.Time + n float64 + act time.Time + ok bool +} + +// dFromDuration converts a duration to the nearest multiple of the global constant d. +func dFromDuration(dur time.Duration) int { + // Add d/2 to dur so that integer division will round to + // the nearest multiple instead of truncating. + // (We don't care about small inaccuracies.) 
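[Editor's note: a quick check of the rounding in dFromDuration, with d = 1s as declared above: (1400ms + 500ms) / 1000ms truncates to 1, while (1600ms + 500ms) / 1000ms truncates to 2 — i.e. round-to-nearest rather than round-down.]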
+ return int((dur + (d / 2)) / d) +} + +// dSince returns multiples of d since t0 +func dSince(t time.Time) int { + return dFromDuration(t.Sub(t0)) +} + +func runReserveMax(t *testing.T, lim *Limiter, req request) *Reservation { + return runReserve(t, lim, req, InfDuration) +} + +func runReserve(t *testing.T, lim *Limiter, req request, maxReserve time.Duration) *Reservation { + t.Helper() + r := lim.reserveN(req.t, req.n, maxReserve) + if r.ok && (dSince(r.timeToAct) != dSince(req.act)) || r.ok != req.ok { + t.Errorf("lim.reserveN(t%d, %v, %v) = (t%d, %v) want (t%d, %v)", + dSince(req.t), req.n, maxReserve, dSince(r.timeToAct), r.ok, dSince(req.act), req.ok) + } + return &r +} + +func checkTokens(re *require.Assertions, lim *Limiter, t time.Time, expected float64) { + re.LessOrEqual(math.Abs(expected-lim.AvailableTokens(t)), 1e-2) +} + +func TestSimpleReserve(t *testing.T) { + lim := NewLimiter(1, 2, 1000, make(chan struct{}, 1)) + + runReserveMax(t, lim, request{t0, 3, t1, true}) + runReserveMax(t, lim, request{t0, 3, t4, true}) + runReserveMax(t, lim, request{t3, 2, t6, true}) + + runReserve(t, lim, request{t3, 2, t7, false}, time.Second*4) + runReserveMax(t, lim, request{t5, 2000, t6, false}) + + runReserve(t, lim, request{t3, 2, t8, true}, time.Second*8) +} + +func TestReconfig(t *testing.T) { + re := require.New(t) + lim := NewLimiter(1, 2, 1000, make(chan struct{}, 1)) + + runReserveMax(t, lim, request{t0, 4, t2, true}) + args := tokenBucketReconfigureArgs{ + NewTokens: 6., + NewRate: 2, + } + lim.Reconfigure(t1, args) + checkTokens(re, lim, t1, 5) + checkTokens(re, lim, t2, 7) +} + +func TestNotify(t *testing.T) { + nc := make(chan struct{}, 1) + lim := NewLimiter(1, 0, 1000, nc) + + args := tokenBucketReconfigureArgs{ + NewTokens: 1000., + NewRate: 2, + NotifyThreshold: 400, + } + lim.Reconfigure(t1, args) + runReserveMax(t, lim, request{t2, 1000, t2, true}) + select { + case <-nc: + default: + t.Errorf("no notify") + } +} + +func TestCancel(t *testing.T) { + ctx := context.Background() + ctx1, cancel1 := context.WithDeadline(ctx, t2) + re := require.New(t) + nc := make(chan struct{}, 1) + lim1 := NewLimiter(1, 10, 100, nc) + lim2 := NewLimiter(1, 0, 100, nc) + + r1 := runReserveMax(t, lim1, request{t0, 5, t0, true}) + checkTokens(re, lim1, t0, 5) + r1.CancelAt(t1) + checkTokens(re, lim1, t1, 11) + + r1 = lim1.Reserve(ctx, t1, 5) + r2 := lim2.Reserve(ctx1, t1, 5) + checkTokens(re, lim1, t2, 7) + checkTokens(re, lim2, t2, 2) + err := WaitReservations(ctx, t2, []*Reservation{r1, r2}) + re.Error(err) + checkTokens(re, lim1, t3, 13) + checkTokens(re, lim2, t3, 3) + cancel1() + + ctx2, cancel2 := context.WithCancel(ctx) + r1 = lim1.Reserve(ctx, t3, 5) + r2 = lim2.Reserve(ctx2, t3, 5) + checkTokens(re, lim1, t3, 8) + checkTokens(re, lim2, t3, -2) + var wg sync.WaitGroup + wg.Add(1) + go func() { + err := WaitReservations(ctx2, t3, []*Reservation{r1, r2}) + re.Error(err) + wg.Done() + }() + time.Sleep(1 * time.Second) + cancel2() + wg.Wait() + checkTokens(re, lim1, t5, 15) + checkTokens(re, lim2, t5, 5) +} From 67d79628f964b2d2115a1b6863549def1ee7ac82 Mon Sep 17 00:00:00 2001 From: Cabinfever_B Date: Tue, 17 Jan 2023 18:34:57 +0800 Subject: [PATCH 16/32] merge master Signed-off-by: Cabinfever_B --- pkg/mcs/resource_manager/server/grpc_service.go | 2 +- pkg/mcs/resource_manager/server/manager.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/mcs/resource_manager/server/grpc_service.go b/pkg/mcs/resource_manager/server/grpc_service.go index 
9746d9e1382..a6b1c8f830f 100644 --- a/pkg/mcs/resource_manager/server/grpc_service.go +++ b/pkg/mcs/resource_manager/server/grpc_service.go @@ -78,7 +78,7 @@ func (s *Service) GetManager() *Manager { // GetResourceGroup implements ResourceManagerServer.GetResourceGroup. func (s *Service) GetResourceGroup(ctx context.Context, req *rmpb.GetResourceGroupRequest) (*rmpb.GetResourceGroupResponse, error) { - rg := s.manager.GetResourceDuplicateGroup(req.ResourceGroupName) + rg := s.manager.GetResourceGroup(req.ResourceGroupName) if rg == nil { return nil, errors.New("resource group not found") } diff --git a/pkg/mcs/resource_manager/server/manager.go b/pkg/mcs/resource_manager/server/manager.go index 2cd25b629de..ccb22c6ab06 100644 --- a/pkg/mcs/resource_manager/server/manager.go +++ b/pkg/mcs/resource_manager/server/manager.go @@ -112,8 +112,8 @@ func (m *Manager) DeleteResourceGroup(name string) error { return nil } -// GetResourceDuplicateGroup returns a copy of a resource group. -func (m *Manager) GetResourceDuplicateGroup(name string) *ResourceGroup { +// GetResourceGroup returns a copy of a resource group. +func (m *Manager) GetResourceGroup(name string) *ResourceGroup { m.RLock() defer m.RUnlock() if group, ok := m.groups[name]; ok { From 122c52b4dded73807bc6282034cf693cbbe97a9f Mon Sep 17 00:00:00 2001 From: Cabinfever_B Date: Tue, 17 Jan 2023 18:40:10 +0800 Subject: [PATCH 17/32] fix test Signed-off-by: Cabinfever_B --- pkg/mcs/resource_manager/client/limiter.go | 4 ++-- pkg/mcs/resource_manager/client/limiter_test.go | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pkg/mcs/resource_manager/client/limiter.go b/pkg/mcs/resource_manager/client/limiter.go index 512a8ddfe11..76825a2e5e5 100644 --- a/pkg/mcs/resource_manager/client/limiter.go +++ b/pkg/mcs/resource_manager/client/limiter.go @@ -84,10 +84,10 @@ func (lim *Limiter) Limit() Limit { // NewLimiter returns a new Limiter that allows events up to rate r and permits // bursts of at most b tokens. 
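[Editor's note: PATCH 17 threads `now` through the constructor, which is what lets the updated tests below run on a fixed timeline instead of wall-clock time; construction becomes:

	t0 := time.Now() // a frozen reference instant shared by every test step
	lim := NewLimiter(t0, 1 /* rate */, 2 /* tokens */, 1000 /* maxRequestTokens */, make(chan struct{}, 1))
]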
-func NewLimiter(r Limit, tokens, maxRequestTokens float64, lowTokensNotifyChan chan struct{}) *Limiter { +func NewLimiter(now time.Time, r Limit, tokens, maxRequestTokens float64, lowTokensNotifyChan chan struct{}) *Limiter { lim := &Limiter{ limit: r, - last: time.Now(), + last: now, tokens: tokens, maxRequestTokens: maxRequestTokens, lowTokensNotifyChan: lowTokensNotifyChan, diff --git a/pkg/mcs/resource_manager/client/limiter_test.go b/pkg/mcs/resource_manager/client/limiter_test.go index 11e3e1eb2a2..0a0ae48d7c6 100644 --- a/pkg/mcs/resource_manager/client/limiter_test.go +++ b/pkg/mcs/resource_manager/client/limiter_test.go @@ -79,11 +79,11 @@ func runReserve(t *testing.T, lim *Limiter, req request, maxReserve time.Duratio } func checkTokens(re *require.Assertions, lim *Limiter, t time.Time, expected float64) { - re.LessOrEqual(math.Abs(expected-lim.AvailableTokens(t)), 1e-2) + re.LessOrEqual(math.Abs(expected-lim.AvailableTokens(t)), 1e-7) } func TestSimpleReserve(t *testing.T) { - lim := NewLimiter(1, 2, 1000, make(chan struct{}, 1)) + lim := NewLimiter(t0, 1, 2, 1000, make(chan struct{}, 1)) runReserveMax(t, lim, request{t0, 3, t1, true}) runReserveMax(t, lim, request{t0, 3, t4, true}) @@ -97,7 +97,7 @@ func TestSimpleReserve(t *testing.T) { func TestReconfig(t *testing.T) { re := require.New(t) - lim := NewLimiter(1, 2, 1000, make(chan struct{}, 1)) + lim := NewLimiter(t0, 1, 2, 1000, make(chan struct{}, 1)) runReserveMax(t, lim, request{t0, 4, t2, true}) args := tokenBucketReconfigureArgs{ @@ -111,7 +111,7 @@ func TestReconfig(t *testing.T) { func TestNotify(t *testing.T) { nc := make(chan struct{}, 1) - lim := NewLimiter(1, 0, 1000, nc) + lim := NewLimiter(t0, 1, 0, 1000, nc) args := tokenBucketReconfigureArgs{ NewTokens: 1000., @@ -132,8 +132,8 @@ func TestCancel(t *testing.T) { ctx1, cancel1 := context.WithDeadline(ctx, t2) re := require.New(t) nc := make(chan struct{}, 1) - lim1 := NewLimiter(1, 10, 100, nc) - lim2 := NewLimiter(1, 0, 100, nc) + lim1 := NewLimiter(t0, 1, 10, 100, nc) + lim2 := NewLimiter(t0, 1, 0, 100, nc) r1 := runReserveMax(t, lim1, request{t0, 5, t0, true}) checkTokens(re, lim1, t0, 5) From 3b303336c142c61c5ce43ec16d1a04315b0e8712 Mon Sep 17 00:00:00 2001 From: Cabinfever_B Date: Wed, 18 Jan 2023 01:39:55 +0800 Subject: [PATCH 18/32] merge master Signed-off-by: Cabinfever_B --- pkg/mcs/resource_manager/client/client.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go index cdb7821eda8..a93a3e56ba7 100644 --- a/pkg/mcs/resource_manager/client/client.go +++ b/pkg/mcs/resource_manager/client/client.go @@ -420,7 +420,7 @@ func (gc *groupCostController) initRunState(ctx context.Context) { gc.run.requestUnitTokens = make(map[rmpb.RequestUnitType]*tokenCounter) for typ := range requestUnitList { counter := &tokenCounter{ - limiter: NewLimiter(now, 0, gc.mainCfg.maxRequestTokens, initialRequestUnits, gc.lowRUNotifyChan), + limiter: NewLimiter(now, 0, initialRequestUnits, gc.mainCfg.maxRequestTokens, gc.lowRUNotifyChan), avgRUPerSec: initialRequestUnits / gc.run.targetPeriod.Seconds() * 2, avgLastTime: now, } @@ -430,7 +430,7 @@ func (gc *groupCostController) initRunState(ctx context.Context) { gc.run.resourceTokens = make(map[rmpb.ResourceType]*tokenCounter) for typ := range requestResourceList { counter := &tokenCounter{ - limiter: NewLimiter(now, 0, gc.mainCfg.maxRequestTokens, initialRequestUnits, gc.lowRUNotifyChan), + limiter: NewLimiter(now, 0, 
initialRequestUnits, gc.mainCfg.maxRequestTokens, gc.lowRUNotifyChan), avgRUPerSec: initialRequestUnits / gc.run.targetPeriod.Seconds() * 2, avgLastTime: now, } From 55fedf50a13197ceaf8f0625c41a973fe02a7097 Mon Sep 17 00:00:00 2001 From: Cabinfever_B Date: Wed, 18 Jan 2023 01:59:18 +0800 Subject: [PATCH 19/32] merge master Signed-off-by: Cabinfever_B --- pkg/mcs/resource_manager/client/client.go | 20 ++++++++++---------- pkg/mcs/resource_manager/client/config.go | 8 ++++---- pkg/mcs/resource_manager/client/model.go | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go index a93a3e56ba7..d1b4f0cb423 100644 --- a/pkg/mcs/resource_manager/client/client.go +++ b/pkg/mcs/resource_manager/client/client.go @@ -362,7 +362,7 @@ type groupCostController struct { // request completes successfully. initialRequestCompleted bool - resourceTokens map[rmpb.ResourceType]*tokenCounter + resourceTokens map[rmpb.RawResourceType]*tokenCounter requestUnitTokens map[rmpb.RequestUnitType]*tokenCounter } } @@ -427,7 +427,7 @@ func (gc *groupCostController) initRunState(ctx context.Context) { gc.run.requestUnitTokens[typ] = counter } case rmpb.GroupMode_RawMode: - gc.run.resourceTokens = make(map[rmpb.ResourceType]*tokenCounter) + gc.run.resourceTokens = make(map[rmpb.RawResourceType]*tokenCounter) for typ := range requestResourceList { counter := &tokenCounter{ limiter: NewLimiter(now, 0, initialRequestUnits, gc.mainCfg.maxRequestTokens, gc.lowRUNotifyChan), @@ -509,7 +509,7 @@ func (gc *groupCostController) updateAvgResourcePerSec(ctx context.Context) { if !gc.calcAvg(counter, GetResourceValueFromConsumption(gc.run.consumption, typ)) { continue } - log.Info("[resource group controllor] update avg ru per sec", zap.String("name", gc.Name), zap.String("type", rmpb.ResourceType_name[int32(typ)]), zap.Float64("avgRUPerSec", counter.avgRUPerSec)) + log.Info("[resource group controllor] update avg ru per sec", zap.String("name", gc.Name), zap.String("type", rmpb.RawResourceType_name[int32(typ)]), zap.Float64("avgRUPerSec", counter.avgRUPerSec)) } } @@ -573,7 +573,7 @@ func (gc *groupCostController) handleResourceTokenResponse(resp *rmpb.TokenBucke // todo: check whether grant = 0 counter, ok := gc.run.resourceTokens[typ] if !ok { - log.Warn("not support this resource type", zap.String("type", rmpb.ResourceType_name[int32(typ)])) + log.Warn("not support this resource type", zap.String("type", rmpb.RawResourceType_name[int32(typ)])) continue } gc.modifyTokenCounter(counter, grantedTB.GetGrantedTokens(), grantedTB.GetTrickleTimeMs()) @@ -586,7 +586,7 @@ func (gc *groupCostController) handleRUTokenResponse(resp *rmpb.TokenBucketRespo // todo: check whether grant = 0 counter, ok := gc.run.requestUnitTokens[typ] if !ok { - log.Warn("not support this resource type", zap.String("type", rmpb.ResourceType_name[int32(typ)])) + log.Warn("not support this resource type", zap.String("type", rmpb.RawResourceType_name[int32(typ)])) continue } gc.modifyTokenCounter(counter, grantedTB.GetGrantedTokens(), grantedTB.GetTrickleTimeMs()) @@ -648,20 +648,20 @@ func (gc *groupCostController) collectRequestAndConsumption(low bool) *rmpb.Toke selected := !low switch gc.mode { case rmpb.GroupMode_RawMode: - requests := make([]*rmpb.ResourceItem, 0, len(requestResourceList)) + requests := make([]*rmpb.RawResourceItem, 0, len(requestResourceList)) for typ, counter := range gc.run.resourceTokens { if low && counter.limiter.IsLowTokens() { selected 
= true
 			}
-			request := &rmpb.ResourceItem{
+			request := &rmpb.RawResourceItem{
 				Type:  typ,
 				Value: gc.calcRequest(counter),
 			}
 			requests = append(requests, request)
 		}
-		req.Request = &rmpb.TokenBucketRequest_ResourceItems{
-			ResourceItems: &rmpb.TokenBucketRequest_RequestResource{
-				RequestResource: requests,
+		req.Request = &rmpb.TokenBucketRequest_RawResourceItems{
+			RawResourceItems: &rmpb.TokenBucketRequest_RequestRawResource{
+				RequestRawResource: requests,
 			},
 		}
 	case rmpb.GroupMode_RUMode:
diff --git a/pkg/mcs/resource_manager/client/config.go b/pkg/mcs/resource_manager/client/config.go
index ee4522a32c7..5267eddce09 100644
--- a/pkg/mcs/resource_manager/client/config.go
+++ b/pkg/mcs/resource_manager/client/config.go
@@ -25,10 +25,10 @@ var (
 		rmpb.RequestUnitType_RRU: {},
 		rmpb.RequestUnitType_WRU: {},
 	}
-	requestResourceList map[rmpb.ResourceType]struct{} = map[rmpb.ResourceType]struct{}{
-		rmpb.ResourceType_IOReadFlow:  {},
-		rmpb.ResourceType_IOWriteFlow: {},
-		rmpb.ResourceType_CPU:         {},
+	requestResourceList map[rmpb.RawResourceType]struct{} = map[rmpb.RawResourceType]struct{}{
+		rmpb.RawResourceType_IOReadFlow:  {},
+		rmpb.RawResourceType_IOWriteFlow: {},
+		rmpb.RawResourceType_CPU:         {},
 	}
 )
diff --git a/pkg/mcs/resource_manager/client/model.go b/pkg/mcs/resource_manager/client/model.go
index 6b516f9d0a3..16972cbe54f 100644
--- a/pkg/mcs/resource_manager/client/model.go
+++ b/pkg/mcs/resource_manager/client/model.go
@@ -131,7 +131,7 @@ func GetRUValueFromConsumption(custom *rmpb.Consumption, typ rmpb.RequestUnitTyp
 	return 0
 }
 
-func GetResourceValueFromConsumption(custom *rmpb.Consumption, typ rmpb.ResourceType) float64 {
+func GetResourceValueFromConsumption(custom *rmpb.Consumption, typ rmpb.RawResourceType) float64 {
 	switch typ {
 	case 0:
 		return custom.TotalCpuTimeMs

From ed807b93fcebb8d49a6511f3e4e4377918666cc4 Mon Sep 17 00:00:00 2001
From: Cabinfever_B
Date: Wed, 18 Jan 2023 17:22:56 +0800
Subject: [PATCH 20/32] fix static check

Signed-off-by: Cabinfever_B
---
 pkg/mcs/resource_manager/client/client.go | 120 +++++++++++-----------
 pkg/mcs/resource_manager/client/model.go  |   8 +-
 2 files changed, 62 insertions(+), 66 deletions(-)

diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go
index d1b4f0cb423..055c11ea3f4 100644
--- a/pkg/mcs/resource_manager/client/client.go
+++ b/pkg/mcs/resource_manager/client/client.go
@@ -25,13 +25,18 @@ import (
 	"go.uber.org/zap"
 )
 
-var defaultWhiteList = map[string]struct{}{"default": {}}
+var defaultWhiteList = map[string]struct{}{
+	"default": {},
+	"":        {},
+}
 
+// ResourceGroupKVInterceptor is used as the quota limit controller for resource groups using the KV store.
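[Editor's note: a hedged sketch of how a storage client might drive the interceptor defined just below around a single KV call; doKVRequest, reqInfo, and respInfo are hypothetical stand-ins:

	if err := interceptor.OnRequestWait(ctx, "default", reqInfo); err != nil {
		return err // throttled: the context ended before enough tokens were available
	}
	respInfo := doKVRequest(ctx) // hypothetical KV request
	interceptor.OnResponse(ctx, "default", reqInfo, respInfo)
]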
type ResourceGroupKVInterceptor interface {
 	OnRequestWait(ctx context.Context, resourceGroupName string, info RequestInfo) error
 	OnResponse(ctx context.Context, resourceGroupName string, req RequestInfo, resp ResponseInfo) error
 }
 
+// ResourceGroupProvider provides some APIs to interact with the resource manager server.
 type ResourceGroupProvider interface {
 	ListResourceGroups(ctx context.Context) ([]*rmpb.ResourceGroup, error)
 	GetResourceGroup(ctx context.Context, resourceGroupName string) (*rmpb.ResourceGroup, error)
@@ -41,18 +46,10 @@ type ResourceGroupProvider interface {
 	AcquireTokenBuckets(ctx context.Context, request *rmpb.TokenBucketsRequest) ([]*rmpb.TokenBucketResponse, error)
 }
 
-func NewResourceGroupController(
-	clientUniqueId uint64,
-	provider ResourceGroupProvider,
-	requestUnitConfig *RequestUnitConfig,
-) (*resourceGroupsController, error) {
-	return newResourceGroupController(clientUniqueId, provider, requestUnitConfig)
-}
-
 var _ ResourceGroupKVInterceptor = (*resourceGroupsController)(nil)
 
 type resourceGroupsController struct {
-	clientUniqueId   uint64
+	clientUniqueID   uint64
 	provider         ResourceGroupProvider
 	groupsController sync.Map
 	config           *Config
@@ -87,7 +84,8 @@ type resourceGroupsController struct {
 	}
 }
 
-func newResourceGroupController(clientUniqueId uint64, provider ResourceGroupProvider, requestUnitConfig *RequestUnitConfig) (*resourceGroupsController, error) {
+// NewResourceGroupController returns a new resourceGroupsController which implements ResourceGroupKVInterceptor
+func NewResourceGroupController(clientUniqueID uint64, provider ResourceGroupProvider, requestUnitConfig *RequestUnitConfig) (*resourceGroupsController, error) {
 	var config *Config
 	if requestUnitConfig != nil {
 		config = generateConfig(requestUnitConfig)
@@ -95,7 +93,7 @@ func newResourceGroupController(clientUniqueId uint64, provider ResourceGroupPro
 		config = DefaultConfig()
 	}
 	return &resourceGroupsController{
-		clientUniqueId:     clientUniqueId,
+		clientUniqueID:     clientUniqueID,
 		provider:           provider,
 		config:             config,
 		lowTokenNotifyChan: make(chan struct{}, 1),
@@ -104,11 +102,12 @@ func newResourceGroupController(clientUniqueId uint64, provider ResourceGroupPro
 	}, nil
 }
 
+// Start starts the resourceGroupController service
 func (c *resourceGroupsController) Start(ctx context.Context) error {
 	if err := c.updateAllResourceGroups(ctx); err != nil {
 		log.Error("update ResourceGroup failed", zap.Error(err))
 	}
-	c.initRunState(ctx)
+	c.initRunState()
 	go c.mainLoop(ctx)
 	return nil
 }
@@ -119,9 +118,9 @@ func (c *resourceGroupsController) putResourceGroup(ctx context.Context, name st
 		return nil, err
 	}
 	log.Info("create resource group cost controller", zap.String("name", group.GetName()))
-	gc := newGroupCostController(ctx, group, c.config, c.lowTokenNotifyChan)
+	gc := newGroupCostController(group, c.config, c.lowTokenNotifyChan)
+	gc.initRunState()
 	c.groupsController.Store(group.GetName(), gc)
-	gc.initRunState(ctx)
 	return gc, nil
 }
 
@@ -133,7 +132,7 @@ func (c *resourceGroupsController) updateAllResourceGroups(ctx context.Context)
 	lastedGroups := make(map[string]struct{})
 	for _, group := range groups {
 		log.Info("create resource group cost controller", zap.String("name", group.GetName()))
-		gc := newGroupCostController(ctx, group, c.config, c.lowTokenNotifyChan)
+		gc := newGroupCostController(group, c.config, c.lowTokenNotifyChan)
 		c.groupsController.Store(group.GetName(), gc)
 		lastedGroups[group.GetName()] = struct{}{}
 	}
@@ -147,14 +146,14 @@
return nil } -func (c *resourceGroupsController) initRunState(ctx context.Context) { +func (c *resourceGroupsController) initRunState() { now := time.Now() c.run.now = now c.run.lastRequestTime = now c.run.targetPeriod = c.config.targetPeriod c.groupsController.Range(func(name, value any) bool { gc := value.(*groupCostController) - gc.initRunState(ctx) + gc.initRunState() return true }) } @@ -188,15 +187,15 @@ func (c *resourceGroupsController) shouldReportConsumption() bool { return false } -func (c *resourceGroupsController) updateAvgRequestResourcePerSec(ctx context.Context) { +func (c *resourceGroupsController) updateAvgRequestResourcePerSec() { c.groupsController.Range(func(name, value any) bool { gc := value.(*groupCostController) - gc.updateAvgRequestResourcePerSec(ctx) + gc.updateAvgRequestResourcePerSec() return true }) } -func (c *resourceGroupsController) handleTokenBucketResponse(ctx context.Context, resp []*rmpb.TokenBucketResponse) { +func (c *resourceGroupsController) handleTokenBucketResponse(resp []*rmpb.TokenBucketResponse) { for _, res := range resp { name := res.GetResourceGroupName() v, ok := c.groupsController.Load(name) @@ -204,7 +203,7 @@ func (c *resourceGroupsController) handleTokenBucketResponse(ctx context.Context log.Warn("A non-existent resource group was found when handle token response.", zap.String("name", name)) } gc := v.(*groupCostController) - gc.handleTokenBucketResponse(ctx, res) + gc.handleTokenBucketResponse(res) } } @@ -270,24 +269,24 @@ func (c *resourceGroupsController) mainLoop(ctx context.Context) { c.run.requestInProgress = false if resp != nil { c.updateRunState(ctx) - c.handleTokenBucketResponse(ctx, resp) + c.handleTokenBucketResponse(resp) } else { // A nil response indicates a failure (which would have been logged). 
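[Editor's note: a condensed skeleton of the event loop these hunks edit, to make its wake-up sources explicit; `c` and `ticker` come from the surrounding method and the bodies are elided into comments:

	for {
		select {
		case <-ctx.Done():
			return
		case resp := <-c.responseChan:
			// a token-bucket RPC finished; a nil resp means it failed and must be retried
			_ = resp
		case <-ticker.C:
			// periodic consumption report (also retries a previously failed request)
		case <-c.lowTokenNotifyChan:
			// some limiter dropped below its threshold: request tokens early
		}
	}
]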
 				c.run.requestNeedsRetry = true
 			}
 		case <-ticker.C:
 			c.updateRunState(ctx)
-			c.updateAvgRequestResourcePerSec(ctx)
+			c.updateAvgRequestResourcePerSec()
 			if c.run.requestNeedsRetry || c.shouldReportConsumption() {
 				c.run.requestNeedsRetry = false
 				c.collectTokenBucketRequests(ctx, "report", false /* select all */)
 			}
 		case <-c.lowTokenNotifyChan:
 			c.updateRunState(ctx)
-			c.updateAvgRequestResourcePerSec(ctx)
+			c.updateAvgRequestResourcePerSec()
 			if !c.run.requestInProgress {
-				c.collectTokenBucketRequests(ctx, "low_ru", false /* only select low tokens resource group */)
-				//c.collectTokenBucketRequests(ctx, "low_ru", true /* only select low tokens resource group */)
+				// c.collectTokenBucketRequests(ctx, "low_ru", false /* only select low tokens resource group */)
+				c.collectTokenBucketRequests(ctx, "low_ru", true /* only select low tokens resource group */)
 			}
 		default:
 			c.handleTokenBucketTrickEvent(ctx)
@@ -386,7 +385,7 @@ type tokenCounter struct {
 	limiter *Limiter
 }
 
-func newGroupCostController(ctx context.Context, group *rmpb.ResourceGroup, mainCfg *Config, lowRUNotifyChan chan struct{}) *groupCostController {
+func newGroupCostController(group *rmpb.ResourceGroup, mainCfg *Config, lowRUNotifyChan chan struct{}) *groupCostController {
 	gc := &groupCostController{
 		ResourceGroup: group,
 		mainCfg:       mainCfg,
@@ -406,7 +405,7 @@ func newGroupCostController(ctx context.Context, group *rmpb.ResourceGroup, main
 	return gc
 }
 
-func (gc *groupCostController) initRunState(ctx context.Context) {
+func (gc *groupCostController) initRunState() {
 	now := time.Now()
 	gc.run.now = now
 	gc.run.targetPeriod = gc.mainCfg.targetPeriod
@@ -446,34 +445,34 @@ func (gc *groupCostController) updateRunState(ctx context.Context) {
 		calc.Trickle(ctx, deltaConsumption)
 	}
 	gc.mu.Lock()
-	Add(gc.mu.consumption, deltaConsumption)
+	add(gc.mu.consumption, deltaConsumption)
 	*gc.run.consumption = *gc.mu.consumption
 	gc.mu.Unlock()
 	// remove tokens
 	switch gc.mode {
 	case rmpb.GroupMode_RUMode:
 		for typ, counter := range gc.run.requestUnitTokens {
-			if v := GetRUValueFromConsumption(deltaConsumption, typ); v > 0 {
+			if v := getRUValueFromConsumption(deltaConsumption, typ); v > 0 {
 				counter.limiter.RemoveTokens(newTime, v)
 			}
 		}
 	case rmpb.GroupMode_RawMode:
 		for typ, counter := range gc.run.resourceTokens {
-			if v := GetResourceValueFromConsumption(deltaConsumption, typ); v > 0 {
+			if v := getResourceValueFromConsumption(deltaConsumption, typ); v > 0 {
 				counter.limiter.RemoveTokens(newTime, v)
 			}
 		}
 	}
-	log.Info("update run state", zap.Any("request unit consumption", gc.run.consumption))
+	log.Debug("update run state", zap.Any("request unit consumption", gc.run.consumption))
 	gc.run.now = newTime
 }
 
-func (gc *groupCostController) updateAvgRequestResourcePerSec(ctx context.Context) {
+func (gc *groupCostController) updateAvgRequestResourcePerSec() {
 	switch gc.mode {
 	case rmpb.GroupMode_RawMode:
-		gc.updateAvgResourcePerSec(ctx)
+		gc.updateAvgResourcePerSec()
 	case rmpb.GroupMode_RUMode:
-		gc.updateAvgRUPerSec(ctx)
+		gc.updateAvgRUPerSec()
 	}
 }
 
@@ -485,7 +484,7 @@ func (gc *groupCostController) handleTokenBucketTrickEvent(ctx context.Context)
 			case <-counter.setupNotificationCh:
 				counter.setupNotificationTimer = nil
 				counter.setupNotificationCh = nil
-				counter.limiter.SetupNotificationThreshold(gc.run.now, float64(counter.setupNotificationThreshold))
+				counter.limiter.SetupNotificationThreshold(gc.run.now, counter.setupNotificationThreshold)
 				gc.updateRunState(ctx)
 			default:
 			}
@@ -496,7 +495,7 @@ func (gc *groupCostController) handleTokenBucketTrickEvent(ctx
context.Context) case <-counter.setupNotificationCh: counter.setupNotificationTimer = nil counter.setupNotificationCh = nil - counter.limiter.SetupNotificationThreshold(gc.run.now, float64(counter.setupNotificationThreshold)) + counter.limiter.SetupNotificationThreshold(gc.run.now, counter.setupNotificationThreshold) gc.updateRunState(ctx) default: } @@ -504,21 +503,21 @@ func (gc *groupCostController) handleTokenBucketTrickEvent(ctx context.Context) } } -func (gc *groupCostController) updateAvgResourcePerSec(ctx context.Context) { +func (gc *groupCostController) updateAvgResourcePerSec() { for typ, counter := range gc.run.resourceTokens { - if !gc.calcAvg(counter, GetResourceValueFromConsumption(gc.run.consumption, typ)) { + if !gc.calcAvg(counter, getResourceValueFromConsumption(gc.run.consumption, typ)) { continue } - log.Info("[resource group controllor] update avg ru per sec", zap.String("name", gc.Name), zap.String("type", rmpb.RawResourceType_name[int32(typ)]), zap.Float64("avgRUPerSec", counter.avgRUPerSec)) + log.Debug("[resource group controllor] update avg ru per sec", zap.String("name", gc.Name), zap.String("type", rmpb.RawResourceType_name[int32(typ)]), zap.Float64("avgRUPerSec", counter.avgRUPerSec)) } } -func (gc *groupCostController) updateAvgRUPerSec(ctx context.Context) { +func (gc *groupCostController) updateAvgRUPerSec() { for typ, counter := range gc.run.requestUnitTokens { - if !gc.calcAvg(counter, GetRUValueFromConsumption(gc.run.consumption, typ)) { + if !gc.calcAvg(counter, getRUValueFromConsumption(gc.run.consumption, typ)) { continue } - log.Info("[resource group controllor] update avg ru per sec", zap.String("name", gc.Name), zap.String("type", rmpb.RequestUnitType_name[int32(typ)]), zap.Float64("avgRUPerSec", counter.avgRUPerSec)) + log.Debug("[resource group controllor] update avg ru per sec", zap.String("name", gc.Name), zap.String("type", rmpb.RequestUnitType_name[int32(typ)]), zap.Float64("avgRUPerSec", counter.avgRUPerSec)) } } @@ -538,13 +537,13 @@ func (gc *groupCostController) shouldReportConsumption() bool { switch gc.Mode { case rmpb.GroupMode_RUMode: for typ := range requestUnitList { - if GetRUValueFromConsumption(gc.run.consumption, typ)-GetRUValueFromConsumption(gc.run.lastRequestConsumption, typ) >= consumptionsReportingThreshold { + if getRUValueFromConsumption(gc.run.consumption, typ)-getRUValueFromConsumption(gc.run.lastRequestConsumption, typ) >= consumptionsReportingThreshold { return true } } case rmpb.GroupMode_RawMode: for typ := range requestResourceList { - if GetResourceValueFromConsumption(gc.run.consumption, typ)-GetResourceValueFromConsumption(gc.run.lastRequestConsumption, typ) >= consumptionsReportingThreshold { + if getResourceValueFromConsumption(gc.run.consumption, typ)-getResourceValueFromConsumption(gc.run.lastRequestConsumption, typ) >= consumptionsReportingThreshold { return true } } @@ -552,7 +551,7 @@ func (gc *groupCostController) shouldReportConsumption() bool { return false } -func (gc *groupCostController) handleTokenBucketResponse(ctx context.Context, resp *rmpb.TokenBucketResponse) { +func (gc *groupCostController) handleTokenBucketResponse(resp *rmpb.TokenBucketResponse) { gc.handleRespFunc(resp) if !gc.run.initialRequestCompleted { gc.run.initialRequestCompleted = true @@ -570,7 +569,6 @@ func (gc *groupCostController) handleTokenBucketResponse(ctx context.Context, re func (gc *groupCostController) handleResourceTokenResponse(resp *rmpb.TokenBucketResponse) { for _, grantedTB := range 
resp.GetGrantedResourceTokens() { typ := grantedTB.GetType() - // todo: check whether grant = 0 counter, ok := gc.run.resourceTokens[typ] if !ok { log.Warn("not support this resource type", zap.String("type", rmpb.RawResourceType_name[int32(typ)])) @@ -583,7 +581,6 @@ func (gc *groupCostController) handleResourceTokenResponse(resp *rmpb.TokenBucke func (gc *groupCostController) handleRUTokenResponse(resp *rmpb.TokenBucketResponse) { for _, grantedTB := range resp.GetGrantedRUTokens() { typ := grantedTB.GetType() - // todo: check whether grant = 0 counter, ok := gc.run.requestUnitTokens[typ] if !ok { log.Warn("not support this resource type", zap.String("type", rmpb.RawResourceType_name[int32(typ)])) @@ -595,13 +592,12 @@ func (gc *groupCostController) handleRUTokenResponse(resp *rmpb.TokenBucketRespo func (gc *groupCostController) modifyTokenCounter(counter *tokenCounter, bucket *rmpb.TokenBucket, trickleTimeMs int64) { granted := bucket.Tokens - remainder := 0. if !counter.lastDeadline.IsZero() { // If last request came with a trickle duration, we may have RUs that were // not made available to the bucket yet; throw them together with the newly // granted RUs. if since := counter.lastDeadline.Sub(gc.run.now); since > 0 { - remainder = counter.lastRate * since.Seconds() + granted += counter.lastRate * since.Seconds() } } if counter.setupNotificationTimer != nil { @@ -621,10 +617,10 @@ func (gc *groupCostController) modifyTokenCounter(counter *tokenCounter, bucket cfg.NotifyThreshold = notifyThreshold counter.lastDeadline = time.Time{} } else { - cfg.NewTokens = remainder + cfg.NewTokens = 0 trickleDuration := time.Duration(trickleTimeMs) * time.Millisecond deadline := gc.run.now.Add(trickleDuration) - cfg.NewRate = float64(bucket.GetSettings().FillRate) + bucket.Tokens/trickleDuration.Seconds() + cfg.NewRate = float64(bucket.GetSettings().FillRate) + granted/trickleDuration.Seconds() timerDuration := trickleDuration - time.Second if timerDuration <= 0 { @@ -688,7 +684,7 @@ func (gc *groupCostController) collectRequestAndConsumption(low bool) *rmpb.Toke deltaConsumption := &rmpb.Consumption{} *deltaConsumption = *gc.run.consumption - Sub(deltaConsumption, gc.run.lastRequestConsumption) + sub(deltaConsumption, gc.run.lastRequestConsumption) req.ConsumptionSinceLastRequest = deltaConsumption *gc.run.lastRequestConsumption = *gc.run.consumption @@ -716,7 +712,7 @@ func (gc *groupCostController) OnRequestWait( case rmpb.GroupMode_RawMode: res := make([]*Reservation, 0, len(requestResourceList)) for typ, counter := range gc.run.resourceTokens { - if v := GetResourceValueFromConsumption(delta, typ); v > 0 { + if v := getResourceValueFromConsumption(delta, typ); v > 0 { res = append(res, counter.limiter.Reserve(ctx, now, v)) } } @@ -726,7 +722,7 @@ func (gc *groupCostController) OnRequestWait( case rmpb.GroupMode_RUMode: res := make([]*Reservation, 0, len(requestUnitList)) for typ, counter := range gc.run.requestUnitTokens { - if v := GetRUValueFromConsumption(delta, typ); v > 0 { + if v := getRUValueFromConsumption(delta, typ); v > 0 { res = append(res, counter.limiter.Reserve(ctx, now, v)) } } @@ -735,7 +731,7 @@ func (gc *groupCostController) OnRequestWait( } } gc.mu.Lock() - Add(gc.mu.consumption, delta) + add(gc.mu.consumption, delta) gc.mu.Unlock() return nil } @@ -749,18 +745,18 @@ func (gc *groupCostController) OnResponse(ctx context.Context, req RequestInfo, switch gc.mode { case rmpb.GroupMode_RawMode: for typ, counter := range gc.run.resourceTokens { - if v := 
GetResourceValueFromConsumption(delta, typ); v > 0 { - counter.limiter.RemoveTokens(time.Now(), float64(v)) + if v := getResourceValueFromConsumption(delta, typ); v > 0 { + counter.limiter.RemoveTokens(time.Now(), v) } } case rmpb.GroupMode_RUMode: for typ, counter := range gc.run.requestUnitTokens { - if v := GetRUValueFromConsumption(delta, typ); v > 0 { - counter.limiter.RemoveTokens(time.Now(), float64(v)) + if v := getRUValueFromConsumption(delta, typ); v > 0 { + counter.limiter.RemoveTokens(time.Now(), v) } } } gc.mu.Lock() - Add(gc.mu.consumption, delta) + add(gc.mu.consumption, delta) gc.mu.Unlock() } diff --git a/pkg/mcs/resource_manager/client/model.go b/pkg/mcs/resource_manager/client/model.go index 16972cbe54f..f906dd0270c 100644 --- a/pkg/mcs/resource_manager/client/model.go +++ b/pkg/mcs/resource_manager/client/model.go @@ -121,7 +121,7 @@ func (dsc *SQLCalculator) BeforeKVRequest(consumption *rmpb.Consumption, req Req func (dsc *SQLCalculator) AfterKVRequest(consumption *rmpb.Consumption, req RequestInfo, res ResponseInfo) { } -func GetRUValueFromConsumption(custom *rmpb.Consumption, typ rmpb.RequestUnitType) float64 { +func getRUValueFromConsumption(custom *rmpb.Consumption, typ rmpb.RequestUnitType) float64 { switch typ { case 0: return custom.RRU @@ -131,7 +131,7 @@ func GetRUValueFromConsumption(custom *rmpb.Consumption, typ rmpb.RequestUnitTyp return 0 } -func GetResourceValueFromConsumption(custom *rmpb.Consumption, typ rmpb.RawResourceType) float64 { +func getResourceValueFromConsumption(custom *rmpb.Consumption, typ rmpb.RawResourceType) float64 { switch typ { case 0: return custom.TotalCpuTimeMs @@ -143,7 +143,7 @@ func GetResourceValueFromConsumption(custom *rmpb.Consumption, typ rmpb.RawResou return 0 } -func Add(custom1 *rmpb.Consumption, custom2 *rmpb.Consumption) { +func add(custom1 *rmpb.Consumption, custom2 *rmpb.Consumption) { custom1.RRU += custom2.RRU custom1.WRU += custom2.WRU custom1.ReadBytes += custom2.ReadBytes @@ -154,7 +154,7 @@ func Add(custom1 *rmpb.Consumption, custom2 *rmpb.Consumption) { custom1.KvWriteRpcCount += custom2.KvWriteRpcCount } -func Sub(custom1 *rmpb.Consumption, custom2 *rmpb.Consumption) { +func sub(custom1 *rmpb.Consumption, custom2 *rmpb.Consumption) { custom1.RRU -= custom2.RRU custom1.WRU -= custom2.WRU custom1.ReadBytes -= custom2.ReadBytes From 5e64c41392cdc4753085f867d42f023a309d02d3 Mon Sep 17 00:00:00 2001 From: Cabinfever_B Date: Thu, 19 Jan 2023 02:08:27 +0800 Subject: [PATCH 21/32] add test Signed-off-by: Cabinfever_B --- pkg/mcs/resource_manager/client/client.go | 47 ++-- pkg/mcs/resource_manager/client/limiter.go | 3 + .../resource_manager/resource_manager_test.go | 201 ++++++++++++++++-- 3 files changed, 212 insertions(+), 39 deletions(-) diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go index 055c11ea3f4..4f4d1e6fd54 100644 --- a/pkg/mcs/resource_manager/client/client.go +++ b/pkg/mcs/resource_manager/client/client.go @@ -285,7 +285,6 @@ func (c *resourceGroupsController) mainLoop(ctx context.Context) { c.updateRunState(ctx) c.updateAvgRequestResourcePerSec() if !c.run.requestInProgress { - // c.collectTokenBucketRequests(ctx, "low_ru", false /* only select low tokens resource group */) c.collectTokenBucketRequests(ctx, "low_ru", true /* only select low tokens resource group */) } default: @@ -702,33 +701,41 @@ func (gc *groupCostController) calcRequest(counter *tokenCounter) float64 { func (gc *groupCostController) OnRequestWait( ctx context.Context, info 
 	RequestInfo,
-) error {
+) (err error) {
 	delta := &rmpb.Consumption{}
 	for _, calc := range gc.calculators {
 		calc.BeforeKVRequest(delta, info)
 	}
 	now := time.Now()
-	switch gc.mode {
-	case rmpb.GroupMode_RawMode:
-		res := make([]*Reservation, 0, len(requestResourceList))
-		for typ, counter := range gc.run.resourceTokens {
-			if v := getResourceValueFromConsumption(delta, typ); v > 0 {
-				res = append(res, counter.limiter.Reserve(ctx, now, v))
+	// retry
+retryLoop:
+	for i := 0; i < 3; i++ {
+		switch gc.mode {
+		case rmpb.GroupMode_RawMode:
+			res := make([]*Reservation, 0, len(requestResourceList))
+			for typ, counter := range gc.run.resourceTokens {
+				if v := getResourceValueFromConsumption(delta, typ); v > 0 {
+					res = append(res, counter.limiter.Reserve(ctx, now, v))
+				}
 			}
-		}
-		if err := WaitReservations(ctx, now, res); err != nil {
-			return err
-		}
-	case rmpb.GroupMode_RUMode:
-		res := make([]*Reservation, 0, len(requestUnitList))
-		for typ, counter := range gc.run.requestUnitTokens {
-			if v := getRUValueFromConsumption(delta, typ); v > 0 {
-				res = append(res, counter.limiter.Reserve(ctx, now, v))
+			if err = WaitReservations(ctx, now, res); err == nil {
+				break retryLoop
+			}
+		case rmpb.GroupMode_RUMode:
+			res := make([]*Reservation, 0, len(requestUnitList))
+			for typ, counter := range gc.run.requestUnitTokens {
+				if v := getRUValueFromConsumption(delta, typ); v > 0 {
+					res = append(res, counter.limiter.Reserve(ctx, now, v))
+				}
+			}
+			if err = WaitReservations(ctx, now, res); err == nil {
+				break retryLoop
 			}
 		}
-		if err := WaitReservations(ctx, now, res); err != nil {
-			return err
-		}
+		time.Sleep(100 * time.Millisecond)
+	}
+	if err != nil {
+		return err
 	}
 	gc.mu.Lock()
 	add(gc.mu.consumption, delta)
diff --git a/pkg/mcs/resource_manager/client/limiter.go b/pkg/mcs/resource_manager/client/limiter.go
index 76825a2e5e5..259ae8591b2 100644
--- a/pkg/mcs/resource_manager/client/limiter.go
+++ b/pkg/mcs/resource_manager/client/limiter.go
@@ -386,6 +386,9 @@ func WaitReservations(ctx context.Context, now time.Time, reservations []*Reserv
 	if longestDelayDuration > 500*time.Millisecond {
 		log.Warn("[resource group controller] limiter needs to wait", zap.Time("now", now), zap.Duration("delay", longestDelayDuration))
 	}
+	if longestDelayDuration > 1*time.Second {
+		return fmt.Errorf("[resource group controller] limiter needs to wait too long")
+	}
 	t := time.NewTimer(longestDelayDuration)
 	defer t.Stop()
 
diff --git a/tests/msc/resource_manager/resource_manager_test.go b/tests/msc/resource_manager/resource_manager_test.go
index 68a8a7a2cc1..8b138e0c80b 100644
--- a/tests/msc/resource_manager/resource_manager_test.go
+++ b/tests/msc/resource_manager/resource_manager_test.go
@@ -26,6 +26,7 @@ import (
 	rmpb "github.com/pingcap/kvproto/pkg/resource_manager"
 	"github.com/stretchr/testify/suite"
 	pd "github.com/tikv/pd/client"
+	rgcli "github.com/tikv/pd/pkg/mcs/resource_manager/client"
 	"github.com/tikv/pd/pkg/mcs/resource_manager/server"
 	"github.com/tikv/pd/pkg/utils/testutil"
 	"github.com/tikv/pd/tests"
@@ -42,10 +43,11 @@ func TestMain(m *testing.M) {
 
 type resourceManagerClientTestSuite struct {
 	suite.Suite
-	ctx     context.Context
-	clean   context.CancelFunc
-	cluster *tests.TestCluster
-	client  pd.Client
+	ctx        context.Context
+	clean      context.CancelFunc
+	cluster    *tests.TestCluster
+	client     pd.Client
+	initGroups []*rmpb.ResourceGroup
 }
 
 func TestResourceManagerClientTestSuite(t *testing.T) {
@@ -67,20 +69,8 @@ func (suite *resourceManagerClientTestSuite) SetupSuite() {
 	leaderName := suite.cluster.WaitLeader()
 	leader :=
 	suite.client, err = pd.NewClientWithContext(suite.ctx, []string{leader.GetAddr()}, pd.SecurityOption{})
-	re.NoError(err)
-}
-
-func (suite *resourceManagerClientTestSuite) TearDownSuite() {
-	suite.client.Close()
-	suite.clean()
-	suite.cluster.Destroy()
-}
-
-func (suite *resourceManagerClientTestSuite) TestAcquireTokenBucket() {
-	re := suite.Require()
-	cli := suite.client
-	groups := []*rmpb.ResourceGroup{
+	suite.initGroups = []*rmpb.ResourceGroup{
 		{
 			Name: "test1",
 			Mode: rmpb.GroupMode_RUMode,
@@ -91,6 +81,12 @@ func (suite *resourceManagerClientTestSuite) TestAcquireTokenBucket() {
 				},
 				Tokens: 100000,
 			},
+			WRU: &rmpb.TokenBucket{
+				Settings: &rmpb.TokenLimitSettings{
+					FillRate: 20000,
+				},
+				Tokens: 50000,
+			},
 		},
 	},
 	{
@@ -103,9 +99,176 @@ func (suite *resourceManagerClientTestSuite) TestAcquireTokenBucket() {
 				},
 				Tokens: 100000,
 			},
+			WRU: &rmpb.TokenBucket{
+				Settings: &rmpb.TokenLimitSettings{
+					FillRate: 20000,
+				},
+				Tokens: 50000,
+			},
 		},
 	},
 	}
+
+	re.NoError(err)
+}
+
+func (suite *resourceManagerClientTestSuite) TearDownSuite() {
+	suite.client.Close()
+	suite.clean()
+	suite.cluster.Destroy()
+}
+
+const buffDuration = time.Millisecond * 100
+
+type testRequestInfo struct {
+	isWrite    bool
+	writeBytes uint64
+}
+
+func (ti *testRequestInfo) IsWrite() bool {
+	return ti.isWrite
+}
+
+func (ti *testRequestInfo) WriteBytes() uint64 {
+	return ti.writeBytes
+}
+
+type testResponseInfo struct {
+	cpuMs     uint64
+	readBytes uint64
+}
+
+func (tri *testResponseInfo) ReadBytes() uint64 {
+	return tri.readBytes
+}
+
+func (tri *testResponseInfo) KVCPUMs() uint64 {
+	return tri.cpuMs
+}
+
+type tokenConsumptionPerSecond struct {
+	rruTokensAtATime float64
+	wruTokensAtATime float64
+	times            int
+	waitDuration     time.Duration
+}
+
+func (t tokenConsumptionPerSecond) makeReadRequest() *testRequestInfo {
+	return &testRequestInfo{
+		isWrite:    false,
+		writeBytes: 0,
+	}
+}
+
+func (t tokenConsumptionPerSecond) makeWriteRequest() *testRequestInfo {
+	return &testRequestInfo{
+		isWrite:    true,
+		writeBytes: uint64(t.wruTokensAtATime - 1),
+	}
+}
+
+func (t tokenConsumptionPerSecond) makeReadResponse() *testResponseInfo {
+	return &testResponseInfo{
+		readBytes: uint64((t.rruTokensAtATime - 1) / 2),
+		cpuMs:     uint64(t.rruTokensAtATime / 2),
+	}
+}
+
+func (t tokenConsumptionPerSecond) makeWriteResponse() *testResponseInfo {
+	return &testResponseInfo{
+		readBytes: 0,
+		cpuMs:     0,
+	}
+}
+
+func (suite *resourceManagerClientTestSuite) TestResourceGroupController() {
+	re := suite.Require()
+	cli := suite.client
+
+	for _, group := range suite.initGroups {
+		resp, err := cli.AddResourceGroup(suite.ctx, group)
+		re.NoError(err)
+		re.Contains(resp, "Success!")
+	}
+
+	cfg := &rgcli.RequestUnitConfig{
+		ReadBaseCost:     1,
+		ReadCostPerByte:  1,
+		ReadCPUMsCost:    1,
+		WriteBaseCost:    1,
+		WriteCostPerByte: 1,
+	}
+
+	controller, _ := rgcli.NewResourceGroupController(1, cli, cfg)
+	controller.Start(suite.ctx)
+
+	testCases := []struct {
+		resourceGroupName string
+		tcs               []tokenConsumptionPerSecond
+		len               int
+	}{
+		{
+			resourceGroupName: suite.initGroups[0].Name,
+			len:               8,
+			tcs: []tokenConsumptionPerSecond{
+				{rruTokensAtATime: 50, wruTokensAtATime: 20, times: 200, waitDuration: 0},
+				{rruTokensAtATime: 50, wruTokensAtATime: 100, times: 200, waitDuration: 0},
+				{rruTokensAtATime: 50, wruTokensAtATime: 100, times: 200, waitDuration: 0},
+				{rruTokensAtATime: 20, wruTokensAtATime: 40, times: 500, waitDuration: 0},
+				{rruTokensAtATime: 25, wruTokensAtATime: 50, times: 400, waitDuration: 0},
+				{rruTokensAtATime: 30, wruTokensAtATime: 60, times: 330, waitDuration: 0},
+				{rruTokensAtATime: 40, wruTokensAtATime: 80, times: 250, waitDuration: 0},
+				{rruTokensAtATime: 50, wruTokensAtATime: 100, times: 200, waitDuration: 0},
+			},
+		},
+	}
+	ticker := time.NewTicker(time.Second)
+	defer ticker.Stop()
+	i := 0
+	for {
+		v := false
+		<-ticker.C
+		for _, cas := range testCases {
+			if i >= cas.len {
+				continue
+			}
+			v = true
+			sum := time.Duration(0)
+			for j := 0; j < cas.tcs[i].times; j++ {
+				rreq := cas.tcs[i].makeReadRequest()
+				wreq := cas.tcs[i].makeWriteRequest()
+				rres := cas.tcs[i].makeReadResponse()
+				wres := cas.tcs[i].makeWriteResponse()
+				startTime := time.Now()
+				controller.OnRequestWait(suite.ctx, cas.resourceGroupName, rreq)
+				controller.OnRequestWait(suite.ctx, cas.resourceGroupName, wreq)
+				endTime := time.Now()
+				sum += endTime.Sub(startTime)
+				controller.OnResponse(suite.ctx, cas.resourceGroupName, rreq, rres)
+				controller.OnResponse(suite.ctx, cas.resourceGroupName, wreq, wres)
+				time.Sleep(1000 * time.Microsecond)
+			}
+			re.LessOrEqual(sum, buffDuration+cas.tcs[i].waitDuration)
+		}
+		i++
+		if !v {
+			break
+		}
+	}
+	for _, g := range suite.initGroups {
+		// Delete Resource Group
+		dresp, err := cli.DeleteResourceGroup(suite.ctx, g.Name)
+		re.NoError(err)
+		re.Contains(dresp, "Success!")
+	}
+}
+
+func (suite *resourceManagerClientTestSuite) TestAcquireTokenBucket() {
+	re := suite.Require()
+	cli := suite.client
+
+	groups := make([]*rmpb.ResourceGroup, len(suite.initGroups))
+	copy(groups, suite.initGroups)
 	for _, group := range groups {
 		resp, err := cli.AddResourceGroup(suite.ctx, group)
 		re.NoError(err)
@@ -121,7 +284,7 @@ func (suite *resourceManagerClientTestSuite) TestAcquireTokenBucket() {
 	requests := make([]*rmpb.RequestUnitItem, 0)
 	requests = append(requests, &rmpb.RequestUnitItem{
 		Type:  rmpb.RequestUnitType_RRU,
-		Value: 10000,
+		Value: 100,
 	})
 	req := &rmpb.TokenBucketRequest{
 		ResourceGroupName: group.Name,
@@ -137,7 +300,7 @@ func (suite *resourceManagerClientTestSuite) TestAcquireTokenBucket() {
 	re.NoError(err)
 	for _, resp := range aresp {
 		re.Len(resp.GrantedRUTokens, 1)
-		re.Equal(resp.GrantedRUTokens[0].GrantedTokens.Tokens, float64(10000.))
+		re.Equal(resp.GrantedRUTokens[0].GrantedTokens.Tokens, float64(100.))
 	}
 	gresp, err := cli.GetResourceGroup(suite.ctx, groups[0].GetName())
 	re.NoError(err)
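A quick sanity check on the helpers used in the test above: with every RequestUnitConfig cost set to 1, one read costs roughly ReadBaseCost + readBytes + kvCPUMs = 1 + (rru-1)/2 + rru/2 ≈ rru RU, and one write costs WriteBaseCost + writeBytes = 1 + (wru-1) = wru RU, so each case drives about (rruTokensAtATime + wruTokensAtATime) × times RU per one-second tick. A minimal sketch of that estimate (the formula is inferred from the configured unit costs; it is not spelled out in the patch):

// ruPerIteration estimates the RU consumed by a single iteration of the
// test loop above, under the all-ones RequestUnitConfig.
func ruPerIteration(rru, wru float64) float64 {
	readRU := 1 + (rru-1)/2 + rru/2 // ReadBaseCost + ReadCostPerByte*readBytes + ReadCPUMsCost*cpuMs
	writeRU := 1 + (wru - 1)        // WriteBaseCost + WriteCostPerByte*writeBytes
	return readRU + writeRU         // ≈ rru + wru
}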
From b4f1e575d34cdd4c4169d0ea82eadf1e7b607580 Mon Sep 17 00:00:00 2001
From: Cabinfever_B
Date: Thu, 19 Jan 2023 02:12:26 +0800
Subject: [PATCH 22/32] fit loan

Signed-off-by: Cabinfever_B
---
 .../resource_manager/server/token_buckets_test.go | 10 +++++-----
 pkg/mcs/resource_manager/server/token_bukets.go   | 12 +++++++++++-
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/pkg/mcs/resource_manager/server/token_buckets_test.go b/pkg/mcs/resource_manager/server/token_buckets_test.go
index a7ecbe81d77..e064a6504fe 100644
--- a/pkg/mcs/resource_manager/server/token_buckets_test.go
+++ b/pkg/mcs/resource_manager/server/token_buckets_test.go
@@ -66,13 +66,13 @@ func TestGroupTokenBucketRequest(t *testing.T) {
 	gtb := NewGroupTokenBucket(tbSetting)
 	time1 := time.Now()
-	tb, trickle := gtb.request(time1, 100000, uint64(time.Second)*10/uint64(time.Millisecond))
-	re.LessOrEqual(math.Abs(tb.Tokens-100000), 1e-7)
+	tb, trickle := gtb.request(time1, 190000, uint64(time.Second)*10/uint64(time.Millisecond))
+	re.LessOrEqual(math.Abs(tb.Tokens-190000), 1e-7)
 	re.Equal(trickle, int64(0))
 	// need to lend tokens
-	tb, trickle = gtb.request(time1, 101000, uint64(time.Second)*10/uint64(time.Millisecond))
-	re.LessOrEqual(math.Abs(tb.Tokens-101000), 1e-7)
-	re.Equal(trickle, int64(time.Second)*10/int64(time.Millisecond))
+	tb, trickle = gtb.request(time1, 11000, uint64(time.Second)*10/uint64(time.Millisecond))
+	re.LessOrEqual(math.Abs(tb.Tokens-11000), 1e-7)
+	re.Equal(trickle, int64(time.Second)*11000./4000./int64(time.Millisecond))
 	tb, trickle = gtb.request(time1, 35000, uint64(time.Second)*10/uint64(time.Millisecond))
 	re.LessOrEqual(math.Abs(tb.Tokens-35000), 1e-7)
 	re.Equal(trickle, int64(time.Second)*10/int64(time.Millisecond))
diff --git a/pkg/mcs/resource_manager/server/token_bukets.go b/pkg/mcs/resource_manager/server/token_bukets.go
index 92edc5d347e..1dad22e2e25 100644
--- a/pkg/mcs/resource_manager/server/token_bukets.go
+++ b/pkg/mcs/resource_manager/server/token_bukets.go
@@ -15,6 +15,7 @@
 package server
 
 import (
+	"math"
 	"time"
 
 	"github.com/gogo/protobuf/proto"
@@ -117,10 +118,12 @@ func (t *GroupTokenBucket) request(now time.Time, neededTokens float64, targetPe
 	// Firstly allocate the remaining tokens
 	var grantedTokens float64
+	hasRemaining := false
 	if t.Tokens > 0 {
 		grantedTokens = t.Tokens
 		neededTokens -= grantedTokens
 		t.Tokens = 0
+		hasRemaining = true
 	}
 
 	var targetPeriodTime = time.Duration(targetPeriodMs) * time.Millisecond
@@ -155,6 +158,7 @@ func (t *GroupTokenBucket) request(now time.Time, neededTokens float64, targetPe
 		if roundReserveTokens > neededTokens {
 			t.Tokens -= neededTokens
 			grantedTokens += neededTokens
+			trickleTime += grantedTokens / fillRate
 			neededTokens = 0
 		} else {
 			roundReserveTime := roundReserveTokens / fillRate
@@ -177,5 +181,11 @@ func (t *GroupTokenBucket) request(now time.Time, neededTokens float64, targetPe
 		grantedTokens = defaultReserveRatio * float64(t.Settings.FillRate) * targetPeriodTime.Seconds()
 	}
 	res.Tokens = grantedTokens
-	return &res, targetPeriodTime.Milliseconds()
+	var trickleDuration time.Duration
+	if hasRemaining {
+		trickleDuration = time.Duration(math.Min(trickleTime, targetPeriodTime.Seconds()) * float64(time.Second))
+	} else {
+		trickleDuration = targetPeriodTime
+	}
+	return &res, trickleDuration.Milliseconds()
 }
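The new lending arithmetic can be checked against the test above. The asserted ratio implies a 4000 tokens/s fill rate and a bucket that still holds about 10000 burst tokens when the 11000-token request arrives: the remainder is granted outright (hasRemaining), the missing 1000 are lent against future refill, and trickleTime lands at grantedTokens/fillRate = 11000/4000 = 2.75s, under the 10s target period. A sketch of the capped computation (the fill rate and bucket state are inferred from the assertion, not shown in the patch):

// expectedTrickleMs mirrors the trickle cap introduced above: lend-time is
// grantedTokens/fillRate, but never longer than the target period.
func expectedTrickleMs(granted, fillRate, targetPeriodSec float64) int64 {
	trickle := granted / fillRate // 11000/4000 = 2.75s in the lending case
	if trickle > targetPeriodSec {
		trickle = targetPeriodSec
	}
	return int64(trickle * 1000) // 2750ms, matching the assertion
}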
From a690b8e7d07f4944b599759590e585e5588d1b65 Mon Sep 17 00:00:00 2001
From: Cabinfever_B
Date: Thu, 19 Jan 2023 02:27:08 +0800
Subject: [PATCH 23/32] add retry

Signed-off-by: Cabinfever_B
---
 pkg/mcs/resource_manager/client/client.go           | 11 ++++++++---
 pkg/mcs/resource_manager/client/limiter.go          |  9 +++------
 pkg/mcs/resource_manager/client/limiter_test.go     |  8 ++++----
 tests/msc/resource_manager/resource_manager_test.go |  2 +-
 4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go
index 4f4d1e6fd54..df2df620dc8 100644
--- a/pkg/mcs/resource_manager/client/client.go
+++ b/pkg/mcs/resource_manager/client/client.go
@@ -25,6 +25,11 @@ import (
 	"go.uber.org/zap"
 )
 
+const (
+	defaultMaxWaitDuration = time.Second
+	maxRetry               = 3
+)
+
 var defaultWhiteList = map[string]struct{}{
 	"default": {},
 	"":        {},
@@ -709,13 +714,13 @@ func (gc *groupCostController) OnRequestWait(
 	now := time.Now()
 	// retry
 retryLoop:
-	for i := 0; i < 3; i++ {
+	for i := 0; i < maxRetry; i++ {
 		switch gc.mode {
 		case rmpb.GroupMode_RawMode:
 			res := make([]*Reservation, 0, len(requestResourceList))
 			for typ, counter := range gc.run.resourceTokens {
 				if v := getResourceValueFromConsumption(delta, typ); v > 0 {
-					res = append(res, counter.limiter.Reserve(ctx, now, v))
+					res = append(res, counter.limiter.Reserve(ctx, defaultMaxWaitDuration, now, v))
 				}
 			}
 			if err = WaitReservations(ctx, now, res); err == nil {
@@ -725,7 +730,7 @@ retryLoop:
 		case rmpb.GroupMode_RUMode:
 			res := make([]*Reservation, 0, len(requestUnitList))
 			for typ, counter := range gc.run.requestUnitTokens {
 				if v := getRUValueFromConsumption(delta, typ); v > 0 {
-					res = append(res, counter.limiter.Reserve(ctx, now, v))
+					res = append(res, counter.limiter.Reserve(ctx, defaultMaxWaitDuration, now, v))
 				}
 			}
 			if err = WaitReservations(ctx, now, res); err == nil {
diff --git a/pkg/mcs/resource_manager/client/limiter.go b/pkg/mcs/resource_manager/client/limiter.go
index 259ae8591b2..6f3aaa0261d 100644
--- a/pkg/mcs/resource_manager/client/limiter.go
+++ b/pkg/mcs/resource_manager/client/limiter.go
@@ -174,7 +174,7 @@ func (r *Reservation) CancelAt(now time.Time) {
 //	Act()
 //
 // Use this method if you wish to wait and slow down in accordance with the rate limit without dropping events.
-func (lim *Limiter) Reserve(ctx context.Context, now time.Time, n float64) *Reservation {
+func (lim *Limiter) Reserve(ctx context.Context, waitDuration time.Duration, now time.Time, n float64) *Reservation {
 	// Check if ctx is already cancelled
 	select {
 	case <-ctx.Done():
@@ -184,7 +184,7 @@ func (lim *Limiter) Reserve(ctx context.Context, now time.Time, n float64) *Rese
 	default:
 	}
 	// Determine wait limit
-	waitLimit := InfDuration
+	waitLimit := waitDuration
 	if deadline, ok := ctx.Deadline(); ok {
 		waitLimit = deadline.Sub(now)
 	}
@@ -373,7 +373,7 @@ func WaitReservations(ctx context.Context, now time.Time, reservations []*Reserv
 	for _, res := range reservations {
 		if !res.ok {
 			cancel()
-			return fmt.Errorf("[resource group controller] limiter has no enough token")
+			return fmt.Errorf("[resource group controller] limiter does not have enough tokens or needs to wait too long")
 		}
 		delay := res.DelayFrom(now)
 		if delay > longestDelayDuration {
@@ -386,9 +386,6 @@ func WaitReservations(ctx context.Context, now time.Time, reservations []*Reserv
 	if longestDelayDuration > 500*time.Millisecond {
 		log.Warn("[resource group controller] limiter needs to wait", zap.Time("now", now), zap.Duration("delay", longestDelayDuration))
 	}
-	if longestDelayDuration > 1*time.Second {
-		return fmt.Errorf("[resource group controller] limiter needs to wait too long")
-	}
 
 	t := time.NewTimer(longestDelayDuration)
 	defer t.Stop()
diff --git a/pkg/mcs/resource_manager/client/limiter_test.go b/pkg/mcs/resource_manager/client/limiter_test.go
index 0a0ae48d7c6..9a50b0db918 100644
--- a/pkg/mcs/resource_manager/client/limiter_test.go
+++ b/pkg/mcs/resource_manager/client/limiter_test.go
@@ -140,8 +140,8 @@ func TestCancel(t *testing.T) {
 	r1.CancelAt(t1)
 	checkTokens(re, lim1, t1, 11)
 
-	r1 = lim1.Reserve(ctx, t1, 5)
-	r2 := lim2.Reserve(ctx1, t1, 5)
+	r1 = lim1.Reserve(ctx, InfDuration, t1, 5)
+	r2 := lim2.Reserve(ctx1, InfDuration, t1, 5)
 	checkTokens(re, lim1, t2, 7)
 	checkTokens(re, lim2, t2, 2)
 	err := WaitReservations(ctx, t2, []*Reservation{r1, r2})
@@ -151,8 +151,8 @@ func TestCancel(t *testing.T) {
 	cancel1()
 
 	ctx2, cancel2 := context.WithCancel(ctx)
-	r1 = lim1.Reserve(ctx, t3, 5)
-	r2 = lim2.Reserve(ctx2, t3, 5)
+	r1 = lim1.Reserve(ctx, InfDuration, t3, 5)
+	r2 = lim2.Reserve(ctx2, InfDuration, t3, 5)
 	checkTokens(re, lim1, t3, 8)
 	checkTokens(re, lim2, t3, -2)
 	var wg sync.WaitGroup
diff --git a/tests/msc/resource_manager/resource_manager_test.go b/tests/msc/resource_manager/resource_manager_test.go
index 8b138e0c80b..ee77c5c1ba4 100644
--- a/tests/msc/resource_manager/resource_manager_test.go
+++ b/tests/msc/resource_manager/resource_manager_test.go
@@ -118,7 +118,7 @@ func (suite *resourceManagerClientTestSuite) TearDownSuite() {
 	suite.cluster.Destroy()
 }
 
-const buffDuration = time.Millisecond * 100
+const buffDuration = time.Millisecond * 200
 
 type testRequestInfo struct {
 	isWrite bool
From 0dd78ff2aa79b1e3a4979b762cf76428879821f4 Mon Sep 17 00:00:00 2001
From: Cabinfever_B
Date: Thu, 19 Jan 2023 02:43:01 +0800
Subject: [PATCH 24/32] add test

Signed-off-by: Cabinfever_B
---
 tests/msc/resource_manager/resource_manager_test.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/msc/resource_manager/resource_manager_test.go b/tests/msc/resource_manager/resource_manager_test.go
index ee77c5c1ba4..90c9fe25862 100644
--- a/tests/msc/resource_manager/resource_manager_test.go
+++ b/tests/msc/resource_manager/resource_manager_test.go
@@ -267,8 +267,8 @@ func (suite *resourceManagerClientTestSuite) TestAcquireTokenBucket() {
 	re := suite.Require()
 	cli := suite.client
 
-	groups := make([]*rmpb.ResourceGroup, len(suite.initGroups))
-	copy(groups, suite.initGroups)
+	groups := make([]*rmpb.ResourceGroup, 0)
+	groups = append(groups, suite.initGroups...)
 	for _, group := range groups {
 		resp, err := cli.AddResourceGroup(suite.ctx, group)
 		re.NoError(err)
From 606d6bab5a5a437dddbe21a924469f1178981605 Mon Sep 17 00:00:00 2001
From: Cabinfever_B
Date: Thu, 19 Jan 2023 15:02:12 +0800
Subject: [PATCH 25/32] address comment

Signed-off-by: Cabinfever_B
---
 pkg/mcs/resource_manager/client/client.go    | 32 ++++++++++++++-----
 pkg/mcs/resource_manager/client/model.go     |  2 +-
 .../resource_manager/server/token_bukets.go  |  2 ++
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go
index df2df620dc8..a348c37bccd 100644
--- a/pkg/mcs/resource_manager/client/client.go
+++ b/pkg/mcs/resource_manager/client/client.go
@@ -59,6 +59,9 @@ type resourceGroupsController struct {
 	groupsController sync.Map
 	config           *Config
 
+	loopCtx    context.Context
+	loopCancel func()
+
 	calculators []ResourceCalculator
 
 	// tokenResponseChan receives token bucket response from server.
@@ -113,10 +116,19 @@ func (c *resourceGroupsController) Start(ctx context.Context) error {
 		log.Error("update ResourceGroup failed", zap.Error(err))
 	}
 	c.initRunState()
+	c.loopCtx, c.loopCancel = context.WithCancel(ctx)
 	go c.mainLoop(ctx)
 	return nil
 }
 
+func (c *resourceGroupsController) Stop() error {
+	if c.loopCancel == nil {
+		return errors.Errorf("resourceGroupsController has not been started.")
+	}
+	c.loopCancel()
+	return nil
+}
+
 func (c *resourceGroupsController) putResourceGroup(ctx context.Context, name string) (*groupCostController, error) {
 	group, err := c.provider.GetResourceGroup(ctx, name)
 	if err != nil {
@@ -124,6 +136,7 @@ func (c *resourceGroupsController) putResourceGroup(ctx context.Context, name st
 	}
 	log.Info("create resource group cost controller", zap.String("name", group.GetName()))
 	gc := newGroupCostController(group, c.config, c.lowTokenNotifyChan)
+	// A future case: if the user switches a group from RU mode to RAW mode, how do we re-initialize?
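+	// (The controller is initialized before it is stored below, presumably so that
+	// concurrent readers of groupsController never observe an uninitialized entry.)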
 	gc.initRunState()
 	c.groupsController.Store(group.GetName(), gc)
 	return gc, nil
@@ -462,7 +475,7 @@ func (gc *groupCostController) updateRunState(ctx context.Context) {
 		}
 	case rmpb.GroupMode_RawMode:
 		for typ, counter := range gc.run.resourceTokens {
-			if v := getResourceValueFromConsumption(deltaConsumption, typ); v > 0 {
+			if v := getRawResourceValueFromConsumption(deltaConsumption, typ); v > 0 {
 				counter.limiter.RemoveTokens(newTime, v)
 			}
 		}
@@ -474,7 +487,7 @@ func (gc *groupCostController) updateRunState(ctx context.Context) {
 func (gc *groupCostController) updateAvgRequestResourcePerSec() {
 	switch gc.mode {
 	case rmpb.GroupMode_RawMode:
-		gc.updateAvgResourcePerSec()
+		gc.updateAvgRawResourcePerSec()
 	case rmpb.GroupMode_RUMode:
 		gc.updateAvgRUPerSec()
 	}
@@ -507,12 +520,12 @@ func (gc *groupCostController) handleTokenBucketTrickEvent(ctx context.Context) {
 	}
 }
 
-func (gc *groupCostController) updateAvgResourcePerSec() {
+func (gc *groupCostController) updateAvgRawResourcePerSec() {
 	for typ, counter := range gc.run.resourceTokens {
-		if !gc.calcAvg(counter, getResourceValueFromConsumption(gc.run.consumption, typ)) {
+		if !gc.calcAvg(counter, getRawResourceValueFromConsumption(gc.run.consumption, typ)) {
 			continue
 		}
-		log.Debug("[resource group controllor] update avg ru per sec", zap.String("name", gc.Name), zap.String("type", rmpb.RawResourceType_name[int32(typ)]), zap.Float64("avgRUPerSec", counter.avgRUPerSec))
+		log.Debug("[resource group controller] update avg raw resource per sec", zap.String("name", gc.Name), zap.String("type", rmpb.RawResourceType_name[int32(typ)]), zap.Float64("avgRUPerSec", counter.avgRUPerSec))
 	}
 }
 
@@ -547,7 +560,7 @@ func (gc *groupCostController) shouldReportConsumption() bool {
 		}
 	case rmpb.GroupMode_RawMode:
 		for typ := range requestResourceList {
-			if getResourceValueFromConsumption(gc.run.consumption, typ)-getResourceValueFromConsumption(gc.run.lastRequestConsumption, typ) >= consumptionsReportingThreshold {
+			if getRawResourceValueFromConsumption(gc.run.consumption, typ)-getRawResourceValueFromConsumption(gc.run.lastRequestConsumption, typ) >= consumptionsReportingThreshold {
 				return true
 			}
 		}
@@ -615,12 +628,15 @@ func (gc *groupCostController) modifyTokenCounter(counter *tokenCounter, bucket
 	}
 
 	var cfg tokenBucketReconfigureArgs
+	// When trickleTimeMs equals zero, the server has enough tokens and does not need to
+	// limit the client's consumption, so the whole grant is handed to the client right away.
 	if trickleTimeMs == 0 {
 		cfg.NewTokens = granted
 		cfg.NewRate = float64(bucket.GetSettings().FillRate)
 		cfg.NotifyThreshold = notifyThreshold
 		counter.lastDeadline = time.Time{}
 	} else {
+		// Otherwise the granted tokens are delivered to the client at the fill rate.
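+		// (For illustration, with hypothetical numbers: a 50000-token grant with
+		// trickleTimeMs = 5000 is not credited at once; the limiter starts from zero
+		// extra tokens and refills toward the grant across the five-second window.)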
 		cfg.NewTokens = 0
 		trickleDuration := time.Duration(trickleTimeMs) * time.Millisecond
 		deadline := gc.run.now.Add(trickleDuration)
@@ -719,7 +735,7 @@ retryLoop:
 		case rmpb.GroupMode_RawMode:
 			res := make([]*Reservation, 0, len(requestResourceList))
 			for typ, counter := range gc.run.resourceTokens {
-				if v := getResourceValueFromConsumption(delta, typ); v > 0 {
+				if v := getRawResourceValueFromConsumption(delta, typ); v > 0 {
 					res = append(res, counter.limiter.Reserve(ctx, defaultMaxWaitDuration, now, v))
 				}
 			}
@@ -757,7 +773,7 @@ func (gc *groupCostController) OnResponse(ctx context.Context, req RequestInfo,
 	switch gc.mode {
 	case rmpb.GroupMode_RawMode:
 		for typ, counter := range gc.run.resourceTokens {
-			if v := getResourceValueFromConsumption(delta, typ); v > 0 {
+			if v := getRawResourceValueFromConsumption(delta, typ); v > 0 {
 				counter.limiter.RemoveTokens(time.Now(), v)
 			}
 		}
diff --git a/pkg/mcs/resource_manager/client/model.go b/pkg/mcs/resource_manager/client/model.go
index f906dd0270c..918cb31ca6c 100644
--- a/pkg/mcs/resource_manager/client/model.go
+++ b/pkg/mcs/resource_manager/client/model.go
@@ -131,7 +131,7 @@ func getRUValueFromConsumption(custom *rmpb.Consumption, typ rmpb.RequestUnitTyp
 	return 0
 }
 
-func getResourceValueFromConsumption(custom *rmpb.Consumption, typ rmpb.RawResourceType) float64 {
+func getRawResourceValueFromConsumption(custom *rmpb.Consumption, typ rmpb.RawResourceType) float64 {
 	switch typ {
 	case 0:
 		return custom.TotalCpuTimeMs
diff --git a/pkg/mcs/resource_manager/server/token_bukets.go b/pkg/mcs/resource_manager/server/token_bukets.go
index 1dad22e2e25..9ca81ed5dae 100644
--- a/pkg/mcs/resource_manager/server/token_bukets.go
+++ b/pkg/mcs/resource_manager/server/token_bukets.go
@@ -182,6 +182,8 @@ func (t *GroupTokenBucket) request(now time.Time, neededTokens float64, targetPe
 	}
 	res.Tokens = grantedTokens
 	var trickleDuration time.Duration
+	// We can't simply use targetPeriodTime as the trickle time when tokens remain in the
+	// bucket; doing so would slow the client down when its consumption could actually be increased.
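+	// (Worked example, matching the test in the "fit loan" patch: an 11000-token
+	// grant against an inferred 4000/s fill rate gives trickleTime = 2.75s, below
+	// the 10s target period, so 2750ms is returned rather than the full period.)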
 	if hasRemaining {
 		trickleDuration = time.Duration(math.Min(trickleTime, targetPeriodTime.Seconds()) * float64(time.Second))
 	} else {
 		trickleDuration = targetPeriodTime
 	}
From 166909f8af10a90bfd30ba128e77c0bac74e8a48 Mon Sep 17 00:00:00 2001
From: Cabinfever_B
Date: Thu, 19 Jan 2023 15:50:02 +0800
Subject: [PATCH 26/32] address comment

Signed-off-by: Cabinfever_B
---
 pkg/mcs/resource_manager/client/client.go  | 44 +++++++++++-----------
 pkg/mcs/resource_manager/client/limiter.go | 14 +++++--
 2 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go
index a348c37bccd..87d1a328a08 100644
--- a/pkg/mcs/resource_manager/client/client.go
+++ b/pkg/mcs/resource_manager/client/client.go
@@ -51,9 +51,9 @@ type ResourceGroupProvider interface {
 	AcquireTokenBuckets(ctx context.Context, request *rmpb.TokenBucketsRequest) ([]*rmpb.TokenBucketResponse, error)
 }
 
-var _ ResourceGroupKVInterceptor = (*resourceGroupsController)(nil)
+var _ ResourceGroupKVInterceptor = (*ResourceGroupsController)(nil)
 
-type resourceGroupsController struct {
+type ResourceGroupsController struct {
 	clientUniqueID   uint64
 	provider         ResourceGroupProvider
 	groupsController sync.Map
@@ -92,15 +92,15 @@
 	}
 }
 
-// NewResourceGroupController returns a new resourceGroupsController which impls ResourceGroupKVInterceptor
-func NewResourceGroupController(clientUniqueID uint64, provider ResourceGroupProvider, requestUnitConfig *RequestUnitConfig) (*resourceGroupsController, error) {
+// NewResourceGroupController returns a new ResourceGroupsController which implements ResourceGroupKVInterceptor
+func NewResourceGroupController(clientUniqueID uint64, provider ResourceGroupProvider, requestUnitConfig *RequestUnitConfig) (*ResourceGroupsController, error) {
 	var config *Config
 	if requestUnitConfig != nil {
 		config = generateConfig(requestUnitConfig)
 	} else {
 		config = DefaultConfig()
 	}
-	return &resourceGroupsController{
+	return &ResourceGroupsController{
 		clientUniqueID:     clientUniqueID,
 		provider:           provider,
 		config:             config,
@@ -111,7 +111,7 @@
 }
 
 // Start starts resourceGroupController service
-func (c *resourceGroupsController) Start(ctx context.Context) error {
+func (c *ResourceGroupsController) Start(ctx context.Context) error {
 	if err := c.updateAllResourceGroups(ctx); err != nil {
 		log.Error("update ResourceGroup failed", zap.Error(err))
 	}
@@ -121,7 +121,7 @@
 }
 
-func (c *resourceGroupsController) Stop() error {
+func (c *ResourceGroupsController) Stop() error {
 	if c.loopCancel == nil {
 		return errors.Errorf("resourceGroupsController has not been started.")
@@ -129,7 +129,7 @@
 }
 
-func (c *resourceGroupsController) putResourceGroup(ctx context.Context, name string) (*groupCostController, error) {
+func (c *ResourceGroupsController) putResourceGroup(ctx context.Context, name string) (*groupCostController, error) {
 	group, err := c.provider.GetResourceGroup(ctx, name)
 	if err != nil {
@@ -142,7 +142,7 @@
 }
 
-func (c *resourceGroupsController) updateAllResourceGroups(ctx context.Context) error {
+func (c *ResourceGroupsController) updateAllResourceGroups(ctx context.Context) error {
 	groups, err := c.provider.ListResourceGroups(ctx)
 	if err != nil {
 		return err
@@ -164,7 +164,7 @@
 	return nil
 }
 
-func (c *resourceGroupsController) initRunState() {
+func (c *ResourceGroupsController) initRunState() {
 	now := time.Now()
 	c.run.now = now
 	c.run.lastRequestTime = now
@@ -176,7 +176,7 @@
 	})
 }
 
-func (c *resourceGroupsController) updateRunState(ctx context.Context) {
+func (c *ResourceGroupsController) updateRunState(ctx context.Context) {
 	c.run.now = time.Now()
 	c.groupsController.Range(func(name, value any) bool {
 		gc := value.(*groupCostController)
@@ -185,7 +185,7 @@
 	})
 }
 
-func (c *resourceGroupsController) shouldReportConsumption() bool {
+func (c *ResourceGroupsController) shouldReportConsumption() bool {
 	if c.run.requestInProgress {
 		return false
 	}
@@ -205,7 +205,7 @@
 	return false
 }
 
-func (c *resourceGroupsController) updateAvgRequestResourcePerSec() {
+func (c *ResourceGroupsController) updateAvgRequestResourcePerSec() {
 	c.groupsController.Range(func(name, value any) bool {
 		gc := value.(*groupCostController)
 		gc.updateAvgRequestResourcePerSec()
@@ -213,7 +213,7 @@
 	})
 }
 
-func (c *resourceGroupsController) handleTokenBucketResponse(resp []*rmpb.TokenBucketResponse) {
+func (c *ResourceGroupsController) handleTokenBucketResponse(resp []*rmpb.TokenBucketResponse) {
 	for _, res := range resp {
 		name := res.GetResourceGroupName()
 		v, ok := c.groupsController.Load(name)
@@ -225,7 +225,7 @@
 	}
 }
 
-func (c *resourceGroupsController) collectTokenBucketRequests(ctx context.Context, source string, low bool) {
+func (c *ResourceGroupsController) collectTokenBucketRequests(ctx context.Context, source string, low bool) {
 	requests := make([]*rmpb.TokenBucketRequest, 0)
 	c.groupsController.Range(func(name, value any) bool {
 		gc := value.(*groupCostController)
@@ -240,7 +240,7 @@
 	}
 }
 
-func (c *resourceGroupsController) sendTokenBucketRequests(ctx context.Context, requests []*rmpb.TokenBucketRequest, source string) {
+func (c *ResourceGroupsController) sendTokenBucketRequests(ctx context.Context, requests []*rmpb.TokenBucketRequest, source string) {
 	now := time.Now()
 	c.run.lastRequestTime = now
 	c.run.requestInProgress = true
@@ -249,7 +249,7 @@
 		TargetRequestPeriodMs: uint64(c.config.targetPeriod / time.Millisecond),
 	}
 	go func() {
-		log.Info("[resource group controllor] send token bucket request", zap.Time("now", now), zap.Any("req", req.Requests), zap.String("source", source))
+		log.Debug("[resource group controller] send token bucket request", zap.Time("now", now), zap.Any("req", req.Requests), zap.String("source", source))
 		resp, err := c.provider.AcquireTokenBuckets(ctx, req)
 		if err != nil {
 			// Don't log any errors caused by the stopper canceling the context.
@@ -258,12 +258,12 @@ func (c *resourceGroupsController) sendTokenBucketRequests(ctx context.Context,
 			}
 			resp = nil
 		}
-		log.Info("[resource group controllor] token bucket response", zap.Time("now", time.Now()), zap.Any("resp", resp), zap.String("source", source), zap.Duration("latency", time.Since(now)))
+		log.Debug("[resource group controller] token bucket response", zap.Time("now", time.Now()), zap.Any("resp", resp), zap.String("source", source), zap.Duration("latency", time.Since(now)))
 		c.tokenResponseChan <- resp
 	}()
 }
 
-func (c *resourceGroupsController) handleTokenBucketTrickEvent(ctx context.Context) {
+func (c *ResourceGroupsController) handleTokenBucketTrickEvent(ctx context.Context) {
 	c.groupsController.Range(func(name, value any) bool {
 		gc := value.(*groupCostController)
 		gc.handleTokenBucketTrickEvent(ctx)
@@ -271,7 +271,7 @@
 	})
 }
 
-func (c *resourceGroupsController) mainLoop(ctx context.Context) {
+func (c *ResourceGroupsController) mainLoop(ctx context.Context) {
 	interval := c.config.groupLoopUpdateInterval
 	ticker := time.NewTicker(interval)
 	defer ticker.Stop()
@@ -311,7 +311,7 @@
 }
 
-func (c *resourceGroupsController) OnRequestWait(
+func (c *ResourceGroupsController) OnRequestWait(
 	ctx context.Context, resourceGroupName string, info RequestInfo,
 ) (err error) {
 	if _, ok := defaultWhiteList[resourceGroupName]; ok {
@@ -330,7 +330,7 @@
 }
 
-func (c *resourceGroupsController) OnResponse(ctx context.Context, resourceGroupName string, req RequestInfo, resp ResponseInfo) error {
+func (c *ResourceGroupsController) OnResponse(ctx context.Context, resourceGroupName string, req RequestInfo, resp ResponseInfo) error {
 	if _, ok := defaultWhiteList[resourceGroupName]; ok {
 		return nil
 	}
diff --git a/pkg/mcs/resource_manager/client/limiter.go b/pkg/mcs/resource_manager/client/limiter.go
index 6f3aaa0261d..c87bf8c7424 100644
--- a/pkg/mcs/resource_manager/client/limiter.go
+++ b/pkg/mcs/resource_manager/client/limiter.go
@@ -194,6 +194,8 @@ func (lim *Limiter) Reserve(ctx context.Context, waitDuration time.Duration, now
 
 // SetupNotificationThreshold enables the notification at the given threshold.
 func (lim *Limiter) SetupNotificationThreshold(now time.Time, threshold float64) {
+	lim.mu.Lock()
+	defer lim.mu.Unlock()
 	lim.advance(now)
 	lim.notifyThreshold = threshold
 }
@@ -215,19 +217,25 @@ func (lim *Limiter) notify() {
 // maybeNotify checks if it's time to send the notification and if so, performs
 // the notification.
 func (lim *Limiter) maybeNotify() {
-	if lim.IsLowTokens() {
+	if lim.isLowTokensLocked() {
 		lim.notify()
 	}
 }
 
-// IsLowTokens returns whether the limiter is in low tokens
-func (lim *Limiter) IsLowTokens() bool {
+func (lim *Limiter) isLowTokensLocked() bool {
 	if lim.isLowProcess || (lim.notifyThreshold > 0 && lim.tokens < lim.notifyThreshold) {
 		return true
 	}
 	return false
 }
 
+// IsLowTokens returns whether the limiter is low on tokens.
+func (lim *Limiter) IsLowTokens() bool {
+	lim.mu.Lock()
+	defer lim.mu.Unlock()
+	return lim.isLowTokensLocked()
+}
+
 // RemoveTokens decreases the amount of tokens currently available.
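 // It is used on the response path to charge the resources a request actually
 // consumed; the amount may push the balance negative (TestCancel above drives a
 // bucket to -2), in which case later reservations simply wait for the refill.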
 func (lim *Limiter) RemoveTokens(now time.Time, amount float64) {
 	lim.mu.Lock()
From 07b4d6786ba77e61de881894f89f7d807d9dcf3f Mon Sep 17 00:00:00 2001
From: Cabinfever_B
Date: Thu, 19 Jan 2023 16:35:35 +0800
Subject: [PATCH 27/32] address comment

Signed-off-by: Cabinfever_B
---
 pkg/mcs/resource_manager/client/client.go | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go
index 87d1a328a08..a30ea2048ea 100644
--- a/pkg/mcs/resource_manager/client/client.go
+++ b/pkg/mcs/resource_manager/client/client.go
@@ -1,4 +1,4 @@
-// Copyright 2022 TiKV Project Authors.
+// Copyright 2023 TiKV Project Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -37,7 +37,9 @@ var defaultWhiteList = map[string]struct{}{
 
 // ResourceGroupKVInterceptor is used as quota limit controller for resource group using kv store.
 type ResourceGroupKVInterceptor interface {
+	// OnRequestWait is used to check whether resource group has enough tokens. It may need to wait for some time.
 	OnRequestWait(ctx context.Context, resourceGroupName string, info RequestInfo) error
+	// OnResponse is used to consume tokens after receiving the response.
 	OnResponse(ctx context.Context, resourceGroupName string, req RequestInfo, resp ResponseInfo) error
 }
 
@@ -53,6 +55,7 @@ type ResourceGroupProvider interface {
 
 var _ ResourceGroupKVInterceptor = (*ResourceGroupsController)(nil)
 
+// ResourceGroupsController implements ResourceGroupKVInterceptor.
 type ResourceGroupsController struct {
@@ -110,17 +113,17 @@
 }
 
-// Start starts resourceGroupController service
-func (c *ResourceGroupsController) Start(ctx context.Context) error {
+// Start starts ResourceGroupController service.
+func (c *ResourceGroupsController) Start(ctx context.Context) {
 	if err := c.updateAllResourceGroups(ctx); err != nil {
 		log.Error("update ResourceGroup failed", zap.Error(err))
 	}
 	c.initRunState()
 	c.loopCtx, c.loopCancel = context.WithCancel(ctx)
 	go c.mainLoop(ctx)
-	return nil
 }
 
+// Stop stops ResourceGroupController service.
 func (c *ResourceGroupsController) Stop() error {
 	if c.loopCancel == nil {
 		return errors.Errorf("resourceGroupsController has not been started.")
@@ -219,6 +222,7 @@ func (c *ResourceGroupsController) handleTokenBucketResponse(resp []*rmpb.TokenB
 		v, ok := c.groupsController.Load(name)
 		if !ok {
 			log.Warn("A non-existent resource group was found when handling token response.", zap.String("name", name))
+			return
 		}
 		gc := v.(*groupCostController)
 		gc.handleTokenBucketResponse(res)
@@ -720,6 +724,7 @@ func (gc *groupCostController) calcRequest(counter *tokenCounter) float64 {
 	return value
 }
 
+// OnRequestWait is used to check whether resource group has enough tokens. It may need to wait for some time.
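+// Internally it retries up to maxRetry (3) times with a 100ms sleep between
+// attempts, and each attempt waits at most defaultMaxWaitDuration (1s) for tokens.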
 func (gc *groupCostController) OnRequestWait(
 	ctx context.Context, info RequestInfo,
 ) (err error) {
From d711d4ede41ebc417366dd987e29584005b45a1c Mon Sep 17 00:00:00 2001
From: Cabinfever_B
Date: Thu, 19 Jan 2023 16:45:48 +0800
Subject: [PATCH 28/32] address comment

Signed-off-by: Cabinfever_B
---
 pkg/mcs/resource_manager/client/client.go | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go
index a30ea2048ea..085a960ea94 100644
--- a/pkg/mcs/resource_manager/client/client.go
+++ b/pkg/mcs/resource_manager/client/client.go
@@ -315,6 +315,7 @@ func (c *ResourceGroupsController) mainLoop(ctx context.Context) {
 	}
 }
 
+// OnRequestWait is used to check whether resource group has enough tokens. It may need to wait for some time.
 func (c *ResourceGroupsController) OnRequestWait(
 	ctx context.Context, resourceGroupName string, info RequestInfo,
 ) (err error) {
@@ -330,10 +331,11 @@ func (c *ResourceGroupsController) OnRequestWait(
 			return errors.Errorf("[resource group] resourceGroupName %s does not exist.", resourceGroupName)
 		}
 	}
-	err = gc.OnRequestWait(ctx, info)
+	err = gc.onRequestWait(ctx, info)
 	return err
 }
 
+// OnResponse is used to consume tokens after receiving the response.
 func (c *ResourceGroupsController) OnResponse(ctx context.Context, resourceGroupName string, req RequestInfo, resp ResponseInfo) error {
 	if _, ok := defaultWhiteList[resourceGroupName]; ok {
 		return nil
@@ -343,7 +345,7 @@ func (c *ResourceGroupsController) OnResponse(ctx context.Context, resourceGroup
 		log.Warn("[resource group] resourceGroupName does not exist.", zap.String("resourceGroupName", resourceGroupName))
 	}
 	gc := tmp.(*groupCostController)
-	gc.OnResponse(ctx, req, resp)
+	gc.onResponse(ctx, req, resp)
 	return nil
 }
 
@@ -724,8 +726,7 @@ func (gc *groupCostController) calcRequest(counter *tokenCounter) float64 {
 	return value
 }
 
-// OnRequestWait is used to check whether resource group has enough tokens. It may need to wait for some time.
-func (gc *groupCostController) OnRequestWait(
+func (gc *groupCostController) onRequestWait(
 	ctx context.Context, info RequestInfo,
 ) (err error) {
 	delta := &rmpb.Consumption{}
@@ -769,8 +770,7 @@ retryLoop:
 	return nil
 }
 
-// OnResponse is used to consume tokens after receiving the response.
-func (gc *groupCostController) OnResponse(ctx context.Context, req RequestInfo, resp ResponseInfo) {
+func (gc *groupCostController) onResponse(ctx context.Context, req RequestInfo, resp ResponseInfo) {
 	delta := &rmpb.Consumption{}
 	for _, calc := range gc.calculators {
 		calc.AfterKVRequest(delta, req, resp)
From e83f208cc53c31cba67b2b4996a56666b61e4072 Mon Sep 17 00:00:00 2001
From: Cabinfever_B
Date: Thu, 19 Jan 2023 16:59:44 +0800
Subject: [PATCH 29/32] address comment

Signed-off-by: Cabinfever_B
---
 pkg/mcs/resource_manager/client/client.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go
index 085a960ea94..0c088f4dfe4 100644
--- a/pkg/mcs/resource_manager/client/client.go
+++ b/pkg/mcs/resource_manager/client/client.go
@@ -150,16 +150,16 @@ func (c *ResourceGroupsController) updateAllResourceGroups(ctx context.Context)
 	if err != nil {
 		return err
 	}
-	lastedGroups := make(map[string]struct{})
+	latestGroups := make(map[string]struct{})
 	for _, group := range groups {
 		log.Info("create resource group cost controller", zap.String("name", group.GetName()))
 		gc := newGroupCostController(group, c.config, c.lowTokenNotifyChan)
 		c.groupsController.Store(group.GetName(), gc)
-		lastedGroups[group.GetName()] = struct{}{}
+		latestGroups[group.GetName()] = struct{}{}
 	}
 	c.groupsController.Range(func(key, value any) bool {
 		resourceGroupName := key.(string)
-		if _, ok := lastedGroups[resourceGroupName]; !ok {
+		if _, ok := latestGroups[resourceGroupName]; !ok {
 			c.groupsController.Delete(key)
 		}
 		return true
From c1b42ee99e8472e826cad04feeac6cb3805cfccb Mon Sep 17 00:00:00 2001
From: Cabinfever_B
Date: Thu, 19 Jan 2023 17:19:54 +0800
Subject: [PATCH 30/32] address comment

Signed-off-by: Cabinfever_B
---
 pkg/mcs/resource_manager/client/client.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go
index 0c088f4dfe4..9a7a61ca208 100644
--- a/pkg/mcs/resource_manager/client/client.go
+++ b/pkg/mcs/resource_manager/client/client.go
@@ -336,7 +336,7 @@ func (c *ResourceGroupsController) OnRequestWait(
 }
 
 // OnResponse is used to consume tokens after receiving the response.
-func (c *ResourceGroupsController) OnResponse(ctx context.Context, resourceGroupName string, req RequestInfo, resp ResponseInfo) error {
+func (c *ResourceGroupsController) OnResponse(_ context.Context, resourceGroupName string, req RequestInfo, resp ResponseInfo) error {
 	if _, ok := defaultWhiteList[resourceGroupName]; ok {
 		return nil
 	}
@@ -345,7 +345,7 @@ func (c *ResourceGroupsController) OnResponse(ctx context.Context, resourceGroup
 		log.Warn("[resource group] resourceGroupName does not exist.", zap.String("resourceGroupName", resourceGroupName))
 	}
 	gc := tmp.(*groupCostController)
-	gc.onResponse(ctx, req, resp)
+	gc.onResponse(req, resp)
 	return nil
 }
 
@@ -770,7 +770,7 @@ retryLoop:
 	return nil
 }
 
-func (gc *groupCostController) onResponse(ctx context.Context, req RequestInfo, resp ResponseInfo) {
+func (gc *groupCostController) onResponse(req RequestInfo, resp ResponseInfo) {
 	delta := &rmpb.Consumption{}
 	for _, calc := range gc.calculators {
 		calc.AfterKVRequest(delta, req, resp)
From d1d56d119e628f49ce3a28744392dc2d54869515 Mon Sep 17 00:00:00 2001
From: Cabinfever_B
Date: Thu, 19 Jan 2023 17:34:28 +0800
Subject: [PATCH 31/32] address comment

Signed-off-by: Cabinfever_B
---
 pkg/mcs/resource_manager/client/client.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go
index 9a7a61ca208..15d41282100 100644
--- a/pkg/mcs/resource_manager/client/client.go
+++ b/pkg/mcs/resource_manager/client/client.go
@@ -222,7 +222,7 @@ func (c *ResourceGroupsController) handleTokenBucketResponse(resp []*rmpb.TokenB
 		v, ok := c.groupsController.Load(name)
 		if !ok {
 			log.Warn("A non-existent resource group was found when handling token response.", zap.String("name", name))
-			return
+			continue
 		}
 		gc := v.(*groupCostController)
 		gc.handleTokenBucketResponse(res)
From 37abfa868d5edba2e5713efe9b354135145c8cd1 Mon Sep 17 00:00:00 2001
From: Cabinfever_B
Date: Thu, 19 Jan 2023 18:49:19 +0800
Subject: [PATCH 32/32] address comment

Signed-off-by: Cabinfever_B
---
 pkg/mcs/resource_manager/client/client.go | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/pkg/mcs/resource_manager/client/client.go b/pkg/mcs/resource_manager/client/client.go
index 15d41282100..f5c60b7d653 100644
--- a/pkg/mcs/resource_manager/client/client.go
+++ b/pkg/mcs/resource_manager/client/client.go
@@ -30,11 +30,6 @@ const (
 	maxRetry = 3
 )
 
-var defaultWhiteList = map[string]struct{}{
-	"default": {},
-	"":        {},
-}
-
 // ResourceGroupKVInterceptor is used as quota limit controller for resource group using kv store.
 type ResourceGroupKVInterceptor interface {
 	// OnRequestWait is used to check whether resource group has enough tokens. It may need to wait for some time.
@@ -319,9 +314,6 @@ func (c *ResourceGroupsController) mainLoop(ctx context.Context) {
 	}
 }
 
 // OnRequestWait is used to check whether resource group has enough tokens. It may need to wait for some time.
 func (c *ResourceGroupsController) OnRequestWait(
 	ctx context.Context, resourceGroupName string, info RequestInfo,
 ) (err error) {
-	if _, ok := defaultWhiteList[resourceGroupName]; ok {
-		return nil
-	}
 	var gc *groupCostController
 	if tmp, ok := c.groupsController.Load(resourceGroupName); ok {
 		gc = tmp.(*groupCostController)
@@ -337,7 +329,6 @@ func (c *ResourceGroupsController) OnResponse(_ context.Context, resourceGroupNa
 
 // OnResponse is used to consume tokens after receiving the response.
 func (c *ResourceGroupsController) OnResponse(_ context.Context, resourceGroupName string, req RequestInfo, resp ResponseInfo) error {
-	if _, ok := defaultWhiteList[resourceGroupName]; ok {
-		return nil
-	}
 	tmp, ok := c.groupsController.Load(resourceGroupName)
 	if !ok {
 		log.Warn("[resource group] resourceGroupName does not exist.", zap.String("resourceGroupName", resourceGroupName))
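Taken together, the series leaves the client with the following lifecycle. A minimal caller-side sketch, matching the final-state signatures above (illustrative only: the package name, provider wiring, group name, and error handling here are assumptions, not part of the patches):

package demo

import (
	"context"

	rgcli "github.com/tikv/pd/pkg/mcs/resource_manager/client"
)

// runWithController sketches how a KV client would wrap one request with the
// controller. provider is any ResourceGroupProvider implementation (the PD
// client satisfies it in the tests above); req and resp are the caller's
// RequestInfo/ResponseInfo implementations.
func runWithController(ctx context.Context, provider rgcli.ResourceGroupProvider, req rgcli.RequestInfo, resp rgcli.ResponseInfo) error {
	// The all-ones cost config mirrors TestResourceGroupController.
	cfg := &rgcli.RequestUnitConfig{
		ReadBaseCost:     1,
		ReadCostPerByte:  1,
		ReadCPUMsCost:    1,
		WriteBaseCost:    1,
		WriteCostPerByte: 1,
	}
	controller, err := rgcli.NewResourceGroupController(1, provider, cfg)
	if err != nil {
		return err
	}
	controller.Start(ctx)   // spawns mainLoop under loopCtx
	defer controller.Stop() // cancels loopCtx

	// Block until the group can afford the request (retries internally).
	if err := controller.OnRequestWait(ctx, "test1", req); err != nil {
		return err
	}
	// ... issue the actual KV request here ...

	// Charge the consumption observed in the response.
	return controller.OnResponse(ctx, "test1", req, resp)
}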