Add consecutive count check for Auto-scaling (#1703)
* Add consecutive count check for Auto-scaling
Yisaer authored Feb 17, 2020
1 parent b4cda09 commit 56daf5e
Showing 12 changed files with 839 additions and 28 deletions.
36 changes: 36 additions & 0 deletions manifests/crd.yaml
@@ -7060,6 +7060,10 @@ spec:
tidb:
description: TidbAutoScalerSpec describes the spec for tidb auto-scaling
properties:
MetricsTimeDuration:
description: MetricsTimeDuration describes the time duration to be
queried in Prometheus
type: string
maxReplicas:
description: maxReplicas is the upper limit for the number of replicas
to which the autoscaler can scale out. It cannot be less than
@@ -7090,18 +7094,36 @@ spec:
will be set to 500
format: int32
type: integer
scaleInThreshold:
description: ScaleInThreshold describes the consecutive count threshold
for auto-scaling; if the scale-in recommendation is produced this
many consecutive times, the scale-in is performed. If not set,
the default value is 5.
format: int32
type: integer
scaleOutIntervalSeconds:
description: ScaleOutIntervalSeconds represents the interval, in seconds,
between consecutive scale-out operations. If not set, the default
ScaleOutIntervalSeconds is 300.
format: int32
type: integer
scaleOutThreshold:
description: ScaleOutThreshold describes the consecutive count threshold
for auto-scaling; if the scale-out recommendation is produced this
many consecutive times, the scale-out is performed. If not set,
the default value is 3.
format: int32
type: integer
required:
- maxReplicas
type: object
tikv:
description: TikvAutoScalerSpec describes the spec for tikv auto-scaling
properties:
MetricsTimeDuration:
description: MetricsTimeDuration describes the time duration to be
queried in Prometheus
type: string
maxReplicas:
description: maxReplicas is the upper limit for the number of replicas
to which the autoscaler can scale out. It cannot be less than
@@ -7132,12 +7154,26 @@ spec:
will be set to 500
format: int32
type: integer
scaleInThreshold:
description: ScaleInThreshold describes the consecutive count threshold
for auto-scaling; if the scale-in recommendation is produced this
many consecutive times, the scale-in is performed. If not set,
the default value is 5.
format: int32
type: integer
scaleOutIntervalSeconds:
description: ScaleOutIntervalSeconds represents the interval, in seconds,
between consecutive scale-out operations. If not set, the default
ScaleOutIntervalSeconds is 300.
format: int32
type: integer
scaleOutThreshold:
description: ScaleOutThreshold describes the consecutive count threshold
for auto-scaling; if the scale-out recommendation is produced this
many consecutive times, the scale-out is performed. If not set,
the default value is 3.
format: int32
type: integer
required:
- maxReplicas
type: object
63 changes: 63 additions & 0 deletions pkg/apis/pingcap/v1alpha1/openapi_generated.go

(Generated file; diff not rendered.)

18 changes: 18 additions & 0 deletions pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go
@@ -112,6 +112,24 @@ type BasicAutoScalerSpec struct {
// If not set, the default metric will be set to 80% average CPU utilization.
// +optional
Metrics []v2beta2.MetricSpec `json:"metrics,omitempty"`

// MetricsTimeDuration describes the time duration to be queried in Prometheus
// +optional
MetricsTimeDuration *string `json:"MetricsTimeDuration,omitempty"`

// ScaleOutThreshold describes the consecutive count threshold for auto-scaling;
// if the scale-out recommendation is produced this many consecutive times,
// the scale-out is performed.
// If not set, the default value is 3.
// +optional
ScaleOutThreshold *int32 `json:"scaleOutThreshold,omitempty"`

// ScaleInThreshold describes the consecutive count threshold for auto-scaling;
// if the scale-in recommendation is produced this many consecutive times,
// the scale-in is performed.
// If not set, the default value is 5.
// +optional
ScaleInThreshold *int32 `json:"scaleInThreshold,omitempty"`
}

// TODO: sync status
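For orientation, here is a minimal sketch (not part of this commit) of how the three new BasicAutoScalerSpec fields could be populated from Go. The import path and MaxReplicas follow the existing v1alpha1 API; the pointer helpers and concrete values are illustrative assumptions.

package main

import (
	"fmt"

	"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
)

// Local pointer helpers; stand-ins for whatever the operator uses internally.
func int32Ptr(i int32) *int32 { return &i }
func strPtr(s string) *string { return &s }

func main() {
	// The three fields added in this commit, alongside the required maxReplicas.
	spec := v1alpha1.BasicAutoScalerSpec{
		MaxReplicas:         5,
		MetricsTimeDuration: strPtr("3m"), // window queried from Prometheus
		ScaleOutThreshold:   int32Ptr(3),  // consecutive scale-out results required
		ScaleInThreshold:    int32Ptr(5),  // consecutive scale-in results required
	}
	fmt.Printf("%+v\n", spec)
}

Leaving ScaleOutThreshold and ScaleInThreshold unset falls back to the documented defaults of 3 and 5 respectively.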
15 changes: 15 additions & 0 deletions pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go

(Generated file; diff not rendered.)

9 changes: 8 additions & 1 deletion pkg/autoscaler/autoscaler/autoscaler_manager.go
@@ -20,6 +20,7 @@ import (
informers "github.com/pingcap/tidb-operator/pkg/client/informers/externalversions"
v1alpha1listers "github.com/pingcap/tidb-operator/pkg/client/listers/pingcap/v1alpha1"
promClient "github.com/prometheus/client_golang/api"
"k8s.io/apimachinery/pkg/api/errors"
kubeinformers "k8s.io/client-go/informers"
appslisters "k8s.io/client-go/listers/apps/v1"
"k8s.io/client-go/tools/record"
@@ -57,8 +58,14 @@ func (am *autoScalerManager) Sync(tac *v1alpha1.TidbClusterAutoScaler) error {
tcNamespace := tac.Spec.Cluster.Namespace
tc, err := am.tcLister.TidbClusters(tcNamespace).Get(tcName)
if err != nil {
if errors.IsNotFound(err) {
// The target TidbCluster ref has been deleted; empty the auto-scaling status
emptyAutoScalingCountAnn(tac, v1alpha1.TiDBMemberType)
emptyAutoScalingCountAnn(tac, v1alpha1.TiKVMemberType)
}
return err
}
checkAndUpdateTacAnn(tac)
oldTCSpec := tc.Spec.DeepCopy()
if err := am.syncAutoScaling(tc, tac); err != nil {
return err
@@ -94,7 +101,7 @@ func (am *autoScalerManager) syncTidbClusterReplicas(tc *v1alpha1.TidbCluster, o
}

//TODO: sync tac status
func (am *autoScalerManager) syncAutoScalingStatus(tc *v1alpha1.TidbCluster, oldTCSpec *v1alpha1.TidbClusterSpec,
func (am *autoScalerManager) syncAutoScalingStatus(tc *v1alpha1.TidbCluster, oldTc *v1alpha1.TidbClusterSpec,
tac *v1alpha1.TidbClusterAutoScaler) error {
return nil
}
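The emptyAutoScalingCountAnn and checkAndUpdateTacAnn helpers called above are defined in a file that is not rendered on this page. Below is a rough sketch of the behavior the call sites imply, assuming the consecutive count is kept in an annotation on the TidbClusterAutoScaler; the function body and the annotation key are assumptions, not the commit's code.

// Illustrative only: the real helper and its annotation keys live elsewhere in this PR.
func emptyAutoScalingCountAnn(tac *v1alpha1.TidbClusterAutoScaler, memberType v1alpha1.MemberType) {
	if tac.Annotations == nil {
		return
	}
	// Hypothetical key, e.g. "tidb.pingcap.com/tidb-consecutive-count".
	key := "tidb.pingcap.com/" + string(memberType) + "-consecutive-count"
	delete(tac.Annotations, key)
}

Clearing the counter when the target TidbCluster disappears (or when auto-scaling is skipped) prevents a stale consecutive count from triggering a scale operation later.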
43 changes: 32 additions & 11 deletions pkg/autoscaler/autoscaler/tidb_autoscaler.go
@@ -24,38 +24,59 @@ import (

func (am *autoScalerManager) syncTiDB(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, client promClient.Client) error {
if tac.Spec.TiDB == nil {
emptyAutoScalingCountAnn(tac, v1alpha1.TiDBMemberType)
return nil
}
sts, err := am.stsLister.StatefulSets(tc.Namespace).Get(operatorUtils.GetStatefulSetName(tc, v1alpha1.TiDBMemberType))
if err != nil {
return err
}
if !checkAutoScalingPrerequisites(tc, sts, v1alpha1.TiDBMemberType) {
emptyAutoScalingCountAnn(tac, v1alpha1.TiDBMemberType)
return nil
}
targetReplicas := tc.Spec.TiDB.Replicas

// TODO: sync tidb.metrics from prometheus
// rate(process_cpu_seconds_total{cluster="tidb",job="tidb"}[threshold Minute])
//for _, _ = range tac.Spec.TiDB.Metrics {
// // revive:disable:empty-block
//}
currentReplicas := tc.Spec.TiDB.Replicas
targetReplicas := calculateRecommendedReplicas(tac, v1alpha1.TiDBMemberType, client)
targetReplicas = limitTargetReplicas(targetReplicas, tac, v1alpha1.TiDBMemberType)
if targetReplicas == tc.Spec.TiDB.Replicas {
emptyAutoScalingCountAnn(tac, v1alpha1.TiDBMemberType)
return nil
}
return syncTiDBAfterCalculated(tc, tac, currentReplicas, targetReplicas)
}

// syncTiDBAfterCalculated checks the consecutive count to avoid jitter, and also checks the interval
// duration between auto-scaling operations. If either condition is not met, the auto-scaling is rejected.
// If the auto-scaling is permitted, the timestamp is recorded and the consecutive count is reset to zero.
func syncTiDBAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32) error {
if err := updateConsecutiveCount(tac, v1alpha1.TiDBMemberType, currentReplicas, recommendedReplicas); err != nil {
return err
}

ableToScale, err := checkConsecutiveCount(tac, v1alpha1.TiDBMemberType, currentReplicas, recommendedReplicas)
if err != nil {
return err
}
if !ableToScale {
return nil
}
intervalSeconds := tac.Spec.TiDB.ScaleInIntervalSeconds
if targetReplicas > tc.Spec.TiDB.Replicas {
if recommendedReplicas > currentReplicas {
intervalSeconds = tac.Spec.TiDB.ScaleOutIntervalSeconds
}
ableToScale, err := checkStsAutoScalingInterval(tc, *intervalSeconds, v1alpha1.TiDBMemberType)
ableToScale, err = checkStsAutoScalingInterval(tc, *intervalSeconds, v1alpha1.TiDBMemberType)
if err != nil {
return err
}
if !ableToScale {
return nil
}
tc.Spec.Annotations[label.AnnTiDBLastAutoScalingTimestamp] = time.Now().String()
tc.Spec.TiDB.Replicas = targetReplicas
updateTcTiDBAnnIfScale(tac)
tc.Spec.TiDB.Replicas = recommendedReplicas
return nil
}

func updateTcTiDBAnnIfScale(tac *v1alpha1.TidbClusterAutoScaler) {
tac.Annotations[label.AnnTiDBLastAutoScalingTimestamp] = time.Now().String()
emptyAutoScalingCountAnn(tac, v1alpha1.TiDBMemberType)
}
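updateConsecutiveCount and checkConsecutiveCount, which syncTiDBAfterCalculated relies on, are defined in files not shown here. The sketch below outlines the gating idea — compare the number of consecutive identical recommendations against ScaleOutThreshold or ScaleInThreshold — under the assumption that the count is stored in a tac annotation; the key, parsing, and fallback defaults are illustrative, not the actual implementation (strconv is assumed to be imported).

// Illustrative sketch of the threshold check; not the code from this commit.
func checkConsecutiveCountSketch(tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32) (bool, error) {
	// Hypothetical annotation carrying the consecutive-recommendation count.
	count, err := strconv.ParseInt(tac.Annotations["tidb.pingcap.com/tidb-consecutive-count"], 10, 32)
	if err != nil {
		return false, err
	}
	threshold := int32(5) // documented scale-in default
	if recommendedReplicas > currentReplicas {
		threshold = 3 // documented scale-out default
		if tac.Spec.TiDB.ScaleOutThreshold != nil {
			threshold = *tac.Spec.TiDB.ScaleOutThreshold
		}
	} else if tac.Spec.TiDB.ScaleInThreshold != nil {
		threshold = *tac.Spec.TiDB.ScaleInThreshold
	}
	return int32(count) >= threshold, nil
}

With the defaults of 3 consecutive scale-out and 5 consecutive scale-in recommendations, a single noisy metric sample no longer changes the replica count.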
(Remaining changed files not rendered.)
