pingcap · Yisaer · Feb 17, 2020 · Feb 14, 2020 · Feb 14, 2020 · Feb 14, 2020
diff --git a/manifests/crd.yaml b/manifests/crd.yaml
@@ -7078,6 +7078,12 @@ spec:
                     metric will be set to 80% average CPU utilization.
                   items: {}
                   type: array
+                metricsTimeWindowSeconds:
+                  description: MetricsTimeWindowSeconds describes the time window, in
+                    seconds, for the metrics to be queried from Prometheus. If not set,
+                    the default value would be 180.
+                  format: int32
+                  type: integer
                 minReplicas:
                   description: minReplicas is the lower limit for the number of replicas
                     to which the autoscaler can scale down.  It defaults to 1 pod.
@@ -7090,12 +7096,28 @@ spec:
                     will be set to 500
                   format: int32
                   type: integer
+                scaleInThreshold:
+                  description: ScaleInThreshold describes the consecutive threshold
+                    for the auto-scaling. If the consecutive count of scale-in
+                    results in auto-scaling reaches this number, the auto-scaling would
+                    be performed. If not set, the default value is 1, which means it
+                    would perform with no threshold.
+                  format: int32
+                  type: integer
                 scaleOutIntervalSeconds:
                   description: ScaleOutIntervalSeconds represents the duration seconds
                     between each auto-scaling-out If not set, the default ScaleOutIntervalSeconds
                     will be set to 300
                   format: int32
                   type: integer
+                scaleOutThreshold:
+                  description: ScaleOutThreshold describes the consecutive threshold
+                    for the auto-scaling. If the consecutive count of scale-out
+                    results in auto-scaling reaches this number, the auto-scaling would
+                    be performed. If not set, the default value is 1, which means it
+                    would perform with no threshold.
+                  format: int32
+                  type: integer
               required:
               - maxReplicas
               type: object
@@ -7120,6 +7142,12 @@ spec:
                     metric will be set to 80% average CPU utilization.
                   items: {}
                   type: array
+                metricsTimeWindowSeconds:
+                  description: MetricsTimeWindowSeconds describes the time window, in
+                    seconds, for the metrics to be queried from Prometheus. If not set,
+                    the default value would be 180.
+                  format: int32
+                  type: integer
                 minReplicas:
                   description: minReplicas is the lower limit for the number of replicas
                     to which the autoscaler can scale down.  It defaults to 1 pod.
@@ -7132,12 +7160,28 @@ spec:
                     will be set to 500
                   format: int32
                   type: integer
+                scaleInThreshold:
+                  description: ScaleInThreshold describes the consecutive threshold
+                    for the auto-scaling. If the consecutive count of scale-in
+                    results in auto-scaling reaches this number, the auto-scaling would
+                    be performed. If not set, the default value is 1, which means it
+                    would perform with no threshold.
+                  format: int32
+                  type: integer
                 scaleOutIntervalSeconds:
                   description: ScaleOutIntervalSeconds represents the duration seconds
                     between each auto-scaling-out If not set, the default ScaleOutIntervalSeconds
                     will be set to 300
                   format: int32
                   type: integer
+                scaleOutThreshold:
+                  description: ScaleOutThreshold describes the consecutive threshold
+                    for the auto-scaling. If the consecutive count of scale-out
+                    results in auto-scaling reaches this number, the auto-scaling would
+                    be performed. If not set, the default value is 1, which means it
+                    would perform with no threshold.
+                  format: int32
+                  type: integer
               required:
               - maxReplicas
               type: object

diff --git a/pkg/apis/pingcap/v1alpha1/openapi_generated.go b/pkg/apis/pingcap/v1alpha1/openapi_generated.go
diff --git a/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go b/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go
@@ -112,6 +112,26 @@ type BasicAutoScalerSpec struct {
 	// If not set, the default metric will be set to 80% average CPU utilization.
 	// +optional
 	Metrics []v2beta2.MetricSpec `json:"metrics,omitempty"`
+
+	// MetricsTimeWindowSeconds describes the time window, in seconds, for the metrics
+	// to be queried from Prometheus.
+	// If not set, the default value would be 180.
+	// +optional
+	MetricsTimeWindowSeconds *int32 `json:"metricsTimeWindowSeconds,omitempty"`
+
+	// ScaleOutThreshold describes the consecutive threshold for the auto-scaling.
+	// If the consecutive count of scale-out results in auto-scaling reaches this number,
+	// the auto-scaling would be performed.
+	// If not set, the default value is 3.
+	// +optional
+	ScaleOutThreshold *int32 `json:"scaleOutThreshold,omitempty"`
+
+	// ScaleInThreshold describes the consecutive threshold for the auto-scaling.
+	// If the consecutive count of scale-in results in auto-scaling reaches this number,
+	// the auto-scaling would be performed.
+	// If not set, the default value is 3.
+	// +optional
+	ScaleInThreshold *int32 `json:"scaleInThreshold,omitempty"`
 }
 
 // TODO: sync status

diff --git a/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go
diff --git a/pkg/autoscaler/autoscaler/autoscaler_manager.go b/pkg/autoscaler/autoscaler/autoscaler_manager.go
@@ -94,7 +94,7 @@ func (am *autoScalerManager) syncTidbClusterReplicas(tc *v1alpha1.TidbCluster, o
 }
 
 //TODO: sync tac status
-func (am *autoScalerManager) syncAutoScalingStatus(tc *v1alpha1.TidbCluster, oldTCSpec *v1alpha1.TidbClusterSpec,
+func (am *autoScalerManager) syncAutoScalingStatus(tc *v1alpha1.TidbCluster, oldTc *v1alpha1.TidbClusterSpec,
 	tac *v1alpha1.TidbClusterAutoScaler) error {
 	return nil
 }
diff --git a/pkg/autoscaler/autoscaler/tidb_autoscaler.go b/pkg/autoscaler/autoscaler/tidb_autoscaler.go
@@ -24,38 +24,59 @@ import (
 
 func (am *autoScalerManager) syncTiDB(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, client promClient.Client) error {
 	if tac.Spec.TiDB == nil {
+		emptyConsecutiveCount(tc, v1alpha1.TiDBMemberType)
 		return nil
 	}
 	sts, err := am.stsLister.StatefulSets(tc.Namespace).Get(operatorUtils.GetStatefulSetName(tc, v1alpha1.TiDBMemberType))
 	if err != nil {
 		return err
 	}
 	if !checkAutoScalingPrerequisites(tc, sts, v1alpha1.TiDBMemberType) {
+		emptyConsecutiveCount(tc, v1alpha1.TiDBMemberType)
 		return nil
 	}
-	targetReplicas := tc.Spec.TiDB.Replicas
-
-	// TODO: sync tidb.metrics from prometheus
-	// rate(process_cpu_seconds_total{cluster="tidb",job="tidb"}[threshold Minute])
-	//for _, _ = range tac.Spec.TiDB.Metrics {
-	//	// revive:disable:empty-block
-	//}
+	currentReplicas := tc.Spec.TiDB.Replicas
+	targetReplicas := calculateRecommendedReplicas(tac, v1alpha1.TiDBMemberType, client)
 	targetReplicas = limitTargetReplicas(targetReplicas, tac, v1alpha1.TiDBMemberType)
 	if targetReplicas == tc.Spec.TiDB.Replicas {
+		emptyConsecutiveCount(tc, v1alpha1.TiDBMemberType)
+		return nil
+	}
+	return syncTiDBAfterCalculated(tc, tac, currentReplicas, targetReplicas)
+}
+
+// syncTiDBAfterCalculated checks the consecutive count to avoid jitter, and it also checks the interval
+// duration between each auto-scaling. If either of them is not met, the auto-scaling is rejected.
+// If the auto-scaling is permitted, the timestamp is recorded and the consecutive count is zeroed.
+func syncTiDBAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32) error {
+	if err := updateConsecutiveCount(tc, tac, v1alpha1.TiDBMemberType, currentReplicas, recommendedReplicas); err != nil {
+		return err
+	}
+
+	ableToScale, err := checkConsecutiveCount(tc, tac, v1alpha1.TiDBMemberType, currentReplicas, recommendedReplicas)
+	if err != nil {
+		return err
+	}
+	if !ableToScale {
 		return nil
 	}
 	intervalSeconds := tac.Spec.TiDB.ScaleInIntervalSeconds
-	if targetReplicas > tc.Spec.TiDB.Replicas {
+	if recommendedReplicas > currentReplicas {
 		intervalSeconds = tac.Spec.TiDB.ScaleOutIntervalSeconds
 	}
-	ableToScale, err := checkStsAutoScalingInterval(tc, *intervalSeconds, v1alpha1.TiDBMemberType)
+	ableToScale, err = checkStsAutoScalingInterval(tc, *intervalSeconds, v1alpha1.TiDBMemberType)
 	if err != nil {
 		return err
 	}
 	if !ableToScale {
 		return nil
 	}
-	tc.Spec.Annotations[label.AnnTiDBLastAutoScalingTimestamp] = time.Now().String()
-	tc.Spec.TiDB.Replicas = targetReplicas
+	updateTcTiDBAnnIfScale(tc)
+	tc.Spec.TiDB.Replicas = recommendedReplicas
 	return nil
 }
+
+func updateTcTiDBAnnIfScale(tc *v1alpha1.TidbCluster) {
+	tc.Annotations[label.AnnTiDBLastAutoScalingTimestamp] = time.Now().String()
+	emptyConsecutiveCount(tc, v1alpha1.TiDBMemberType)
+}