diff --git a/docs/api-references/docs.md b/docs/api-references/docs.md index f786118f92..c4470f0e1c 100644 --- a/docs/api-references/docs.md +++ b/docs/api-references/docs.md @@ -1760,10 +1760,6 @@ TidbMonitorStatus

AutoScalerPhase

-(Appears on: -BasicAutoScalerStatus) -

-

BRConfig

@@ -2619,19 +2615,6 @@ to fetch the recommended replicas for TiKV/TiDB

-phase
- - -AutoScalerPhase - - - - -

Phase describes cluster auto scaling phase

- - - - metrics
@@ -16167,20 +16150,6 @@ BasicAutoScalerSpec

- - -readyToScaleThresholdSeconds
- -int32 - - - -(Optional) -

ReadyToScaleThresholdSeconds represents duration that the ReadyToScale phase -should last for before auto scaling. -If not set, the default ReadyToScaleThresholdSeconds will be set to 30.

- -

TikvAutoScalerStatus

diff --git a/go.mod b/go.mod index 5f8d70beb6..3ac43dbca4 100644 --- a/go.mod +++ b/go.mod @@ -39,7 +39,6 @@ require ( github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 // indirect github.com/grpc-ecosystem/grpc-gateway v1.13.0 // indirect github.com/imdario/mergo v0.3.7 // indirect - github.com/jonboulle/clockwork v0.1.0 github.com/juju/errors v0.0.0-20180806074554-22422dad46e1 github.com/juju/loggo v0.0.0-20180524022052-584905176618 // indirect github.com/juju/testing v0.0.0-20180920084828-472a3e8b2073 // indirect diff --git a/manifests/crd.yaml b/manifests/crd.yaml index e1d73d942f..0cd82ff046 100644 --- a/manifests/crd.yaml +++ b/manifests/crd.yaml @@ -13955,9 +13955,6 @@ spec: minReplicas: format: int32 type: integer - readyToScaleThresholdSeconds: - format: int32 - type: integer scaleInIntervalSeconds: format: int32 type: integer @@ -13995,8 +13992,6 @@ spec: - thresholdValue type: object type: array - phase: - type: string recommendedReplicas: format: int32 type: integer @@ -14026,8 +14021,6 @@ spec: - thresholdValue type: object type: array - phase: - type: string recommendedReplicas: format: int32 type: integer diff --git a/pkg/apis/pingcap/v1alpha1/openapi_generated.go b/pkg/apis/pingcap/v1alpha1/openapi_generated.go index 7b84ce1fef..022e3cea42 100644 --- a/pkg/apis/pingcap/v1alpha1/openapi_generated.go +++ b/pkg/apis/pingcap/v1alpha1/openapi_generated.go @@ -933,13 +933,6 @@ func schema_pkg_apis_pingcap_v1alpha1_BasicAutoScalerStatus(ref common.Reference Description: "BasicAutoScalerStatus describe the basic auto-scaling status", Type: []string{"object"}, Properties: map[string]spec.Schema{ - "phase": { - SchemaProps: spec.SchemaProps{ - Description: "Phase describes cluster auto scaling phase", - Type: []string{"string"}, - Format: "", - }, - }, "metrics": { SchemaProps: spec.SchemaProps{ Description: "MetricsStatusList describes the metrics status in the last auto-scaling reconciliation", @@ -8135,13 +8128,6 @@ func 
schema_pkg_apis_pingcap_v1alpha1_TidbAutoScalerStatus(ref common.ReferenceC Description: "TidbAutoScalerStatus describe the auto-scaling status of tidb", Type: []string{"object"}, Properties: map[string]spec.Schema{ - "phase": { - SchemaProps: spec.SchemaProps{ - Description: "Phase describes cluster auto scaling phase", - Type: []string{"string"}, - Format: "", - }, - }, "metrics": { SchemaProps: spec.SchemaProps{ Description: "MetricsStatusList describes the metrics status in the last auto-scaling reconciliation", @@ -9264,13 +9250,6 @@ func schema_pkg_apis_pingcap_v1alpha1_TikvAutoScalerSpec(ref common.ReferenceCal Ref: ref("github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.ExternalEndpoint"), }, }, - "readyToScaleThresholdSeconds": { - SchemaProps: spec.SchemaProps{ - Description: "ReadyToScaleThresholdSeconds represents duration that the ReadyToScale phase should last for before auto scaling. If not set, the default ReadyToScaleThresholdSeconds will be set to 30.", - Type: []string{"integer"}, - Format: "int32", - }, - }, }, Required: []string{"maxReplicas"}, }, @@ -9287,13 +9266,6 @@ func schema_pkg_apis_pingcap_v1alpha1_TikvAutoScalerStatus(ref common.ReferenceC Description: "TikvAutoScalerStatus describe the auto-scaling status of tikv", Type: []string{"object"}, Properties: map[string]spec.Schema{ - "phase": { - SchemaProps: spec.SchemaProps{ - Description: "Phase describes cluster auto scaling phase", - Type: []string{"string"}, - Format: "", - }, - }, "metrics": { SchemaProps: spec.SchemaProps{ Description: "MetricsStatusList describes the metrics status in the last auto-scaling reconciliation", diff --git a/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go b/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go index b4a726f587..825967de4a 100644 --- a/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go +++ b/pkg/apis/pingcap/v1alpha1/tidbclusterautoscaler_types.go @@ -84,12 +84,6 @@ type TidbClusterAutoScalerSpec struct { // 
TikvAutoScalerSpec describes the spec for tikv auto-scaling type TikvAutoScalerSpec struct { BasicAutoScalerSpec `json:",inline"` - - // ReadyToScaleThresholdSeconds represents duration that the ReadyToScale phase - // should last for before auto scaling. - // If not set, the default ReadyToScaleThresholdSeconds will be set to 30. - // +optional - ReadyToScaleThresholdSeconds *int32 `json:"readyToScaleThresholdSeconds,omitempty"` } // +k8s:openapi-gen=true @@ -183,8 +177,6 @@ type TikvAutoScalerStatus struct { // +k8s:openapi-gen=true // BasicAutoScalerStatus describe the basic auto-scaling status type BasicAutoScalerStatus struct { - // Phase describes cluster auto scaling phase - Phase AutoScalerPhase `json:"phase,omitempty"` // MetricsStatusList describes the metrics status in the last auto-scaling reconciliation // +optional MetricsStatusList []MetricsStatus `json:"metrics,omitempty"` diff --git a/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go index 4b4308f182..6b28232d82 100644 --- a/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go @@ -7074,11 +7074,6 @@ func (in *TidbMonitorStatus) DeepCopy() *TidbMonitorStatus { func (in *TikvAutoScalerSpec) DeepCopyInto(out *TikvAutoScalerSpec) { *out = *in in.BasicAutoScalerSpec.DeepCopyInto(&out.BasicAutoScalerSpec) - if in.ReadyToScaleThresholdSeconds != nil { - in, out := &in.ReadyToScaleThresholdSeconds, &out.ReadyToScaleThresholdSeconds - *out = new(int32) - **out = **in - } return } diff --git a/pkg/autoscaler/autoscaler/calculate/util.go b/pkg/autoscaler/autoscaler/calculate/util.go index 14ded46ec9..1523ba74e3 100644 --- a/pkg/autoscaler/autoscaler/calculate/util.go +++ b/pkg/autoscaler/autoscaler/calculate/util.go @@ -32,13 +32,15 @@ const ( // currently, we only choose one metrics to be computed. 
// If there exists several metrics, we tend to choose ResourceMetricSourceType metric -func FilterMetrics(metrics []autoscalingv2beta2.MetricSpec) autoscalingv2beta2.MetricSpec { +func FilterMetrics(metrics []autoscalingv2beta2.MetricSpec, name corev1.ResourceName) []autoscalingv2beta2.MetricSpec { + var list []autoscalingv2beta2.MetricSpec for _, m := range metrics { - if m.Type == autoscalingv2beta2.ResourceMetricSourceType && m.Resource != nil { - return m + if m.Type == autoscalingv2beta2.ResourceMetricSourceType && m.Resource != nil && m.Resource.Name == name { + list = append(list, m) + break } } - return metrics[0] + return list } // genMetricType return the supported MetricType in Operator by kubernetes auto-scaling MetricType diff --git a/pkg/autoscaler/autoscaler/tidb_autoscaler.go b/pkg/autoscaler/autoscaler/tidb_autoscaler.go index 9e42a90ac7..6a40d4ae18 100644 --- a/pkg/autoscaler/autoscaler/tidb_autoscaler.go +++ b/pkg/autoscaler/autoscaler/tidb_autoscaler.go @@ -24,6 +24,7 @@ import ( operatorUtils "github.com/pingcap/tidb-operator/pkg/util" promClient "github.com/prometheus/client_golang/api" appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" "k8s.io/klog" ) @@ -97,29 +98,22 @@ func calculateTidbMetrics(tac *v1alpha1.TidbClusterAutoScaler, sts *appsv1.State if err != nil { return -1, err } - metric := calculate.FilterMetrics(tac.Spec.TiDB.Metrics) - mType, err := calculate.GenMetricType(tac, metric) - if err != nil { - return -1, err - } duration, err := time.ParseDuration(*tac.Spec.TiDB.MetricsTimeDuration) if err != nil { return -1, err } - sq := &calculate.SingleQuery{ - Endpoint: ep, - Timestamp: time.Now().Unix(), - Instances: instances, - Metric: metric, - Quary: fmt.Sprintf(calculate.TidbSumCpuMetricsPattern, tac.Spec.Cluster.Name, *tac.Spec.TiDB.MetricsTimeDuration), - } - - switch mType { - case calculate.MetricTypeCPU: + metrics := calculate.FilterMetrics(tac.Spec.TiDB.Metrics, corev1.ResourceCPU) + if len(metrics) > 0 { + sq := 
&calculate.SingleQuery{ + Endpoint: ep, + Timestamp: time.Now().Unix(), + Instances: instances, + Metric: metrics[0], + Quary: fmt.Sprintf(calculate.TidbSumCpuMetricsPattern, tac.Spec.Cluster.Name, *tac.Spec.TiDB.MetricsTimeDuration), + } return calculate.CalculateRecomendedReplicasByCpuCosts(tac, sq, sts, client, v1alpha1.TiDBMemberType, duration) - default: - return -1, fmt.Errorf(calculate.InvalidTacMetricConfigureMsg, tac.Namespace, tac.Name) } + return -1, fmt.Errorf(calculate.InvalidTacMetricConfigureMsg, tac.Namespace, tac.Name) } func filterTidbInstances(tc *v1alpha1.TidbCluster) []string { diff --git a/pkg/autoscaler/autoscaler/tikv_autoscaler.go b/pkg/autoscaler/autoscaler/tikv_autoscaler.go index da6d30624b..9c48094eea 100644 --- a/pkg/autoscaler/autoscaler/tikv_autoscaler.go +++ b/pkg/autoscaler/autoscaler/tikv_autoscaler.go @@ -17,17 +17,14 @@ import ( "fmt" "time" - "github.com/jonboulle/clockwork" "github.com/pingcap/advanced-statefulset/client/apis/apps/v1/helper" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" "github.com/pingcap/tidb-operator/pkg/autoscaler/autoscaler/calculate" - "github.com/pingcap/tidb-operator/pkg/autoscaler/autoscaler/query" - "github.com/pingcap/tidb-operator/pkg/controller" "github.com/pingcap/tidb-operator/pkg/label" operatorUtils "github.com/pingcap/tidb-operator/pkg/util" promClient "github.com/prometheus/client_golang/api" appsv1 "k8s.io/api/apps/v1" - "k8s.io/klog" + corev1 "k8s.io/api/core/v1" ) func (am *autoScalerManager) syncTiKV(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler) error { @@ -45,58 +42,7 @@ func (am *autoScalerManager) syncTiKV(tc *v1alpha1.TidbCluster, tac *v1alpha1.Ti return nil } instances := filterTiKVInstances(tc) - var targetReplicas int32 - if tac.Spec.TiKV.ExternalEndpoint == nil { - targetReplicas, err = calculateTikvMetrics(tac, sts, instances) - if err != nil { - return err - } - } else { - targetReplicas, err = query.ExternalService(tc, v1alpha1.TiKVMemberType, 
tac.Spec.TiKV.ExternalEndpoint, am.kubecli) - if err != nil { - klog.Errorf("tac[%s/%s] 's query to the external endpoint got error: %v", tac.Namespace, tac.Name, err) - return err - } - } - targetReplicas = limitTargetReplicas(targetReplicas, tac, v1alpha1.TiKVMemberType) - if targetReplicas == tc.Spec.TiKV.Replicas { - return nil - } - currentReplicas := int32(len(instances)) - return syncTiKVAfterCalculated(tc, tac, currentReplicas, targetReplicas, sts) -} - -// syncTiKVAfterCalculated would check the Consecutive count to avoid jitter, and it would also check the interval -// duration between each auto-scaling. If either of them is not meet, the auto-scaling would be rejected. -// If the auto-scaling is permitted, the timestamp would be recorded and the Consecutive count would be zeroed. -// The currentReplicas of TiKV calculated in auto-scaling is the count of the StateUp TiKV instance, so we need to -// add the number of other state tikv instance replicas when we update the TidbCluster.Spec.TiKV.Replicas -func syncTiKVAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32, sts *appsv1.StatefulSet) error { - intervalSeconds := tac.Spec.TiKV.ScaleInIntervalSeconds - if recommendedReplicas > currentReplicas { - if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleOutAutoScalerPhase { - tac.Status.TiKV.Phase = v1alpha1.ReadyToScaleOutAutoScalerPhase - // phase could change from Normal to ReadyToScaleOut, ReadyToScaleIn to ReadyToScaleOut, - // reset timestamp in both cases. - tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Unix()) - } - } else { - if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleInAutoScalerPhase { - tac.Status.TiKV.Phase = v1alpha1.ReadyToScaleInAutoScalerPhase - // phase could change from Normal to ReadyToScaleIn, ReadyToScaleOut to ReadyToScaleIn, - // reset timestamp in both cases. 
- tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Unix()) - } - } - - ableToScale, err := checkTiKVStsAutoScaling(tac, *tac.Spec.TiKV.ReadyToScaleThresholdSeconds, *intervalSeconds) - if err != nil { - return err - } - if !ableToScale { - return nil - } - return updateTcTiKVIfScale(tc, tac, currentReplicas, recommendedReplicas, sts) + return calculateTiKVMetrics(tac, tc, sts, instances) } //TODO: fetch tikv instances info from pdapi in future @@ -110,91 +56,110 @@ func filterTiKVInstances(tc *v1alpha1.TidbCluster) []string { return instances } -// we record the auto-scaling out slot for tikv, in order to add special hot labels when they are created -func updateTcTiKVIfScale(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32, sts *appsv1.StatefulSet) error { - if recommendedReplicas > currentReplicas { - newlyScaleOutOrdinalSets := helper.GetPodOrdinals(recommendedReplicas, sts).Difference(helper.GetPodOrdinals(currentReplicas, sts)) - if newlyScaleOutOrdinalSets.Len() > 0 { - if tc.Annotations == nil { - tc.Annotations = map[string]string{} - } - existed := operatorUtils.GetAutoScalingOutSlots(tc, v1alpha1.TiKVMemberType) - v, err := operatorUtils.Encode(newlyScaleOutOrdinalSets.Union(existed).List()) - if err != nil { - return err - } - tc.Annotations[label.AnnTiKVAutoScalingOutOrdinals] = v - } - } - tac.Status.TiKV.Phase = v1alpha1.NormalAutoScalerPhase - tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] = fmt.Sprintf("%d", time.Now().Unix()) - tc.Spec.TiKV.Replicas = recommendedReplicas - tac.Status.TiKV.RecommendedReplicas = recommendedReplicas - return nil -} - -func calculateTikvMetrics(tac *v1alpha1.TidbClusterAutoScaler, sts *appsv1.StatefulSet, instances []string) (int32, error) { +func calculateTiKVMetrics(tac *v1alpha1.TidbClusterAutoScaler, tc *v1alpha1.TidbCluster, sts *appsv1.StatefulSet, instances []string) error { ep, err := genMetricsEndpoint(tac) if err 
!= nil { - return -1, err + return err } client, err := promClient.NewClient(promClient.Config{Address: ep}) if err != nil { - return -1, err + return err } - - metric := calculate.FilterMetrics(tac.Spec.TiKV.Metrics) - mType, err := calculate.GenMetricType(tac, metric) + duration, err := time.ParseDuration(*tac.Spec.TiKV.MetricsTimeDuration) if err != nil { - return -1, err + return err } - duration, err := time.ParseDuration(*tac.Spec.TiKV.MetricsTimeDuration) + // check CPU + metrics := calculate.FilterMetrics(tac.Spec.TiKV.Metrics, corev1.ResourceCPU) + if len(metrics) > 0 { + sq := &calculate.SingleQuery{ + Endpoint: ep, + Timestamp: time.Now().Unix(), + Instances: instances, + Metric: metrics[0], + Quary: fmt.Sprintf(calculate.TikvSumCpuMetricsPattern, tac.Spec.Cluster.Name, *tac.Spec.TiKV.MetricsTimeDuration), + } + return calculateTiKVCPUMetrics(tac, tc, sts, sq, client, duration) + } + return nil +} + +func calculateTiKVCPUMetrics(tac *v1alpha1.TidbClusterAutoScaler, tc *v1alpha1.TidbCluster, sts *appsv1.StatefulSet, sq *calculate.SingleQuery, client promClient.Client, duration time.Duration) error { + + targetReplicas, err := calculate.CalculateRecomendedReplicasByCpuCosts(tac, sq, sts, client, v1alpha1.TiKVMemberType, duration) if err != nil { - return -1, err + return err } - sq := &calculate.SingleQuery{ - Endpoint: ep, - Timestamp: time.Now().Unix(), - Instances: instances, - Metric: metric, - Quary: fmt.Sprintf(calculate.TikvSumCpuMetricsPattern, tac.Spec.Cluster.Name, *tac.Spec.TiKV.MetricsTimeDuration), + targetReplicas = limitTargetReplicas(targetReplicas, tac, v1alpha1.TiKVMemberType) + if targetReplicas == tc.Spec.TiKV.Replicas { + return nil } - - switch mType { - case calculate.MetricTypeCPU: - return calculate.CalculateRecomendedReplicasByCpuCosts(tac, sq, sts, client, v1alpha1.TiKVMemberType, duration) - default: - return -1, fmt.Errorf(calculate.InvalidTacMetricConfigureMsg, tac.Namespace, tac.Name) + currentReplicas := 
int32(len(sq.Instances)) + err = syncTiKVAfterCalculated(tc, tac, currentReplicas, targetReplicas) + if err != nil { + return err } + return addAnnotationMarkIfScaleOutDueToCPUMetrics(tc, currentReplicas, targetReplicas, sts) } -func checkTiKVStsAutoScaling(tac *v1alpha1.TidbClusterAutoScaler, thresholdSeconds, intervalSeconds int32) (bool, error) { - realClock := clockwork.NewRealClock() +func checkTiKVAutoScaling(tac *v1alpha1.TidbClusterAutoScaler, intervalSeconds int32) (bool, error) { if tac.Annotations == nil { tac.Annotations = map[string]string{} } - // 3*controller.ResyncDuration is maximum time allowed before reset phase status - ableToScale, err := checkLastSyncingTimestamp(tac, 3*controller.ResyncDuration, realClock) + ableToScale, err := checkStsAutoScalingInterval(tac, intervalSeconds, v1alpha1.TiKVMemberType) if err != nil { return false, err } if !ableToScale { return false, nil } - ableToScale, err = checkStsReadyAutoScalingTimestamp(tac, thresholdSeconds, realClock) - if err != nil { - return false, err - } - if !ableToScale { - return false, nil + return true, nil +} + +// syncTiKVAfterCalculated would check the Consecutive count to avoid jitter, and it would also check the interval +// duration between each auto-scaling. If either of them is not meet, the auto-scaling would be rejected. +// If the auto-scaling is permitted, the timestamp would be recorded and the Consecutive count would be zeroed. 
+// The currentReplicas of TiKV calculated in auto-scaling is the count of the StateUp TiKV instance, so we need to +// add the number of other state tikv instance replicas when we update the TidbCluster.Spec.TiKV.Replicas +func syncTiKVAfterCalculated(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, currentReplicas, recommendedReplicas int32) error { + intervalSeconds := tac.Spec.TiKV.ScaleInIntervalSeconds + if recommendedReplicas > currentReplicas { + intervalSeconds = tac.Spec.TiKV.ScaleOutIntervalSeconds } - ableToScale, err = checkStsAutoScalingInterval(tac, intervalSeconds, v1alpha1.TiKVMemberType) + ableToScale, err := checkTiKVAutoScaling(tac, *intervalSeconds) if err != nil { - return false, err + return err } if !ableToScale { - return false, nil + return nil } - return true, nil + return updateTcTiKVIfScale(tc, tac, recommendedReplicas) +} + +// updateTcTiKVIfScale records the last auto-scaling timestamp on the tac and applies the recommended replicas to the tc spec and the tac status +func updateTcTiKVIfScale(tc *v1alpha1.TidbCluster, tac *v1alpha1.TidbClusterAutoScaler, recommendedReplicas int32) error { + tac.Annotations[label.AnnTiKVLastAutoScalingTimestamp] = fmt.Sprintf("%d", time.Now().Unix()) + tc.Spec.TiKV.Replicas = recommendedReplicas + tac.Status.TiKV.RecommendedReplicas = recommendedReplicas + return nil +} + +// Add mark for the scale out tikv in annotations in cpu metric case +func addAnnotationMarkIfScaleOutDueToCPUMetrics(tc *v1alpha1.TidbCluster, currentReplicas, recommendedReplicas int32, sts *appsv1.StatefulSet) error { + if recommendedReplicas > currentReplicas { + newlyScaleOutOrdinalSets := helper.GetPodOrdinals(recommendedReplicas, sts).Difference(helper.GetPodOrdinals(currentReplicas, sts)) + if newlyScaleOutOrdinalSets.Len() > 0 { + if tc.Annotations == nil { + tc.Annotations = map[string]string{} + } + existed := operatorUtils.GetAutoScalingOutSlots(tc, v1alpha1.TiKVMemberType) + v, err :=
operatorUtils.Encode(newlyScaleOutOrdinalSets.Union(existed).List()) + if err != nil { + return err + } + tc.Annotations[label.AnnTiKVAutoScalingOutOrdinals] = v + } + } + return nil } diff --git a/pkg/autoscaler/autoscaler/util.go b/pkg/autoscaler/autoscaler/util.go index 1ebc003551..1f753b4adf 100644 --- a/pkg/autoscaler/autoscaler/util.go +++ b/pkg/autoscaler/autoscaler/util.go @@ -18,7 +18,6 @@ import ( "strconv" "time" - "github.com/jonboulle/clockwork" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" "github.com/pingcap/tidb-operator/pkg/label" operatorUtils "github.com/pingcap/tidb-operator/pkg/util" @@ -50,49 +49,6 @@ func checkStsAutoScalingPrerequisites(set *appsv1.StatefulSet) bool { return true } -// checkLastSyncingTimestamp reset TiKV phase if last auto scaling timestamp is longer than thresholdSec -func checkLastSyncingTimestamp(tac *v1alpha1.TidbClusterAutoScaler, thresholdSec time.Duration, clock clockwork.Clock) (bool, error) { - if tac.Annotations == nil { - tac.Annotations = map[string]string{} - } - - lastAutoScalingTimestamp, existed := tac.Annotations[label.AnnLastSyncingTimestamp] - if !existed { - // NOTE: because record autoscaler sync timestamp happens after check auto scale, - // label will not exist during first sync, return allow auto scale in this case. 
- return true, nil - } - t, err := strconv.ParseInt(lastAutoScalingTimestamp, 10, 64) - if err != nil { - return false, err - } - // if there's no resync within thresholdSec, reset TiKV phase to Normal - if clock.Now().After(time.Unix(t, 0).Add(thresholdSec)) { - tac.Status.TiKV.Phase = v1alpha1.NormalAutoScalerPhase - return false, nil - } - return true, nil -} - -// checkStsReadyAutoScalingTimestamp would check whether there is enough time window after ready to scale -func checkStsReadyAutoScalingTimestamp(tac *v1alpha1.TidbClusterAutoScaler, thresholdSeconds int32, clock clockwork.Clock) (bool, error) { - readyAutoScalingTimestamp, existed := tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] - - if !existed { - tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", clock.Now().Unix()) - return false, nil - } - t, err := strconv.ParseInt(readyAutoScalingTimestamp, 10, 32) - if err != nil { - return false, err - } - readyAutoScalingSec := int32(clock.Now().Sub(time.Unix(t, 0)).Seconds()) - if thresholdSeconds > readyAutoScalingSec { - return false, nil - } - return true, nil -} - // checkStsAutoScalingInterval would check whether there is enough interval duration between every two auto-scaling func checkStsAutoScalingInterval(tac *v1alpha1.TidbClusterAutoScaler, intervalSeconds int32, memberType v1alpha1.MemberType) (bool, error) { lastAutoScalingTimestamp, existed := tac.Annotations[label.AnnTiDBLastAutoScalingTimestamp] @@ -183,9 +139,6 @@ func defaultTAC(tac *v1alpha1.TidbClusterAutoScaler) { tac.Spec.TiKV.MetricsTimeDuration = pointer.StringPtr("3m") } } - if tac.Spec.TiKV.ReadyToScaleThresholdSeconds == nil { - tac.Spec.TiKV.ReadyToScaleThresholdSeconds = pointer.Int32Ptr(30) - } } if tac.Spec.TiDB != nil { diff --git a/pkg/autoscaler/autoscaler/util_test.go b/pkg/autoscaler/autoscaler/util_test.go index 66ca73c61e..a0a9b8b3a6 100644 --- a/pkg/autoscaler/autoscaler/util_test.go +++ b/pkg/autoscaler/autoscaler/util_test.go @@ -18,7 +18,6 @@ 
import ( "testing" "time" - "github.com/jonboulle/clockwork" . "github.com/onsi/gomega" "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1" "github.com/pingcap/tidb-operator/pkg/label" @@ -27,102 +26,6 @@ import ( "k8s.io/utils/pointer" ) -func Test_checkLastSyncingTimestamp(t *testing.T) { - g := NewGomegaWithT(t) - c := clockwork.NewFakeClockAt(time.Now()) - tests := []struct { - name string - withTimestamp bool - lastSyncSecAgo int - expectPhase v1alpha1.AutoScalerPhase - expectedPermitScaling bool - }{ - { - name: "tikv, no timestamp", - withTimestamp: false, - lastSyncSecAgo: 0, - expectPhase: v1alpha1.ReadyToScaleOutAutoScalerPhase, - expectedPermitScaling: true, - }, - { - name: "tikv, last sync 10s ago", - withTimestamp: true, - lastSyncSecAgo: 10, - expectPhase: v1alpha1.ReadyToScaleOutAutoScalerPhase, - expectedPermitScaling: true, - }, - { - name: "tikv, last sync 120s ago", - withTimestamp: true, - lastSyncSecAgo: 120, - expectPhase: v1alpha1.NormalAutoScalerPhase, - expectedPermitScaling: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - tac := newTidbClusterAutoScaler() - tac.Status.TiKV = &v1alpha1.TikvAutoScalerStatus{BasicAutoScalerStatus: v1alpha1.BasicAutoScalerStatus{Phase: v1alpha1.ReadyToScaleOutAutoScalerPhase}} - if tt.withTimestamp { - d := time.Duration(tt.lastSyncSecAgo) * time.Second - tac.Annotations[label.AnnLastSyncingTimestamp] = fmt.Sprintf("%d", time.Now().Add(-d).Unix()) - } else { - tac.Annotations = map[string]string{} - } - r, err := checkLastSyncingTimestamp(tac, 100*time.Second, c) - g.Expect(err).Should(BeNil()) - g.Expect(r).Should(Equal(tt.expectedPermitScaling)) - g.Expect(tac.Status.TiKV.Phase).Should(Equal(tt.expectPhase)) - }) - } -} - -func TestCheckStsReadyAutoScalingTimestamp(t *testing.T) { - g := NewGomegaWithT(t) - c := clockwork.NewFakeClockAt(time.Now()) - tests := []struct { - name string - withTimestamp bool - readyAutoScalingSec int - expectedPermitScaling bool - 
}{ - { - name: "tikv, no timestamp", - withTimestamp: false, - readyAutoScalingSec: 0, - expectedPermitScaling: false, - }, - { - name: "tikv, ready autoscaling 60s", - withTimestamp: true, - readyAutoScalingSec: 60, - expectedPermitScaling: false, - }, - { - name: "tikv, ready autoscaling 120s", - withTimestamp: true, - readyAutoScalingSec: 120, - expectedPermitScaling: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - thresholdSec := int32(100) - tac := newTidbClusterAutoScaler() - d := time.Duration(tt.readyAutoScalingSec) * time.Second - if tt.withTimestamp { - tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] = fmt.Sprintf("%d", time.Now().Add(-d).Unix()) - } else { - tac.Annotations = map[string]string{} - } - r, err := checkStsReadyAutoScalingTimestamp(tac, thresholdSec, c) - g.Expect(err).Should(BeNil()) - g.Expect(r).Should(Equal(tt.expectedPermitScaling)) - }) - } -} - func TestCheckStsAutoScalingInterval(t *testing.T) { g := NewGomegaWithT(t) tests := []struct { diff --git a/tests/e2e/tidbcluster/serial.go b/tests/e2e/tidbcluster/serial.go index 08eea5733d..038ddddf2b 100644 --- a/tests/e2e/tidbcluster/serial.go +++ b/tests/e2e/tidbcluster/serial.go @@ -501,7 +501,6 @@ var _ = ginkgo.Describe("[tidb-operator][Serial]", func() { MetricsTimeDuration: &duration, ScaleInIntervalSeconds: pointer.Int32Ptr(100), }, - ReadyToScaleThresholdSeconds: pointer.Int32Ptr(40), } tac.Spec.TiKV.Metrics = []autoscalingv2beta2.MetricSpec{} tac.Spec.TiKV.Metrics = append(tac.Spec.TiKV.Metrics, defaultMetricSpec) @@ -530,34 +529,8 @@ var _ = ginkgo.Describe("[tidb-operator][Serial]", func() { framework.ExpectNoError(err, "create pdapi error") defer cancel() var firstScaleTimestamp int64 - var readyToScaleTimestamp int64 - err = wait.Poll(10*time.Second, 5*time.Minute, func() (done bool, err error) { - tac, err = cli.PingcapV1alpha1().TidbClusterAutoScalers(ns).Get(tac.Name, metav1.GetOptions{}) - if err != nil { - return false, nil - } - 
if tac.Annotations == nil || len(tac.Annotations) < 1 { - framework.Logf("tac haven't marked any annotation") - return false, nil - } - t, ok := tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] - if !ok { - framework.Logf("tac has no tikv.tidb.pingcap.com/ready-to-scale-timestamp annotation") - return false, nil - } - readyToScaleTimestamp, err = strconv.ParseInt(t, 10, 64) - if err != nil { - return false, err - } - if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleOutAutoScalerPhase { - framework.Logf("tac dont' have the right ReadyToScale phase, expect: %s, got %s", v1alpha1.ReadyToScaleOutAutoScalerPhase, tac.Status.TiKV.Phase) - return false, nil - } - return true, nil - }) - framework.ExpectNoError(err, "check tikv has ready-to-scale-timestamp") - framework.Logf("tikv has checked ready-to-scale-timestamp") + // check tikv scale out to 4 and annotations err = wait.Poll(10*time.Second, 10*time.Minute, func() (done bool, err error) { stac, err := cli.PingcapV1alpha1().TidbClusterAutoScalers(ns).Get(tac.Name, metav1.GetOptions{}) if err != nil { @@ -603,13 +576,6 @@ var _ = ginkgo.Describe("[tidb-operator][Serial]", func() { if err != nil { return false, err } - // check readyToScaleTimestamp - if time.Now().Sub(time.Unix(readyToScaleTimestamp, 0)).Seconds() < 40 { - return false, fmt.Errorf("tikv doesn't meet the ReadyToScale threshold") - } - if tac.Status.TiKV.Phase != v1alpha1.NormalAutoScalerPhase { - return false, fmt.Errorf("tikv don't have right ReadyToScale phase") - } // check store label storeId := "" for k, v := range tc.Status.TiKV.Stores { @@ -653,33 +619,7 @@ var _ = ginkgo.Describe("[tidb-operator][Serial]", func() { err = mock.SetPrometheusResponse(monitor.Name, monitor.Namespace, mp, fw) framework.ExpectNoError(err, "set tikv mock metrics error") - err = wait.Poll(10*time.Second, 5*time.Minute, func() (done bool, err error) { - tac, err = cli.PingcapV1alpha1().TidbClusterAutoScalers(ns).Get(tac.Name, metav1.GetOptions{}) - if err != nil { - 
return false, nil - } - if tac.Annotations == nil || len(tac.Annotations) < 1 { - framework.Logf("tac haven't marked any annotation") - return false, nil - } - t, ok := tac.Annotations[label.AnnTiKVReadyToScaleTimestamp] - if !ok { - framework.Logf("tac has no tikv.tidb.pingcap.com/ready-to-scale-timestamp annotation") - return false, nil - } - readyToScaleTimestamp, err = strconv.ParseInt(t, 10, 64) - if err != nil { - return false, err - } - if tac.Status.TiKV.Phase != v1alpha1.ReadyToScaleInAutoScalerPhase { - framework.Logf("tac dont' have the right ReadyToScale phase, expect: %s, got %s", v1alpha1.ReadyToScaleOutAutoScalerPhase, tac.Status.TiKV.Phase) - return false, nil - } - return true, nil - }) - framework.ExpectNoError(err, "check tikv has ready-to-scale-timestamp") - framework.Logf("check tikv has ready-to-scale-timestamp") - + // check tikv scale-in to 3 err = wait.Poll(5*time.Second, 10*time.Minute, func() (done bool, err error) { stac, err := cli.PingcapV1alpha1().TidbClusterAutoScalers(ns).Get(tac.Name, metav1.GetOptions{}) if err != nil { @@ -742,12 +682,6 @@ var _ = ginkgo.Describe("[tidb-operator][Serial]", func() { if secondTs-firstScaleTimestamp < 100 { return false, fmt.Errorf("tikv second scale's interval isn't meeting the interval requirement") } - if time.Now().Sub(time.Unix(readyToScaleTimestamp, 0)).Seconds() < 40 { - return false, fmt.Errorf("tikv doesn't meet the ReadyToScale threshold") - } - if tac.Status.TiKV.Phase != v1alpha1.NormalAutoScalerPhase { - return false, fmt.Errorf("tikv don't have right ReadyToScale phase") - } return true, nil }) framework.ExpectNoError(err, "check tikv auto-scale to 3 error")