From e85eeaeadd1282d743f8c7e34128556aa4923729 Mon Sep 17 00:00:00 2001
From: Will Bowers <22203232+wllbo@users.noreply.github.com>
Date: Wed, 18 Oct 2023 10:44:33 -0700
Subject: [PATCH] attach errors to scale-up request and add comments

---
 .../clusterstate/clusterstate.go              | 31 +++++--
 .../clusterstate/clusterstate_test.go         | 89 +++++++++++++++++++
 .../config/autoscaling_options.go             |  4 +-
 cluster-autoscaler/core/static_autoscaler.go  |  5 +-
 cluster-autoscaler/main.go                    | 80 ++++++++---------
 5 files changed, 159 insertions(+), 50 deletions(-)

diff --git a/cluster-autoscaler/clusterstate/clusterstate.go b/cluster-autoscaler/clusterstate/clusterstate.go
index 0201871cd7c5..c25f4147d210 100644
--- a/cluster-autoscaler/clusterstate/clusterstate.go
+++ b/cluster-autoscaler/clusterstate/clusterstate.go
@@ -61,6 +61,8 @@ type ScaleUpRequest struct {
     ExpectedAddTime time.Time
     // How much the node group is increased.
     Increase int
+    // ErrorClasses is a set of the classes of errors encountered during a scale-up, if any.
+    ErrorClasses map[cloudprovider.InstanceErrorClass]struct{}
 }
 
 // ScaleDownRequest contains information about the requested node deletion.
@@ -82,8 +84,11 @@ type ClusterStateRegistryConfig struct {
     // Minimum number of nodes that must be unready for MaxTotalUnreadyPercentage to apply.
     // This is to ensure that in very small clusters (e.g. 2 nodes) a single node's failure doesn't disable autoscaling.
     OkTotalUnreadyCount int
-    // NodeGroupKeepBackoffOutOfResources is whether a backoff can be removed before expiration when a scale-up fails due to the cloud provider being out of resources.
-    NodeGroupKeepBackoffOutOfResources bool
+    // NodeGroupRemovePersistentErrorBackoffEarly is whether a backoff can be removed before expiration when
+    // a scale-up partially fails due to a likely persistent error.
+    // If true (the default), the backoff is removed early regardless of the error class.
+    // If false, a backoff caused by a likely persistent error, e.g. OutOfResourcesError, is not removed early.
+    NodeGroupRemovePersistentErrorBackoffEarly bool
 }
 
 // IncorrectNodeGroupSize contains information about how much the current size of the node group
@@ -216,6 +221,7 @@ func (csr *ClusterStateRegistry) registerOrUpdateScaleUpNoLock(nodeGroup cloudpr
         Increase:        delta,
         Time:            currentTime,
         ExpectedAddTime: currentTime.Add(maxNodeProvisionTime),
+        ErrorClasses:    make(map[cloudprovider.InstanceErrorClass]struct{}),
     }
     csr.scaleUpRequests[nodeGroup.Id()] = scaleUpRequest
     return
@@ -258,9 +264,16 @@ func (csr *ClusterStateRegistry) updateScaleRequests(currentTime time.Time) {
             // scale-out finished successfully
             // remove it and reset node group backoff
             delete(csr.scaleUpRequests, nodeGroupName)
-            shouldKeepBackoff := csr.config.NodeGroupKeepBackoffOutOfResources && csr.backoff.IsNodeGroupOutOfResources(scaleUpRequest.NodeGroup)
-            if !shouldKeepBackoff {
-                klog.V(4).Infof("Removing backoff for node group %v", scaleUpRequest.NodeGroup.Id())
+            // If a node group is backed off during a scale-up due to instance creation errors but the scale-up partially succeeds,
+            // the backoff can be removed early, allowing the CA to retry scaling the same node group sooner.
+            // Optionally, the backoff can be retained for likely persistent errors, given the risk of recurrence.
+            // The backoff is removed early if either of the following conditions is true:
+            // 1. NodeGroupRemovePersistentErrorBackoffEarly is enabled (the default)
+            // 2. No persistent error class is attached to the scale-up request
+            _, persistentError := scaleUpRequest.ErrorClasses[cloudprovider.OutOfResourcesErrorClass]
+            shouldRemoveBackoffEarly := csr.config.NodeGroupRemovePersistentErrorBackoffEarly || !persistentError
+            if shouldRemoveBackoffEarly {
+                klog.V(4).Infof("Removing backoff early for node group %v", scaleUpRequest.NodeGroup.Id())
                 csr.backoff.RemoveBackoff(scaleUpRequest.NodeGroup, csr.nodeInfosForGroups[scaleUpRequest.NodeGroup.Id()])
             }
             klog.V(4).Infof("Scale up in group %v finished successfully in %v",
@@ -315,6 +328,12 @@ func (csr *ClusterStateRegistry) RegisterFailedScaleUp(nodeGroup cloudprovider.N
 func (csr *ClusterStateRegistry) registerFailedScaleUpNoLock(nodeGroup cloudprovider.NodeGroup, reason metrics.FailedScaleUpReason, errorClass cloudprovider.InstanceErrorClass, errorCode string, gpuResourceName, gpuType string, currentTime time.Time) {
     csr.scaleUpFailures[nodeGroup.Id()] = append(csr.scaleUpFailures[nodeGroup.Id()], ScaleUpFailure{NodeGroup: nodeGroup, Reason: reason, Time: currentTime})
     metrics.RegisterFailedScaleUp(reason, gpuResourceName, gpuType)
+    // Attach the error class to the scale-up request, if one exists.
+    // It is used to decide whether the backoff can be removed early when scale-up requests are updated.
+    scaleUpRequest, found := csr.scaleUpRequests[nodeGroup.Id()]
+    if found {
+        scaleUpRequest.ErrorClasses[errorClass] = struct{}{}
+    }
     csr.backoffNodeGroup(nodeGroup, errorClass, errorCode, currentTime)
 }
 
@@ -1097,7 +1116,7 @@ func (csr *ClusterStateRegistry) handleInstanceCreationErrorsForNodeGroup(
     // If node group is scaling up and there are new node-create requests which cannot be satisfied because of
     // out-of-resources errors we:
     //  - emit event
-    //  - alter the scale-up
+    //  - alter the scale-up and attach the error class
     //  - increase scale-up failure metric
     //  - backoff the node group
     for errorCode, instances := range currentErrorCodeToInstance {
diff --git a/cluster-autoscaler/clusterstate/clusterstate_test.go b/cluster-autoscaler/clusterstate/clusterstate_test.go
index 6a74f69617fd..14ccd6889f5d 100644
--- a/cluster-autoscaler/clusterstate/clusterstate_test.go
+++ b/cluster-autoscaler/clusterstate/clusterstate_test.go
@@ -508,6 +508,95 @@ func TestRegisterScaleDown(t *testing.T) {
     assert.Empty(t, clusterstate.GetScaleUpFailures())
 }
 
+func TestRemovePersistentErrorBackoffEarlyEnabled(t *testing.T) {
+    ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
+    provider := testprovider.NewTestCloudProvider(nil, nil)
+    provider.AddNodeGroup("ng1", 1, 10, 1)
+    provider.AddNode("ng1", ng1_1)
+    assert.NotNil(t, provider)
+
+    fakeClient := &fake.Clientset{}
+    fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
+    clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
+        MaxTotalUnreadyPercentage:                  10,
+        OkTotalUnreadyCount:                        1,
+        NodeGroupRemovePersistentErrorBackoffEarly: true,
+    }, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}))
+
+    now := time.Now()
+
+    provider.GetNodeGroup("ng1").(*testprovider.TestNodeGroup).SetTargetSize(4)
+    clusterstate.UpdateNodes([]*apiv1.Node{ng1_1}, nil, now)
+    clusterstate.RegisterOrUpdateScaleUp(provider.GetNodeGroup("ng1"), 3, now)
+    assert.True(t, clusterstate.IsNodeGroupScalingUp("ng1"))
+
+    // Fail two nodes with a persistent and a non-persistent error
+    clusterstate.registerFailedScaleUpNoLock(provider.GetNodeGroup("ng1"), metrics.CloudProviderError, cloudprovider.OutOfResourcesErrorClass, string(metrics.CloudProviderError), "", "", now)
+    clusterstate.registerFailedScaleUpNoLock(provider.GetNodeGroup("ng1"), metrics.CloudProviderError, cloudprovider.OtherErrorClass, string(metrics.CloudProviderError), "", "", now)
+    clusterstate.RegisterOrUpdateScaleUp(provider.GetNodeGroup("ng1"), -2, now)
+    assert.Equal(t, 2, len(clusterstate.scaleUpRequests["ng1"].ErrorClasses))
+    assert.True(t, clusterstate.backoff.IsBackedOff(provider.GetNodeGroup("ng1"), nil, now))
+
+    // Reduce the target size to the original value to complete the scale-up and trigger the early backoff removal
+    provider.GetNodeGroup("ng1").(*testprovider.TestNodeGroup).SetTargetSize(1)
+    clusterstate.UpdateNodes([]*apiv1.Node{ng1_1}, nil, now)
+    assert.False(t, clusterstate.IsNodeGroupScalingUp("ng1"))
+    assert.False(t, clusterstate.backoff.IsBackedOff(provider.GetNodeGroup("ng1"), nil, now))
+}
+
+func TestRemovePersistentErrorBackoffEarlyDisabled(t *testing.T) {
+    ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
+    provider := testprovider.NewTestCloudProvider(nil, nil)
+    provider.AddNodeGroup("ng1", 1, 10, 1)
+    provider.AddNode("ng1", ng1_1)
+    assert.NotNil(t, provider)
+
+    fakeClient := &fake.Clientset{}
+    fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
+    clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
+        MaxTotalUnreadyPercentage:                  10,
+        OkTotalUnreadyCount:                        1,
+        NodeGroupRemovePersistentErrorBackoffEarly: false,
+    }, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}))
+
+    now := time.Now()
+
+    provider.GetNodeGroup("ng1").(*testprovider.TestNodeGroup).SetTargetSize(3)
+    clusterstate.UpdateNodes([]*apiv1.Node{ng1_1}, nil, now)
+    clusterstate.RegisterOrUpdateScaleUp(provider.GetNodeGroup("ng1"), 2, now)
+    assert.True(t, clusterstate.IsNodeGroupScalingUp("ng1"))
+
+    // Fail one node with a persistent error
+    clusterstate.registerFailedScaleUpNoLock(provider.GetNodeGroup("ng1"), metrics.CloudProviderError, cloudprovider.OutOfResourcesErrorClass, string(metrics.CloudProviderError), "", "", now)
+    clusterstate.RegisterOrUpdateScaleUp(provider.GetNodeGroup("ng1"), -1, now)
+    assert.True(t, clusterstate.backoff.IsBackedOff(provider.GetNodeGroup("ng1"), nil, now))
+
+    // Confirm the persistent-error backoff is not removed early
+    provider.GetNodeGroup("ng1").(*testprovider.TestNodeGroup).SetTargetSize(1)
+    clusterstate.UpdateNodes([]*apiv1.Node{ng1_1}, nil, now)
+    assert.False(t, clusterstate.IsNodeGroupScalingUp("ng1"))
+    assert.True(t, clusterstate.backoff.IsBackedOff(provider.GetNodeGroup("ng1"), nil, now))
+
+    // Remove the backoff and scale up again
+    clusterstate.backoff.RemoveBackoff(provider.GetNodeGroup("ng1"), nil)
+    provider.GetNodeGroup("ng1").(*testprovider.TestNodeGroup).SetTargetSize(3)
+    clusterstate.UpdateNodes([]*apiv1.Node{ng1_1}, nil, now)
+    clusterstate.RegisterOrUpdateScaleUp(provider.GetNodeGroup("ng1"), 2, now)
+    assert.False(t, clusterstate.backoff.IsBackedOff(provider.GetNodeGroup("ng1"), nil, now))
+    assert.True(t, clusterstate.IsNodeGroupScalingUp("ng1"))
+
+    // Fail one node with a non-persistent error
+    clusterstate.registerFailedScaleUpNoLock(provider.GetNodeGroup("ng1"), metrics.CloudProviderError, cloudprovider.OtherErrorClass, string(metrics.CloudProviderError), "", "", now)
+    clusterstate.RegisterOrUpdateScaleUp(provider.GetNodeGroup("ng1"), -1, now)
+    assert.True(t, clusterstate.backoff.IsBackedOff(provider.GetNodeGroup("ng1"), nil, now))
+
+    // Complete the scale-up and confirm the backoff is removed early
+    provider.GetNodeGroup("ng1").(*testprovider.TestNodeGroup).SetTargetSize(1)
+    clusterstate.UpdateNodes([]*apiv1.Node{ng1_1}, nil, now)
+    assert.False(t, clusterstate.IsNodeGroupScalingUp("ng1"))
+    assert.False(t, clusterstate.backoff.IsBackedOff(provider.GetNodeGroup("ng1"), nil, now))
+}
+
 func TestUpcomingNodes(t *testing.T) {
     provider := testprovider.NewTestCloudProvider(nil, nil)
     now := time.Now()
diff --git a/cluster-autoscaler/config/autoscaling_options.go b/cluster-autoscaler/config/autoscaling_options.go
index a599c5dff9c4..88d6d6ca0cac 100644
--- a/cluster-autoscaler/config/autoscaling_options.go
+++ b/cluster-autoscaler/config/autoscaling_options.go
@@ -244,8 +244,8 @@ type AutoscalingOptions struct {
     MaxNodeGroupBackoffDuration time.Duration
     // NodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset.
     NodeGroupBackoffResetTimeout time.Duration
-    // NodeGroupKeepBackoffOutOfResources is whether a backoff can be removed before expiration when a scale-up fails due to the cloud provider being out of resources.
-    NodeGroupKeepBackoffOutOfResources bool
+    // NodeGroupRemovePersistentErrorBackoffEarly is whether a backoff can be removed before expiration when a scale-up partially fails due to a likely persistent error.
+    NodeGroupRemovePersistentErrorBackoffEarly bool
     // MaxScaleDownParallelism is the maximum number of nodes (both empty and needing drain) that can be deleted in parallel.
     MaxScaleDownParallelism int
     // MaxDrainParallelism is the maximum number of nodes needing drain, that can be drained and deleted in parallel.
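Note on the clusterstate change above: the early-removal decision reduces to a single boolean expression over the registry config and the error classes recorded on the scale-up request. The standalone Go sketch below is illustrative only and is not taken verbatim from the patch; it uses simplified stand-in types rather than the autoscaler's cloudprovider.InstanceErrorClass and ClusterStateRegistry.

// Illustrative sketch of the early backoff removal rule implemented in updateScaleRequests.
// Types and names here are simplified stand-ins for the real autoscaler types.
package main

import "fmt"

type instanceErrorClass int

const (
	otherErrorClass          instanceErrorClass = iota // transient or unclassified
	outOfResourcesErrorClass                           // treated as likely persistent
)

// shouldRemoveBackoffEarly returns true when a finished, partially failed scale-up
// may clear the node group backoff before it expires: either the option is enabled
// (the default) or no likely persistent error class was recorded for the request.
func shouldRemoveBackoffEarly(removePersistentErrorBackoffEarly bool, errorClasses map[instanceErrorClass]struct{}) bool {
	_, persistent := errorClasses[outOfResourcesErrorClass]
	return removePersistentErrorBackoffEarly || !persistent
}

func main() {
	seen := map[instanceErrorClass]struct{}{
		outOfResourcesErrorClass: {},
		otherErrorClass:          {},
	}
	fmt.Println(shouldRemoveBackoffEarly(true, seen))  // true: default keeps the old early-removal behaviour
	fmt.Println(shouldRemoveBackoffEarly(false, seen)) // false: persistent error keeps the backoff in place
	fmt.Println(shouldRemoveBackoffEarly(false, map[instanceErrorClass]struct{}{otherErrorClass: {}})) // true
}

In the registry itself, the set is filled by registerFailedScaleUpNoLock and consulted by updateScaleRequests once the scale-up request is considered finished, so a node group that only hit transient errors can be retried right away, while one that ran out of capacity can optionally stay backed off.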
diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go
index 68b870786d02..bb9d38a23f7d 100644
--- a/cluster-autoscaler/core/static_autoscaler.go
+++ b/cluster-autoscaler/core/static_autoscaler.go
@@ -148,8 +148,9 @@ func NewStaticAutoscaler(
     drainabilityRules rules.Rules) *StaticAutoscaler {
 
     clusterStateConfig := clusterstate.ClusterStateRegistryConfig{
-        MaxTotalUnreadyPercentage: opts.MaxTotalUnreadyPercentage,
-        OkTotalUnreadyCount:       opts.OkTotalUnreadyCount,
+        MaxTotalUnreadyPercentage:                  opts.MaxTotalUnreadyPercentage,
+        OkTotalUnreadyCount:                        opts.OkTotalUnreadyCount,
+        NodeGroupRemovePersistentErrorBackoffEarly: opts.NodeGroupRemovePersistentErrorBackoffEarly,
     }
     clusterStateRegistry := clusterstate.NewClusterStateRegistry(cloudProvider, clusterStateConfig, autoscalingKubeClients.LogRecorder, backoff, processors.NodeGroupConfigProcessor)
     processorCallbacks := newStaticAutoscalerProcessorCallbacks()
diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go
index 063250d027e7..46c82e0d33eb 100644
--- a/cluster-autoscaler/main.go
+++ b/cluster-autoscaler/main.go
@@ -226,24 +226,24 @@ var (
         "maxNodeGroupBackoffDuration is the maximum backoff duration for a NodeGroup after new nodes failed to start.")
     nodeGroupBackoffResetTimeout = flag.Duration("node-group-backoff-reset-timeout", 3*time.Hour,
         "nodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset.")
-    nodeGroupKeepBackoffOutOfResources      = flag.Bool("node-group-keep-backoff-out-of-resources", false, "Prevents removal of backoff before expiration when a scale-up fails due to the cloud provider being out of resources.")
-    maxScaleDownParallelismFlag             = flag.Int("max-scale-down-parallelism", 10, "Maximum number of nodes (both empty and needing drain) that can be deleted in parallel.")
-    maxDrainParallelismFlag                 = flag.Int("max-drain-parallelism", 1, "Maximum number of nodes needing drain, that can be drained and deleted in parallel.")
-    recordDuplicatedEvents                  = flag.Bool("record-duplicated-events", false, "enable duplication of similar events within a 5 minute window.")
-    maxNodesPerScaleUp                      = flag.Int("max-nodes-per-scaleup", 1000, "Max nodes added in a single scale-up. This is intended strictly for optimizing CA algorithm latency and not a tool to rate-limit scale-up throughput.")
-    maxNodeGroupBinpackingDuration          = flag.Duration("max-nodegroup-binpacking-duration", 10*time.Second, "Maximum time that will be spent in binpacking simulation for each NodeGroup.")
-    skipNodesWithSystemPods                 = flag.Bool("skip-nodes-with-system-pods", true, "If true cluster autoscaler will never delete nodes with pods from kube-system (except for DaemonSet or mirror pods)")
-    skipNodesWithLocalStorage               = flag.Bool("skip-nodes-with-local-storage", true, "If true cluster autoscaler will never delete nodes with pods with local storage, e.g. EmptyDir or HostPath")
-    skipNodesWithCustomControllerPods       = flag.Bool("skip-nodes-with-custom-controller-pods", true, "If true cluster autoscaler will never delete nodes with pods owned by custom controllers")
-    minReplicaCount                         = flag.Int("min-replica-count", 0, "Minimum number or replicas that a replica set or replication controller should have to allow their pods deletion in scale down")
-    nodeDeleteDelayAfterTaint               = flag.Duration("node-delete-delay-after-taint", 5*time.Second, "How long to wait before deleting a node after tainting it")
-    scaleDownSimulationTimeout              = flag.Duration("scale-down-simulation-timeout", 30*time.Second, "How long should we run scale down simulation.")
-    parallelDrain                           = flag.Bool("parallel-drain", true, "Whether to allow parallel drain of nodes. This flag is deprecated and will be removed in future releases.")
-    maxCapacityMemoryDifferenceRatio        = flag.Float64("memory-difference-ratio", config.DefaultMaxCapacityMemoryDifferenceRatio, "Maximum difference in memory capacity between two similar node groups to be considered for balancing. Value is a ratio of the smaller node group's memory capacity.")
-    maxFreeDifferenceRatio                  = flag.Float64("max-free-difference-ratio", config.DefaultMaxFreeDifferenceRatio, "Maximum difference in free resources between two similar node groups to be considered for balancing. Value is a ratio of the smaller node group's free resource.")
-    maxAllocatableDifferenceRatio           = flag.Float64("max-allocatable-difference-ratio", config.DefaultMaxAllocatableDifferenceRatio, "Maximum difference in allocatable resources between two similar node groups to be considered for balancing. Value is a ratio of the smaller node group's allocatable resource.")
-    forceDaemonSets                         = flag.Bool("force-ds", false, "Blocks scale-up of node groups too small for all suitable Daemon Sets pods.")
-    dynamicNodeDeleteDelayAfterTaintEnabled = flag.Bool("dynamic-node-delete-delay-after-taint-enabled", false, "Enables dynamic adjustment of NodeDeleteDelayAfterTaint based of the latency between CA and api-server")
+    nodeGroupRemovePersistentErrorBackoffEarly = flag.Bool("node-group-remove-persistent-error-backoff-early", true, "Allows early removal of a backoff, before it expires, when a scale-up partially fails due to an error that is likely to be persistent, e.g. the cloud provider being out of resources.")
+    maxScaleDownParallelismFlag                = flag.Int("max-scale-down-parallelism", 10, "Maximum number of nodes (both empty and needing drain) that can be deleted in parallel.")
+    maxDrainParallelismFlag                    = flag.Int("max-drain-parallelism", 1, "Maximum number of nodes needing drain, that can be drained and deleted in parallel.")
+    recordDuplicatedEvents                     = flag.Bool("record-duplicated-events", false, "enable duplication of similar events within a 5 minute window.")
+    maxNodesPerScaleUp                         = flag.Int("max-nodes-per-scaleup", 1000, "Max nodes added in a single scale-up. This is intended strictly for optimizing CA algorithm latency and not a tool to rate-limit scale-up throughput.")
+    maxNodeGroupBinpackingDuration             = flag.Duration("max-nodegroup-binpacking-duration", 10*time.Second, "Maximum time that will be spent in binpacking simulation for each NodeGroup.")
+    skipNodesWithSystemPods                    = flag.Bool("skip-nodes-with-system-pods", true, "If true cluster autoscaler will never delete nodes with pods from kube-system (except for DaemonSet or mirror pods)")
+    skipNodesWithLocalStorage                  = flag.Bool("skip-nodes-with-local-storage", true, "If true cluster autoscaler will never delete nodes with pods with local storage, e.g. EmptyDir or HostPath")
+    skipNodesWithCustomControllerPods          = flag.Bool("skip-nodes-with-custom-controller-pods", true, "If true cluster autoscaler will never delete nodes with pods owned by custom controllers")
+    minReplicaCount                            = flag.Int("min-replica-count", 0, "Minimum number or replicas that a replica set or replication controller should have to allow their pods deletion in scale down")
+    nodeDeleteDelayAfterTaint                  = flag.Duration("node-delete-delay-after-taint", 5*time.Second, "How long to wait before deleting a node after tainting it")
+    scaleDownSimulationTimeout                 = flag.Duration("scale-down-simulation-timeout", 30*time.Second, "How long should we run scale down simulation.")
+    parallelDrain                              = flag.Bool("parallel-drain", true, "Whether to allow parallel drain of nodes. This flag is deprecated and will be removed in future releases.")
+    maxCapacityMemoryDifferenceRatio           = flag.Float64("memory-difference-ratio", config.DefaultMaxCapacityMemoryDifferenceRatio, "Maximum difference in memory capacity between two similar node groups to be considered for balancing. Value is a ratio of the smaller node group's memory capacity.")
+    maxFreeDifferenceRatio                     = flag.Float64("max-free-difference-ratio", config.DefaultMaxFreeDifferenceRatio, "Maximum difference in free resources between two similar node groups to be considered for balancing. Value is a ratio of the smaller node group's free resource.")
+    maxAllocatableDifferenceRatio              = flag.Float64("max-allocatable-difference-ratio", config.DefaultMaxAllocatableDifferenceRatio, "Maximum difference in allocatable resources between two similar node groups to be considered for balancing. Value is a ratio of the smaller node group's allocatable resource.")
+    forceDaemonSets                            = flag.Bool("force-ds", false, "Blocks scale-up of node groups too small for all suitable Daemon Sets pods.")
+    dynamicNodeDeleteDelayAfterTaintEnabled    = flag.Bool("dynamic-node-delete-delay-after-taint-enabled", false, "Enables dynamic adjustment of NodeDeleteDelayAfterTaint based of the latency between CA and api-server")
 )
 
 func isFlagPassed(name string) bool {
@@ -364,28 +364,28 @@ func createAutoscalingOptions() config.AutoscalingOptions {
             MigInstancesMinRefreshWaitTime:  *gceMigInstancesMinRefreshWaitTime,
             ExpanderEphemeralStorageSupport: *gceExpanderEphemeralStorageSupport,
         },
-        ClusterAPICloudConfigAuthoritative: *clusterAPICloudConfigAuthoritative,
-        CordonNodeBeforeTerminate:          *cordonNodeBeforeTerminate,
-        DaemonSetEvictionForEmptyNodes:     *daemonSetEvictionForEmptyNodes,
-        DaemonSetEvictionForOccupiedNodes:  *daemonSetEvictionForOccupiedNodes,
-        UserAgent:                          *userAgent,
-        InitialNodeGroupBackoffDuration:    *initialNodeGroupBackoffDuration,
-        MaxNodeGroupBackoffDuration:        *maxNodeGroupBackoffDuration,
-        NodeGroupBackoffResetTimeout:       *nodeGroupBackoffResetTimeout,
-        NodeGroupKeepBackoffOutOfResources: *nodeGroupKeepBackoffOutOfResources,
-        MaxScaleDownParallelism:            *maxScaleDownParallelismFlag,
-        MaxDrainParallelism:                *maxDrainParallelismFlag,
-        RecordDuplicatedEvents:             *recordDuplicatedEvents,
-        MaxNodesPerScaleUp:                 *maxNodesPerScaleUp,
-        MaxNodeGroupBinpackingDuration:     *maxNodeGroupBinpackingDuration,
-        NodeDeletionBatcherInterval:        *nodeDeletionBatcherInterval,
-        SkipNodesWithSystemPods:            *skipNodesWithSystemPods,
-        SkipNodesWithLocalStorage:          *skipNodesWithLocalStorage,
-        MinReplicaCount:                    *minReplicaCount,
-        NodeDeleteDelayAfterTaint:          *nodeDeleteDelayAfterTaint,
-        ScaleDownSimulationTimeout:         *scaleDownSimulationTimeout,
-        ParallelDrain:                      *parallelDrain,
-        SkipNodesWithCustomControllerPods:  *skipNodesWithCustomControllerPods,
+        ClusterAPICloudConfigAuthoritative:         *clusterAPICloudConfigAuthoritative,
+        CordonNodeBeforeTerminate:                  *cordonNodeBeforeTerminate,
+        DaemonSetEvictionForEmptyNodes:             *daemonSetEvictionForEmptyNodes,
+        DaemonSetEvictionForOccupiedNodes:          *daemonSetEvictionForOccupiedNodes,
+        UserAgent:                                  *userAgent,
+        InitialNodeGroupBackoffDuration:            *initialNodeGroupBackoffDuration,
+        MaxNodeGroupBackoffDuration:                *maxNodeGroupBackoffDuration,
+        NodeGroupBackoffResetTimeout:               *nodeGroupBackoffResetTimeout,
+        NodeGroupRemovePersistentErrorBackoffEarly: *nodeGroupRemovePersistentErrorBackoffEarly,
+        MaxScaleDownParallelism:                    *maxScaleDownParallelismFlag,
+        MaxDrainParallelism:                        *maxDrainParallelismFlag,
+        RecordDuplicatedEvents:                     *recordDuplicatedEvents,
+        MaxNodesPerScaleUp:                         *maxNodesPerScaleUp,
+        MaxNodeGroupBinpackingDuration:             *maxNodeGroupBinpackingDuration,
+        NodeDeletionBatcherInterval:                *nodeDeletionBatcherInterval,
+        SkipNodesWithSystemPods:                    *skipNodesWithSystemPods,
+        SkipNodesWithLocalStorage:                  *skipNodesWithLocalStorage,
+        MinReplicaCount:                            *minReplicaCount,
+        NodeDeleteDelayAfterTaint:                  *nodeDeleteDelayAfterTaint,
+        ScaleDownSimulationTimeout:                 *scaleDownSimulationTimeout,
+        ParallelDrain:                              *parallelDrain,
+        SkipNodesWithCustomControllerPods:          *skipNodesWithCustomControllerPods,
         NodeGroupSetRatios: config.NodeGroupDifferenceRatios{
             MaxCapacityMemoryDifferenceRatio: *maxCapacityMemoryDifferenceRatio,
             MaxAllocatableDifferenceRatio:    *maxAllocatableDifferenceRatio,
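Usage note for reviewers: the new flag defaults to true, which preserves the previous behaviour of clearing a node group's backoff as soon as a partially failed scale-up completes. Running the autoscaler with --node-group-remove-persistent-error-backoff-early=false keeps a node group backed off for the remainder of its backoff whenever the failed scale-up recorded an OutOfResourcesErrorClass error, trading slower retries for fewer repeated attempts against a provider that is likely still out of capacity.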