From 091c72cbb041871d5be55b0ac47f7b23585d4732 Mon Sep 17 00:00:00 2001 From: Marwan Ahmed Date: Mon, 20 Dec 2021 14:34:01 +0200 Subject: [PATCH] cleanup scale set size logs --- .../cloudprovider/azure/azure_scale_set.go | 46 ++++++++----------- .../azure/azure_scale_set_test.go | 15 ++---- 2 files changed, 24 insertions(+), 37 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go b/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go index 138486a8241b..500b24c3ae19 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go @@ -50,10 +50,8 @@ type ScaleSet struct { minSize int maxSize int - sizeMutex sync.Mutex - curSize int64 - lastSizeRefresh time.Time - sizeRefreshPeriod time.Duration + sizeMutex sync.Mutex + curSize int64 instancesRefreshPeriod time.Duration instancesRefreshJitter int @@ -118,7 +116,7 @@ func (scaleSet *ScaleSet) Autoprovisioned() bool { func (scaleSet *ScaleSet) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) { template, err := scaleSet.getVMSSFromCache() if err != nil { - return nil, err.Error() + return nil, err } return scaleSet.manager.GetScaleSetOptions(*template.Name, defaults), nil } @@ -128,11 +126,11 @@ func (scaleSet *ScaleSet) MaxSize() int { return scaleSet.maxSize } -func (scaleSet *ScaleSet) getVMSSFromCache() (compute.VirtualMachineScaleSet, *retry.Error) { +func (scaleSet *ScaleSet) getVMSSFromCache() (compute.VirtualMachineScaleSet, error) { allVMSS := scaleSet.manager.azureCache.getScaleSets() if _, exists := allVMSS[scaleSet.Name]; !exists { - return compute.VirtualMachineScaleSet{}, &retry.Error{RawError: fmt.Errorf("could not find vmss: %s", scaleSet.Name)} + return compute.VirtualMachineScaleSet{}, fmt.Errorf("could not find vmss: %s", scaleSet.Name) } return allVMSS[scaleSet.Name], nil @@ -142,19 +140,15 @@ func (scaleSet *ScaleSet) getCurSize() (int64, error) { scaleSet.sizeMutex.Lock() defer scaleSet.sizeMutex.Unlock() - if scaleSet.lastSizeRefresh.Add(scaleSet.sizeRefreshPeriod).After(time.Now()) { - return scaleSet.curSize, nil - } - - klog.V(5).Infof("Get scale set size for %q", scaleSet.Name) - set, rerr := scaleSet.getVMSSFromCache() - if rerr != nil { - return -1, rerr.Error() + set, err := scaleSet.getVMSSFromCache() + if err != nil { + klog.Errorf("failed to get information for VMSS: %s, error: %v", scaleSet.Name, err) + return -1, err } // If VMSS state is updating, return the currentSize which would've been proactively incremented or decremented by CA if set.VirtualMachineScaleSetProperties != nil && strings.EqualFold(to.String(set.VirtualMachineScaleSetProperties.ProvisioningState), string(compute.ProvisioningStateUpdating)) { - klog.V(5).Infof("VMSS %q is in updating state, returning cached size: %d", scaleSet.Name, scaleSet.curSize) + klog.V(3).Infof("VMSS % is in updating state, returning cached size: %d", scaleSet.Name, scaleSet.curSize) return scaleSet.curSize, nil } @@ -162,15 +156,14 @@ func (scaleSet *ScaleSet) getCurSize() (int64, error) { curSize := *set.Sku.Capacity vmssSizeMutex.Unlock() - klog.V(5).Infof("Getting scale set (%q) capacity: %d\n", scaleSet.Name, curSize) - if scaleSet.curSize != curSize { // Invalidate the instance cache if the capacity has changed. + klog.V(5).Infof("VMSS %q size changed from: %d to %d, invalidating instance cache", scaleSet.Name, scaleSet.curSize, curSize) scaleSet.invalidateInstanceCache() } + klog.V(3).Infof("VMSS: %s, previous size: %d, new size: %d", scaleSet.Name, scaleSet.curSize, curSize) scaleSet.curSize = curSize - scaleSet.lastSizeRefresh = time.Now() return scaleSet.curSize, nil } @@ -226,10 +219,10 @@ func (scaleSet *ScaleSet) SetScaleSetSize(size int64) error { scaleSet.sizeMutex.Lock() defer scaleSet.sizeMutex.Unlock() - vmssInfo, rerr := scaleSet.getVMSSFromCache() - if rerr != nil { - klog.Errorf("Failed to get information for VMSS (%q): %v", scaleSet.Name, rerr) - return rerr.Error() + vmssInfo, err := scaleSet.getVMSSFromCache() + if err != nil { + klog.Errorf("Failed to get information for VMSS (%q): %v", scaleSet.Name, err) + return err } // Update the new capacity to cache. @@ -254,7 +247,6 @@ func (scaleSet *ScaleSet) SetScaleSetSize(size int64) error { // Proactively set the VMSS size so autoscaler makes better decisions. scaleSet.curSize = size - scaleSet.lastSizeRefresh = time.Now() go scaleSet.updateVMSSCapacity(future) return nil @@ -474,9 +466,9 @@ func (scaleSet *ScaleSet) Debug() string { // TemplateNodeInfo returns a node template for this scale set. func (scaleSet *ScaleSet) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) { - template, rerr := scaleSet.getVMSSFromCache() - if rerr != nil { - return nil, rerr.Error() + template, err := scaleSet.getVMSSFromCache() + if err != nil { + return nil, err } node, err := buildNodeFromTemplate(scaleSet.Name, template) diff --git a/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go b/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go index 0d604a8b235e..f05b9882dc4b 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go @@ -18,15 +18,13 @@ package azure import ( "fmt" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "net/http" - "testing" - "time" - apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "net/http" "sigs.k8s.io/cloud-provider-azure/pkg/azureclients/vmssclient/mockvmssclient" "sigs.k8s.io/cloud-provider-azure/pkg/azureclients/vmssvmclient/mockvmssvmclient" + "testing" "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2020-12-01/compute" "github.com/Azure/go-autorest/autorest/to" @@ -138,12 +136,9 @@ func TestIncreaseSize(t *testing.T) { err := provider.azureManager.forceRefresh() assert.NoError(t, err) - ss := newTestScaleSet(provider.azureManager, "test-asg") - ss.lastSizeRefresh = time.Now() - ss.sizeRefreshPeriod = 1 * time.Minute - ss.curSize = -1 + ss := newTestScaleSet(provider.azureManager, "test-asg-doesnt-exist") err = ss.IncreaseSize(100) - expectedErr := fmt.Errorf("the scale set test-asg is under initialization, skipping IncreaseSize") + expectedErr := fmt.Errorf("could not find vmss: test-asg-doesnt-exist") assert.Equal(t, expectedErr, err) registered := provider.azureManager.RegisterNodeGroup(