Skip to content

Commit

Permalink
Merge pull request #4541 from marwanad/improve-azure-cache-logging
Browse files Browse the repository at this point in the history
improve logging for scale set size changes
  • Loading branch information
k8s-ci-robot authored Dec 21, 2021
2 parents 6c9b0e9 + 091c72c commit 1b63233
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 37 deletions.
46 changes: 19 additions & 27 deletions cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,8 @@ type ScaleSet struct {
minSize int
maxSize int

sizeMutex sync.Mutex
curSize int64
lastSizeRefresh time.Time
sizeRefreshPeriod time.Duration
sizeMutex sync.Mutex
curSize int64

instancesRefreshPeriod time.Duration
instancesRefreshJitter int
Expand Down Expand Up @@ -118,7 +116,7 @@ func (scaleSet *ScaleSet) Autoprovisioned() bool {
func (scaleSet *ScaleSet) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) {
template, err := scaleSet.getVMSSFromCache()
if err != nil {
return nil, err.Error()
return nil, err
}
return scaleSet.manager.GetScaleSetOptions(*template.Name, defaults), nil
}
Expand All @@ -128,11 +126,11 @@ func (scaleSet *ScaleSet) MaxSize() int {
return scaleSet.maxSize
}

func (scaleSet *ScaleSet) getVMSSFromCache() (compute.VirtualMachineScaleSet, *retry.Error) {
func (scaleSet *ScaleSet) getVMSSFromCache() (compute.VirtualMachineScaleSet, error) {
allVMSS := scaleSet.manager.azureCache.getScaleSets()

if _, exists := allVMSS[scaleSet.Name]; !exists {
return compute.VirtualMachineScaleSet{}, &retry.Error{RawError: fmt.Errorf("could not find vmss: %s", scaleSet.Name)}
return compute.VirtualMachineScaleSet{}, fmt.Errorf("could not find vmss: %s", scaleSet.Name)
}

return allVMSS[scaleSet.Name], nil
Expand All @@ -142,35 +140,30 @@ func (scaleSet *ScaleSet) getCurSize() (int64, error) {
scaleSet.sizeMutex.Lock()
defer scaleSet.sizeMutex.Unlock()

if scaleSet.lastSizeRefresh.Add(scaleSet.sizeRefreshPeriod).After(time.Now()) {
return scaleSet.curSize, nil
}

klog.V(5).Infof("Get scale set size for %q", scaleSet.Name)
set, rerr := scaleSet.getVMSSFromCache()
if rerr != nil {
return -1, rerr.Error()
set, err := scaleSet.getVMSSFromCache()
if err != nil {
klog.Errorf("failed to get information for VMSS: %s, error: %v", scaleSet.Name, err)
return -1, err
}

// If VMSS state is updating, return the currentSize which would've been proactively incremented or decremented by CA
if set.VirtualMachineScaleSetProperties != nil && strings.EqualFold(to.String(set.VirtualMachineScaleSetProperties.ProvisioningState), string(compute.ProvisioningStateUpdating)) {
klog.V(5).Infof("VMSS %q is in updating state, returning cached size: %d", scaleSet.Name, scaleSet.curSize)
klog.V(3).Infof("VMSS % is in updating state, returning cached size: %d", scaleSet.Name, scaleSet.curSize)
return scaleSet.curSize, nil
}

vmssSizeMutex.Lock()
curSize := *set.Sku.Capacity
vmssSizeMutex.Unlock()

klog.V(5).Infof("Getting scale set (%q) capacity: %d\n", scaleSet.Name, curSize)

if scaleSet.curSize != curSize {
// Invalidate the instance cache if the capacity has changed.
klog.V(5).Infof("VMSS %q size changed from: %d to %d, invalidating instance cache", scaleSet.Name, scaleSet.curSize, curSize)
scaleSet.invalidateInstanceCache()
}
klog.V(3).Infof("VMSS: %s, previous size: %d, new size: %d", scaleSet.Name, scaleSet.curSize, curSize)

scaleSet.curSize = curSize
scaleSet.lastSizeRefresh = time.Now()
return scaleSet.curSize, nil
}

Expand Down Expand Up @@ -226,10 +219,10 @@ func (scaleSet *ScaleSet) SetScaleSetSize(size int64) error {
scaleSet.sizeMutex.Lock()
defer scaleSet.sizeMutex.Unlock()

vmssInfo, rerr := scaleSet.getVMSSFromCache()
if rerr != nil {
klog.Errorf("Failed to get information for VMSS (%q): %v", scaleSet.Name, rerr)
return rerr.Error()
vmssInfo, err := scaleSet.getVMSSFromCache()
if err != nil {
klog.Errorf("Failed to get information for VMSS (%q): %v", scaleSet.Name, err)
return err
}

// Update the new capacity to cache.
Expand All @@ -254,7 +247,6 @@ func (scaleSet *ScaleSet) SetScaleSetSize(size int64) error {

// Proactively set the VMSS size so autoscaler makes better decisions.
scaleSet.curSize = size
scaleSet.lastSizeRefresh = time.Now()

go scaleSet.updateVMSSCapacity(future)
return nil
Expand Down Expand Up @@ -474,9 +466,9 @@ func (scaleSet *ScaleSet) Debug() string {

// TemplateNodeInfo returns a node template for this scale set.
func (scaleSet *ScaleSet) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) {
template, rerr := scaleSet.getVMSSFromCache()
if rerr != nil {
return nil, rerr.Error()
template, err := scaleSet.getVMSSFromCache()
if err != nil {
return nil, err
}

node, err := buildNodeFromTemplate(scaleSet.Name, template)
Expand Down
15 changes: 5 additions & 10 deletions cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,13 @@ package azure

import (
"fmt"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"net/http"
"testing"
"time"

apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"net/http"
"sigs.k8s.io/cloud-provider-azure/pkg/azureclients/vmssclient/mockvmssclient"
"sigs.k8s.io/cloud-provider-azure/pkg/azureclients/vmssvmclient/mockvmssvmclient"
"testing"

"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2020-12-01/compute"
"github.com/Azure/go-autorest/autorest/to"
Expand Down Expand Up @@ -138,12 +136,9 @@ func TestIncreaseSize(t *testing.T) {
err := provider.azureManager.forceRefresh()
assert.NoError(t, err)

ss := newTestScaleSet(provider.azureManager, "test-asg")
ss.lastSizeRefresh = time.Now()
ss.sizeRefreshPeriod = 1 * time.Minute
ss.curSize = -1
ss := newTestScaleSet(provider.azureManager, "test-asg-doesnt-exist")
err = ss.IncreaseSize(100)
expectedErr := fmt.Errorf("the scale set test-asg is under initialization, skipping IncreaseSize")
expectedErr := fmt.Errorf("could not find vmss: test-asg-doesnt-exist")
assert.Equal(t, expectedErr, err)

registered := provider.azureManager.RegisterNodeGroup(
Expand Down

0 comments on commit 1b63233

Please sign in to comment.