Skip to content

Commit

Permalink
Update backoff code from upstream and use when detaching ENIs
Browse files Browse the repository at this point in the history
  • Loading branch information
Claes Mogren authored and mogren committed Sep 25, 2019
1 parent 813f282 commit 36c108d
Show file tree
Hide file tree
Showing 9 changed files with 69 additions and 320 deletions.
4 changes: 2 additions & 2 deletions ipamd/introspect.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (
"time"

"github.com/aws/amazon-vpc-cni-k8s/pkg/networkutils"
"github.com/aws/amazon-vpc-cni-k8s/pkg/utils"
"github.com/aws/amazon-vpc-cni-k8s/pkg/utils/retry"
log "github.com/cihub/seelog"
)

Expand Down Expand Up @@ -61,7 +61,7 @@ func (c *IPAMContext) ServeIntrospection() {
server := c.setupIntrospectionServer()
for {
once := sync.Once{}
_ = utils.RetryWithBackoff(utils.NewSimpleBackoff(time.Second, time.Minute, 0.2, 2), func() error {
_ = retry.RetryWithBackoff(retry.NewSimpleBackoff(time.Second, time.Minute, 0.2, 2), func() error {
err := server.ListenAndServe()
once.Do(func() {
log.Error("Error running http API: ", err)
Expand Down
5 changes: 2 additions & 3 deletions ipamd/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,9 @@ import (
"sync"
"time"

"github.com/aws/amazon-vpc-cni-k8s/pkg/utils/retry"
log "github.com/cihub/seelog"
"github.com/prometheus/client_golang/prometheus/promhttp"

"github.com/aws/amazon-vpc-cni-k8s/pkg/utils"
)

const (
Expand All @@ -44,7 +43,7 @@ func (c *IPAMContext) ServeMetrics() {
server := c.setupMetricsServer()
for {
once := sync.Once{}
_ = utils.RetryWithBackoff(utils.NewSimpleBackoff(time.Second, time.Minute, 0.2, 2), func() error {
_ = retry.RetryWithBackoff(retry.NewSimpleBackoff(time.Second, time.Minute, 0.2, 2), func() error {
err := server.ListenAndServe()
once.Do(func() {
log.Error("Error running http API: ", err)
Expand Down
99 changes: 42 additions & 57 deletions pkg/awsutils/awsutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (

"github.com/aws/amazon-vpc-cni-k8s/pkg/ec2metadata"
"github.com/aws/amazon-vpc-cni-k8s/pkg/ec2wrapper"
"github.com/aws/amazon-vpc-cni-k8s/pkg/utils/retry"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"
"github.com/aws/aws-sdk-go/aws/session"
Expand All @@ -49,7 +50,8 @@ const (
metadataInterface = "/interface-id/"
metadataSubnetCIDR = "/subnet-ipv4-cidr-block"
metadataIPv4s = "/local-ipv4s"
maxENIDeleteRetries = 20
maxENIDeleteRetries = 12
maxENIBackoffDelay = time.Minute
eniDescriptionPrefix = "aws-K8S-"
metadataOwnerID = "/owner-id"
// AllocENI need to choose a first free device number between 0 and maxENI
Expand All @@ -58,8 +60,6 @@ const (
eniNodeTagKey = "node.k8s.amazonaws.com/instance_id"
eniClusterTagKey = "cluster.k8s.amazonaws.com/name"

retryDeleteENIInternal = 5 * time.Second

// UnknownInstanceType indicates that the instance type is not yet supported
UnknownInstanceType = "vpc ip resource(eni ip limit): unknown instance type"
)
Expand Down Expand Up @@ -550,15 +550,16 @@ func (cache *EC2InstanceMetadataCache) AllocENI(useCustomCfg bool, sg []*string,
return "", errors.Wrap(err, "AllocENI: failed to create ENI")
}

cache.tagENI(eniID)

attachmentID, err := cache.attachENI(eniID)
if err != nil {
_ = cache.deleteENI(eniID, retryDeleteENIInternal)
_ = cache.deleteENI(eniID, maxENIBackoffDelay)
return "", errors.Wrap(err, "AllocENI: error attaching ENI")
}

// also change the ENI's attribute so that the ENI will be deleted when the instance is deleted.
// Once the ENI is attached, tag it.
cache.tagENI(eniID)

// Also change the ENI's attribute so that the ENI will be deleted when the instance is deleted.
attributeInput := &ec2.ModifyNetworkInterfaceAttributeInput{
Attachment: &ec2.NetworkInterfaceAttachmentChanges{
AttachmentId: aws.String(attachmentID),
Expand Down Expand Up @@ -713,14 +714,13 @@ func awsUtilsErrInc(fn string, err error) {

// FreeENI detaches and deletes the ENI interface
func (cache *EC2InstanceMetadataCache) FreeENI(eniName string) error {
return cache.freeENI(eniName, retryDeleteENIInternal)
return cache.freeENI(eniName, maxENIBackoffDelay)
}

func (cache *EC2InstanceMetadataCache) freeENI(eniName string, retryDeleteENISleepDuration time.Duration) error {
func (cache *EC2InstanceMetadataCache) freeENI(eniName string, maxBackoffDelay time.Duration) error {
log.Infof("Trying to free ENI: %s", eniName)

// Find out attachment
// TODO: use metadata
_, attachID, err := cache.DescribeENI(eniName)
if err != nil {
if err == ErrENINotFound {
Expand All @@ -739,31 +739,27 @@ func (cache *EC2InstanceMetadataCache) freeENI(eniName string, retryDeleteENISle
}

// Retry detaching the ENI from the instance
var retry int
for retry = 0; retry <= maxENIDeleteRetries; retry++ {
err = retry.RetryNWithBackoff(retry.NewSimpleBackoff(time.Millisecond*200, maxBackoffDelay, 0.15, 2.0), maxENIDeleteRetries, func() error {
start := time.Now()
_, err = cache.ec2SVC.DetachNetworkInterface(detachInput)
awsAPILatency.WithLabelValues("DetachNetworkInterface", fmt.Sprint(err != nil)).Observe(msSince(start))
if err != nil {
awsAPIErrInc("DetachNetworkInterface", err)
log.Errorf("Failed to detach ENI %s %v", eniName, err)
if retry == maxENIDeleteRetries {
return errors.New("unable to detach ENI from EC2 instance, giving up")
}
} else {
log.Infof("Successfully detached ENI: %s", eniName)
break
_, ec2Err := cache.ec2SVC.DetachNetworkInterface(detachInput)
awsAPILatency.WithLabelValues("DetachNetworkInterface", fmt.Sprint(ec2Err != nil)).Observe(msSince(start))
if ec2Err != nil {
awsAPIErrInc("DetachNetworkInterface", ec2Err)
log.Errorf("Failed to detach ENI %s %v", eniName, ec2Err)
return errors.New("unable to detach ENI from EC2 instance, giving up")
}
log.Infof("Successfully detached ENI: %s", eniName)
return nil
})

log.Debugf("Not able to detach ENI yet (attempt %d/%d): %v ", retry, maxENIDeleteRetries, err)
time.Sleep(retryDeleteENISleepDuration)
if err != nil {
log.Errorf("Failed to detach ENI %s %v", eniName, err)
return err
}

// It may take awhile for EC2-VPC to detach ENI from instance
// retry maxENIDeleteRetries times with sleep 5 sec between to delete the interface
// TODO check if can use built-in waiter in the aws-sdk-go,
// Example: https://github.com/aws/aws-sdk-go/blob/master/service/ec2/waiters.go#L874
err = cache.deleteENI(eniName, retryDeleteENISleepDuration)
// It does take awhile for EC2 to detach ENI from instance, so we wait 2s before trying the delete.
time.Sleep(time.Second*2)
err = cache.deleteENI(eniName, maxBackoffDelay)
if err != nil {
awsUtilsErrInc("FreeENIDeleteErr", err)
return errors.Wrapf(err, "FreeENI: failed to free ENI: %s", eniName)
Expand All @@ -773,32 +769,24 @@ func (cache *EC2InstanceMetadataCache) freeENI(eniName string, retryDeleteENISle
return nil
}

func (cache *EC2InstanceMetadataCache) deleteENI(eniName string, retryDeleteENIInternal time.Duration) error {
func (cache *EC2InstanceMetadataCache) deleteENI(eniName string, maxBackoffDelay time.Duration) error {
log.Debugf("Trying to delete ENI: %s", eniName)

retry := 0
var err error
for {
retry++
if retry > maxENIDeleteRetries {
return errors.New("unable to delete ENI, giving up")
}

deleteInput := &ec2.DeleteNetworkInterfaceInput{
NetworkInterfaceId: aws.String(eniName),
}
deleteInput := &ec2.DeleteNetworkInterfaceInput{
NetworkInterfaceId: aws.String(eniName),
}
err := retry.RetryNWithBackoff(retry.NewSimpleBackoff(time.Millisecond*500, maxBackoffDelay, 0.15, 2.0), maxENIDeleteRetries, func() error {
start := time.Now()
_, err = cache.ec2SVC.DeleteNetworkInterface(deleteInput)
awsAPILatency.WithLabelValues("DeleteNetworkInterface", fmt.Sprint(err != nil)).Observe(msSince(start))
if err == nil {
log.Infof("Successfully deleted ENI: %s", eniName)
return nil
_, ec2Err := cache.ec2SVC.DeleteNetworkInterface(deleteInput)
awsAPILatency.WithLabelValues("DeleteNetworkInterface", fmt.Sprint(ec2Err != nil)).Observe(msSince(start))
if ec2Err != nil {
awsAPIErrInc("DeleteNetworkInterface", ec2Err)
log.Debugf("Not able to delete ENI: %v ", ec2Err)
return errors.Wrapf(ec2Err, "unable to delete ENI")
}
awsAPIErrInc("DeleteNetworkInterface", err)

log.Debugf("Not able to delete ENI yet (attempt %d/%d): %v ", retry, maxENIDeleteRetries, err)
time.Sleep(retryDeleteENIInternal)
}
log.Infof("Successfully deleted ENI: %s", eniName)
return nil
})
return err
}

// DescribeENI returns the IPv4 addresses of interface and the attachment id
Expand Down Expand Up @@ -886,7 +874,6 @@ func (cache *EC2InstanceMetadataCache) AllocIPAddresses(eniID string, numIPs int
}

log.Infof("Trying to allocate %d IP addresses on ENI %s", needIPs, eniID)

input := &ec2.AssignPrivateIpAddressesInput{
NetworkInterfaceId: aws.String(eniID),
SecondaryPrivateIpAddressCount: aws.Int64(int64(needIPs)),
Expand All @@ -909,10 +896,8 @@ func (cache *EC2InstanceMetadataCache) AllocIPAddresses(eniID string, numIPs int
// DeallocIPAddresses allocates numIPs of IP address on an ENI
func (cache *EC2InstanceMetadataCache) DeallocIPAddresses(eniID string, ips []string) error {
ctx := context.Background()

log.Infof("Trying to unassign the following IPs %s from ENI %s", ips, eniID)

ipsInput := []*string{}
var ipsInput []*string
for _, ip := range ips {
ipsInput = append(ipsInput, aws.String(ip))
}
Expand Down
10 changes: 4 additions & 6 deletions pkg/awsutils/awsutils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,6 @@ func TestAllocENI(t *testing.T) {
cureniID := eniID
eni := ec2.CreateNetworkInterfaceOutput{NetworkInterface: &ec2.NetworkInterface{NetworkInterfaceId: &cureniID}}
mockEC2.EXPECT().CreateNetworkInterface(gomock.Any()).Return(&eni, nil)
mockEC2.EXPECT().CreateTags(gomock.Any()).Return(nil, nil)

// 2 ENIs, uses device number 0 3, expect to find free at 1
ec2ENIs := make([]*ec2.InstanceNetworkInterface, 0)
Expand All @@ -359,6 +358,7 @@ func TestAllocENI(t *testing.T) {
attachResult := &ec2.AttachNetworkInterfaceOutput{
AttachmentId: &attachmentID}
mockEC2.EXPECT().AttachNetworkInterface(gomock.Any()).Return(attachResult, nil)
mockEC2.EXPECT().CreateTags(gomock.Any()).Return(nil, nil)
mockEC2.EXPECT().ModifyNetworkInterfaceAttribute(gomock.Any()).Return(nil, nil)

ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2}
Expand All @@ -373,7 +373,6 @@ func TestAllocENINoFreeDevice(t *testing.T) {
cureniID := eniID
eni := ec2.CreateNetworkInterfaceOutput{NetworkInterface: &ec2.NetworkInterface{NetworkInterfaceId: &cureniID}}
mockEC2.EXPECT().CreateNetworkInterface(gomock.Any()).Return(&eni, nil)
mockEC2.EXPECT().CreateTags(gomock.Any()).Return(nil, nil)

// test no free index
ec2ENIs := make([]*ec2.InstanceNetworkInterface, 0)
Expand Down Expand Up @@ -404,7 +403,6 @@ func TestAllocENIMaxReached(t *testing.T) {
cureniID := eniID
eni := ec2.CreateNetworkInterfaceOutput{NetworkInterface: &ec2.NetworkInterface{NetworkInterfaceId: &cureniID}}
mockEC2.EXPECT().CreateNetworkInterface(gomock.Any()).Return(&eni, nil)
mockEC2.EXPECT().CreateTags(gomock.Any()).Return(nil, nil)

// 2 ENIs, uses device number 0 3, expect to find free at 1
ec2ENIs := make([]*ec2.InstanceNetworkInterface, 0)
Expand Down Expand Up @@ -445,7 +443,7 @@ func TestFreeENI(t *testing.T) {
mockEC2.EXPECT().DeleteNetworkInterface(gomock.Any()).Return(nil, nil)

ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2}
err := ins.freeENI("test-eni", 0*time.Second)
err := ins.freeENI("test-eni", time.Millisecond)
assert.NoError(t, err)
}

Expand All @@ -465,7 +463,7 @@ func TestFreeENIRetry(t *testing.T) {
mockEC2.EXPECT().DeleteNetworkInterface(gomock.Any()).Return(nil, nil)

ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2}
err := ins.freeENI("test-eni", 0*time.Second)
err := ins.freeENI("test-eni", time.Millisecond)
assert.NoError(t, err)
}

Expand All @@ -485,7 +483,7 @@ func TestFreeENIRetryMax(t *testing.T) {
}

ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2}
err := ins.freeENI("test-eni", 0*time.Second)
err := ins.freeENI("test-eni", time.Millisecond)
assert.Error(t, err)
}

Expand Down
20 changes: 7 additions & 13 deletions pkg/utils/backoff.go → pkg/utils/retry/backoff.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@
// express or implied. See the License for the specific language governing
// permissions and limitations under the License.

package utils
package retry

//TODO needs to extract this to library (it's copied from ecs agent)
import (
"math"
"math/rand"
Expand All @@ -35,13 +34,10 @@ type SimpleBackoff struct {
mu sync.Mutex
}

// NewSimpleBackoff creates a Backoff which ranges from min to max increasing by
// multiple each time.
// It also adds (and yes, the jitter is always added, never
// subtracted) a random amount of jitter up to jitterMultiple percent (that is,
// jitterMultiple = 0.0 is no jitter, 0.15 is 15% added jitter). The total time
// may exceed "max" when accounting for jitter, such that the absolute max is
// max + max * jiterMultiple
// NewSimpleBackoff creates a Backoff which ranges from min to max increasing by multiple each time.
// It also adds (and yes, the jitter is always added, never subtracted) a random amount of jitter up to jitterMultiple
// percent (that is, jitterMultiple = 0.0 is no jitter, 0.15 is 15% added jitter). The total time/ may exceed "max"
// when accounting for jitter, such that the absolute max is max + max * jitterMultiple
func NewSimpleBackoff(min, max time.Duration, jitterMultiple, multiple float64) *SimpleBackoff {
return &SimpleBackoff{
start: min,
Expand All @@ -56,8 +52,7 @@ func (sb *SimpleBackoff) Duration() time.Duration {
sb.mu.Lock()
defer sb.mu.Unlock()
ret := sb.current
sb.current = time.Duration(math.Min(float64(sb.max.Nanoseconds()), float64(float64(sb.current.Nanoseconds())*sb.multiple)))

sb.current = time.Duration(math.Min(float64(sb.max.Nanoseconds()), float64(sb.current.Nanoseconds())*sb.multiple))
return AddJitter(ret, time.Duration(int64(float64(ret)*sb.jitterMultiple)))
}

Expand All @@ -67,8 +62,7 @@ func (sb *SimpleBackoff) Reset() {
sb.current = sb.start
}

// AddJitter adds an amount of jitter between 0 and the given jitter to the
// given duration
// AddJitter adds an amount of jitter between 0 and the given jitter to the given duration
func AddJitter(duration time.Duration, jitter time.Duration) time.Duration {
var randJitter int64
if jitter.Nanoseconds() == 0 {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
// express or implied. See the License for the specific language governing
// permissions and limitations under the License.

package utils
package retry

import (
"testing"
Expand Down
16 changes: 1 addition & 15 deletions pkg/utils/errors.go → pkg/utils/retry/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
// express or implied. See the License for the specific language governing
// permissions and limitations under the License.

package utils
package retry

import (
"fmt"
Expand Down Expand Up @@ -61,10 +61,6 @@ func (e AttributeError) Error() string {
return e.err
}

func NewAttributeError(err string) AttributeError {
return AttributeError{err}
}

// MultiErr Implements error
type MultiErr struct {
errors []error
Expand All @@ -78,13 +74,3 @@ func (me MultiErr) Error() string {
}
return strings.Join(ret, "\n")
}

func NewMultiError(errs ...error) error {
errors := make([]error, 0, len(errs))
for _, err := range errs {
if err != nil {
errors = append(errors, err)
}
}
return MultiErr{errors}
}
Loading

0 comments on commit 36c108d

Please sign in to comment.