diff --git a/go.mod b/go.mod index 7fd25e1a59..8b30717dcb 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect golang.org/x/net v0.0.0-20210614182718-04defd469f4e golang.org/x/sys v0.0.0-20210616094352-59db8d763f22 - golang.org/x/tools v0.1.3 // indirect + golang.org/x/tools v0.1.5 // indirect google.golang.org/grpc v1.29.0 gopkg.in/natefinch/lumberjack.v2 v2.0.0 k8s.io/api v0.18.6 diff --git a/pkg/awsutils/awsutils.go b/pkg/awsutils/awsutils.go index 63fbeab712..59280f6fc1 100644 --- a/pkg/awsutils/awsutils.go +++ b/pkg/awsutils/awsutils.go @@ -1353,7 +1353,7 @@ func (cache *EC2InstanceMetadataCache) AllocIPAddresses(eniID string, numIPs int } log.Errorf("Failed to allocate a private IP/Prefix addresses on ENI %v: %v", eniID, err) awsAPIErrInc("AssignPrivateIpAddresses", err) - return errors.Wrap(err, "allocate IP/Prefix address: failed to allocate a private IP/Prefix address") + return err } if output != nil { if cache.enableIpv4PrefixDelegation { diff --git a/pkg/ipamd/ipamd.go b/pkg/ipamd/ipamd.go index 42219be3da..3249adf071 100644 --- a/pkg/ipamd/ipamd.go +++ b/pkg/ipamd/ipamd.go @@ -28,6 +28,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/service/ec2" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" @@ -136,6 +137,12 @@ const ( envWarmPrefixTarget = "WARM_PREFIX_TARGET" defaultWarmPrefixTarget = 0 + // insufficientCidrErrorCooldown is the amount of time the reconciler will wait before trying to fetch + // more IPs/prefixes for an ENI. With InsufficientCidr we know the subnet doesn't have enough IPs, so + // instead of retrying every 5s, which would lead to an increase in EC2 AllocIPAddress calls, we wait + // 120 seconds before retrying. 
+ insufficientCidrErrorCooldown = 120 * time.Second + // envManageUntaggedENI is used to determine if untagged ENIs should be managed or unmanaged envManageUntaggedENI = "MANAGE_UNTAGGED_ENI" @@ -228,6 +235,7 @@ type IPAMContext struct { enablePodENI bool myNodeName string enableIpv4PrefixDelegation bool + lastInsufficientCidrError time.Time enableManageUntaggedMode bool } @@ -311,6 +319,20 @@ func prometheusRegister() { } } +// containsInsufficientCidrBlocksError reports whether err is (or wraps) an AWS API error whose code is "InsufficientCidrBlocks" +func containsInsufficientCidrBlocksError(err error) bool { + var awsErr awserr.Error + if errors.As(err, &awsErr) { + return awsErr.Code() == "InsufficientCidrBlocks" + } + return false +} + +// inInsufficientCidrCoolingPeriod reports whether no more than insufficientCidrErrorCooldown has elapsed since lastInsufficientCidrError +func (c *IPAMContext) inInsufficientCidrCoolingPeriod() bool { + return time.Since(c.lastInsufficientCidrError) <= insufficientCidrErrorCooldown +} + // New retrieves IP address usage information from Instance MetaData service and Kubelet // then initializes IP address pool data store func New(rawK8SClient client.Client, cachedK8SClient client.Client) (*IPAMContext, error) { @@ -517,6 +539,11 @@ func (c *IPAMContext) nodeInit() error { if err == nil && increasedPool { c.updateLastNodeIPPoolAction() } else if err != nil { + if containsInsufficientCidrBlocksError(err) { + log.Errorf("Unable to attach IPs/Prefixes for the ENI, subnet doesn't seem to have enough IPs/Prefixes. Consider using new subnet or carve a reserved range using create-subnet-cidr-reservation") + c.lastInsufficientCidrError = time.Now() + return nil + } return err } } @@ -712,9 +739,19 @@ func (c *IPAMContext) increaseDatastorePool(ctx context.Context) { return } // Try to add more Cidrs to existing ENIs first. 
+ if c.inInsufficientCidrCoolingPeriod() { + log.Debugf("Recently we had InsufficientCidr error hence will wait for %v before retrying", insufficientCidrErrorCooldown) + return + } + increasedPool, err := c.tryAssignCidrs() if err != nil { log.Errorf(err.Error()) + if containsInsufficientCidrBlocksError(err) { + log.Errorf("Unable to attach IPs/Prefixes for the ENI, subnet doesn't seem to have enough IPs/Prefixes. Consider using new subnet or carve a reserved range using create-subnet-cidr-reservation") + c.lastInsufficientCidrError = time.Now() + return + } } if increasedPool { c.updateLastNodeIPPoolAction() @@ -781,6 +818,11 @@ func (c *IPAMContext) tryAllocateENI(ctx context.Context) error { log.Warnf("Failed to allocate %d IP addresses on an ENI: %v", resourcesToAllocate, err) // Continue to process the allocated IP addresses ipamdErrInc("increaseIPPoolAllocIPAddressesFailed") + if containsInsufficientCidrBlocksError(err) { + log.Errorf("Unable to attach IPs/Prefixes for the ENI, subnet doesn't seem to have enough IPs/Prefixes. Consider using new subnet or carve a reserved range using create-subnet-cidr-reservation") + c.lastInsufficientCidrError = time.Now() + return err + } } eniMetadata, err := c.awsClient.WaitForENIAndIPsAttached(eni, resourcesToAllocate) diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index 3c88a86e7c..18a9fa1730 100755 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -37,7 +37,7 @@ log_in_json() unsupported_prefix_target_conf() { - if [ "${WARM_PREFIX_TARGET}" <= "0" ] && [ "${WARM_IP_TARGET}" <= "0" ] && [ "${MINIMUM_IP_TARGET}" <= "0" ];then + if [ "${WARM_PREFIX_TARGET}" -le "0" ] && [ "${WARM_IP_TARGET}" -le "0" ] && [ "${MINIMUM_IP_TARGET}" -le "0" ];then true else false