From 24ba739ce1caa1627311ad038f9fcc34778a2199 Mon Sep 17 00:00:00 2001 From: Nick Turner Date: Wed, 1 May 2019 15:07:21 -0700 Subject: [PATCH] Fixes for IP warm IP target IP deallocation * Remove extra ENI when warm IP target is set * Modify max ENI check to allow for IP increase when warm IP target is set (Fixes issue where IPs are not allocated due ENIs being full) --- ipamd/datastore/data_store.go | 49 +++++++++++++++--- ipamd/ipamd.go | 98 ++++++++++++++++++++++++----------- pkg/awsutils/awsutils.go | 3 +- 3 files changed, 113 insertions(+), 37 deletions(-) diff --git a/ipamd/datastore/data_store.go b/ipamd/datastore/data_store.go index f6ceba04ae..c8cb0506f2 100644 --- a/ipamd/datastore/data_store.go +++ b/ipamd/datastore/data_store.go @@ -319,21 +319,43 @@ func (ds *DataStore) GetStats() (int, int) { return ds.total, ds.assigned } -func (ds *DataStore) getDeletableENI() *ENIIPPool { +// IsRequiredForWarmIPTarget determines if this ENI has warm IPs that are required to fulfill whatever WARM_IP_TARGET is +// set to. +func (ds *DataStore) isRequiredForWarmIPTarget(warmIPTarget int, eni *ENIIPPool) bool { + otherWarmIPs := 0 + for _, other := range ds.eniIPPools { + if other.ID != eni.ID { + otherWarmIPs += len(other.IPv4Addresses) - other.AssignedIPv4Addresses + } + } + log.Debugf("ENI %s otherWarmIPs: %d < warmIPTarget: %d: %v", eni.ID, otherWarmIPs, warmIPTarget, otherWarmIPs < warmIPTarget) + return otherWarmIPs < warmIPTarget +} + +func (ds *DataStore) getDeletableENI(warmIPTarget int) *ENIIPPool { for _, eni := range ds.eniIPPools { if eni.IsPrimary { + log.Debugf("ENI %s cannot be deleted because it is primary", eni.ID) + continue + } + + if eni.isTooYoung() { + log.Debugf("ENI %s cannot be deleted because it is too young", eni.ID) continue } - if time.Now().Sub(eni.createTime) < minLifeTime { + if eni.hasIPInCooling() { + log.Debugf("ENI %s cannot be deleted because has IPs in cooling", eni.ID) continue } - if time.Now().Sub(eni.lastUnassignedTime) < addressENICoolingPeriod { + if eni.hasPods() { + log.Debugf("ENI %s cannot be deleted because it has pods assigned", eni.ID) continue } - if eni.AssignedIPv4Addresses != 0 { + if warmIPTarget != 0 && ds.isRequiredForWarmIPTarget(warmIPTarget, eni) { + log.Debugf("ENI %s cannot be deleted because it is required for WARM_IP_TARGET: %d", eni.ID, warmIPTarget) continue } @@ -343,6 +365,21 @@ func (ds *DataStore) getDeletableENI() *ENIIPPool { return nil } +// IsTooYoung returns true if the ENI hasn't been around long enough to be deleted. +func (e *ENIIPPool) isTooYoung() bool { + return time.Now().Sub(e.createTime) < minLifeTime +} + +// HasIPInCooling returns true if an IP address was unassigned recently. +func (e *ENIIPPool) hasIPInCooling() bool { + return time.Now().Sub(e.lastUnassignedTime) < addressENICoolingPeriod +} + +// HasPods returns true if the ENI has pods assigned to it. +func (e *ENIIPPool) hasPods() bool { + return e.AssignedIPv4Addresses != 0 +} + // GetENINeedsIP finds an ENI in the datastore that needs more IP addresses allocated func (ds *DataStore) GetENINeedsIP(maxIPperENI int, skipPrimary bool) *ENIIPPool { for _, eni := range ds.eniIPPools { @@ -362,11 +399,11 @@ func (ds *DataStore) GetENINeedsIP(maxIPperENI int, skipPrimary bool) *ENIIPPool // RemoveUnusedENIFromStore removes a deletable ENI from the data store. // It returns the name of the ENI which has been removed from the data store and needs to be deleted, // or empty string if no ENI could be removed. -func (ds *DataStore) RemoveUnusedENIFromStore() string { +func (ds *DataStore) RemoveUnusedENIFromStore(warmIPTarget int) string { ds.lock.Lock() defer ds.lock.Unlock() - deletableENI := ds.getDeletableENI() + deletableENI := ds.getDeletableENI(warmIPTarget) if deletableENI == nil { log.Debugf("No ENI can be deleted at this time") return "" diff --git a/ipamd/ipamd.go b/ipamd/ipamd.go index e7344b1f76..81550839f2 100644 --- a/ipamd/ipamd.go +++ b/ipamd/ipamd.go @@ -208,7 +208,11 @@ func (c *IPAMContext) nodeInit() error { log.Debugf("Start node init") - instanceMaxENIs, _ := c.awsClient.GetENILimit() + instanceMaxENIs, err := c.awsClient.GetENILimit() + if err != nil { + log.Errorf("Failed to get ENI limit: %s") + } + maxENIs := getMaxENI(instanceMaxENIs) if maxENIs >= 1 { enisMax.Set(float64(maxENIs)) @@ -359,6 +363,10 @@ func (c *IPAMContext) updateIPPoolIfRequired() { } else if c.nodeIPPoolTooHigh() { c.decreaseIPPool(decreaseIPPoolInterval) } + + if c.shouldRemoveExtraENIs() { + c.tryFreeENI() + } } // decreaseIPPool runs every `interval` and attempts to return unused ENIs and IPs @@ -375,7 +383,6 @@ func (c *IPAMContext) decreaseIPPool(interval time.Duration) { log.Debugf("Starting to decrease IP pool") - c.tryFreeENI() c.tryUnassignIPsFromAll() c.lastDecreaseIPPool = now @@ -387,7 +394,9 @@ func (c *IPAMContext) decreaseIPPool(interval time.Duration) { // tryFreeENI always trys to free one ENI func (c *IPAMContext) tryFreeENI() { - eni := c.dataStore.RemoveUnusedENIFromStore() + warmIPTarget := getWarmIPTarget() + + eni := c.dataStore.RemoveUnusedENIFromStore(warmIPTarget) if eni == "" { log.Info("No ENI to remove, all ENIs have IPs in use") return @@ -495,23 +504,45 @@ func (c *IPAMContext) increaseIPPool() { } instanceMaxENIs, err := c.awsClient.GetENILimit() + if err != nil { + log.Errorf("Failed to get ENI limit: %s") + } + + // instanceMaxENIs will be 0 if the instance type is unknown. In this case, getMaxENI returns 0 or will use + // MAX_ENI if it is set. maxENIs := getMaxENI(instanceMaxENIs) if maxENIs >= 1 { enisMax.Set(float64(maxENIs)) } - if err == nil && maxENIs == c.dataStore.GetENIs() { - log.Debugf("Skipping increase IP pool due to max ENI already attached to the instance: %d", maxENIs) + // Unknown instance type and MAX_ENI is not set + if maxENIs == 0 { + log.Errorf("Unknown instance type and MAX_ENI is not set. Cannot increase IP pool.") return } - if (c.maxENI > 0) && (c.maxENI == c.dataStore.GetENIs()) { - log.Debugf("Skipping increase IP pool due to max ENI already attached to the instance: %d", c.maxENI) - return + + if c.dataStore.GetENIs() < maxENIs { + // c.maxENI represent the discovered maximum number of ENIs + if (c.maxENI > 0) && (c.maxENI == c.dataStore.GetENIs()) { + log.Debugf("Skipping ENI allocation due to max ENI already attached to the instance: %d", c.maxENI) + } else { + c.tryAllocateENI() + c.updateLastNodeIPPoolAction() + } + } else { + log.Debugf("Skipping ENI allocation due to max ENI already attached to the instance: %d", maxENIs) } - c.tryAllocateENI() - c.tryAssignIPs() + increasedPool, err := c.tryAssignIPs() + if err != nil { + log.Errorf(err.Error()) + } + if increasedPool { + c.updateLastNodeIPPoolAction() + } +} +func (c *IPAMContext) updateLastNodeIPPoolAction() { c.lastNodeIPPoolAction = time.Now() total, used := c.dataStore.GetStats() log.Debugf("Successfully increased IP pool") @@ -585,40 +616,40 @@ func (c *IPAMContext) tryAllocateENI() { } // For an ENI, try to fill in missing IPs -func (c *IPAMContext) tryAssignIPs() { +func (c *IPAMContext) tryAssignIPs() (increasedPool bool, err error) { // if WARM_IP_TARGET is set, only proceed if we are short of target short, _, warmIPTargetDefined := c.ipTargetState() if warmIPTargetDefined && short == 0 { - return + return false, nil } maxIPPerENI, err := c.awsClient.GetENIipLimit() if err != nil { - log.Infof("Failed to retrieve ENI IP limit: %v", err) - return + return false, errors.Wrap(err, "failed to retrieve ENI IP limit during IP allocation") } eni := c.dataStore.GetENINeedsIP(maxIPPerENI, UseCustomNetworkCfg()) + if len(eni.IPv4Addresses) < maxIPPerENI { - log.Debugf("Found ENI %s that has less than the maximum number of IP addresses allocated: cur=%d, max=%d", - eni.ID, len(eni.IPv4Addresses), maxIPPerENI) + log.Debugf("Found ENI %s that has less than the maximum number of IP addresses allocated: cur=%d, max=%d", eni.ID, len(eni.IPv4Addresses), maxIPPerENI) err = c.awsClient.AllocIPAddresses(eni.ID, maxIPPerENI-len(eni.IPv4Addresses)) if err != nil { - log.Warnf("Failed to allocate all available ip addresses on an eni %s: %s", eni.ID, err) + return false, errors.Wrap(err,fmt.Sprintf("failed to allocate all available IP addresses on ENI %s", eni.ID)) } ec2Addrs, _, err := c.getENIaddresses(eni.ID) if err != nil { ipamdErrInc("increaseIPPoolGetENIaddressesFailed", err) - log.Warn("During eni repair: failed to get ENI ip addresses", err) - return + return true, errors.Wrap(err, "failed to get ENI IP addresses during IP allocation") } c.addENIaddressesToDataStore(ec2Addrs, eni.ID) + return true, nil } + return false, nil } // setupENI does following: @@ -782,29 +813,38 @@ func (c *IPAMContext) nodeIPPoolTooLow() bool { return short > 0 } - // If WARM-IP-TARGET not defined fallback using number of ENIs + // If WARM_IP_TARGET not defined fallback using number of ENIs warmENITarget := getWarmENITarget() total, used := c.dataStore.GetStats() logPoolStats(total, used, c.currentMaxAddrsPerENI, c.maxAddrsPerENI) available := total - used - return available < c.maxAddrsPerENI*warmENITarget + return available < c.maxAddrsPerENI * warmENITarget } // nodeIPPoolTooHigh returns true if IP pool is above high threshold func (c *IPAMContext) nodeIPPoolTooHigh() bool { - warmENITarget := getWarmENITarget() - total, used := c.dataStore.GetStats() - logPoolStats(total, used, c.currentMaxAddrsPerENI, c.maxAddrsPerENI) + _, over, warmIPTargetDefined := c.ipTargetState() + if warmIPTargetDefined { + return over > 0 + } - available := total - used + // We only ever report the pool being too high if WARM_IP_TARGET is set + return false +} - target := getWarmIPTarget() - if target != noWarmIPTarget { - return target > available +// shouldRemoveExtraENIs returns true if we should attempt to find an ENI to free. When WARM_IP_TARGET is set, we +// always check and do verification in getDeletableENI() +func (c *IPAMContext) shouldRemoveExtraENIs() bool { + _, _, warmIPTargetDefined := c.ipTargetState() + if warmIPTargetDefined { + return true } - return available >= (warmENITarget + 1) * c.maxAddrsPerENI + warmENITarget := getWarmENITarget() + total, used := c.dataStore.GetStats() + available := total - used + return available > warmENITarget * c.maxAddrsPerENI } func ipamdErrInc(fn string, err error) { diff --git a/pkg/awsutils/awsutils.go b/pkg/awsutils/awsutils.go index a52b386656..66ff41691e 100644 --- a/pkg/awsutils/awsutils.go +++ b/pkg/awsutils/awsutils.go @@ -851,8 +851,7 @@ func (cache *EC2InstanceMetadataCache) GetENILimit() (int, error) { eniLimit, ok := InstanceENIsAvailable[cache.instanceType] if !ok { - log.Errorf("Failed to get ENI limit due to unknown instance type %s", cache.instanceType) - return 0, errors.New(UnknownInstanceType) + return 0, errors.New(fmt.Sprintf("%s: %s", UnknownInstanceType, cache.instanceType)) } return eniLimit, nil }