Skip to content

Commit

Permalink
Fixes for IP warm IP target IP deallocation
Browse files Browse the repository at this point in the history
* Remove extra ENI when warm IP target is set
* Modify max ENI check to allow for IP increase when warm IP target is
  set (Fixes issue where IPs are not allocated due ENIs being full)
  • Loading branch information
nckturner committed May 1, 2019
1 parent 0d3b925 commit 24ba739
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 37 deletions.
49 changes: 43 additions & 6 deletions ipamd/datastore/data_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -319,21 +319,43 @@ func (ds *DataStore) GetStats() (int, int) {
return ds.total, ds.assigned
}

func (ds *DataStore) getDeletableENI() *ENIIPPool {
// IsRequiredForWarmIPTarget determines if this ENI has warm IPs that are required to fulfill whatever WARM_IP_TARGET is
// set to.
func (ds *DataStore) isRequiredForWarmIPTarget(warmIPTarget int, eni *ENIIPPool) bool {
otherWarmIPs := 0
for _, other := range ds.eniIPPools {
if other.ID != eni.ID {
otherWarmIPs += len(other.IPv4Addresses) - other.AssignedIPv4Addresses
}
}
log.Debugf("ENI %s otherWarmIPs: %d < warmIPTarget: %d: %v", eni.ID, otherWarmIPs, warmIPTarget, otherWarmIPs < warmIPTarget)
return otherWarmIPs < warmIPTarget
}

func (ds *DataStore) getDeletableENI(warmIPTarget int) *ENIIPPool {
for _, eni := range ds.eniIPPools {
if eni.IsPrimary {
log.Debugf("ENI %s cannot be deleted because it is primary", eni.ID)
continue
}

if eni.isTooYoung() {
log.Debugf("ENI %s cannot be deleted because it is too young", eni.ID)
continue
}

if time.Now().Sub(eni.createTime) < minLifeTime {
if eni.hasIPInCooling() {
log.Debugf("ENI %s cannot be deleted because has IPs in cooling", eni.ID)
continue
}

if time.Now().Sub(eni.lastUnassignedTime) < addressENICoolingPeriod {
if eni.hasPods() {
log.Debugf("ENI %s cannot be deleted because it has pods assigned", eni.ID)
continue
}

if eni.AssignedIPv4Addresses != 0 {
if warmIPTarget != 0 && ds.isRequiredForWarmIPTarget(warmIPTarget, eni) {
log.Debugf("ENI %s cannot be deleted because it is required for WARM_IP_TARGET: %d", eni.ID, warmIPTarget)
continue
}

Expand All @@ -343,6 +365,21 @@ func (ds *DataStore) getDeletableENI() *ENIIPPool {
return nil
}

// IsTooYoung returns true if the ENI hasn't been around long enough to be deleted.
func (e *ENIIPPool) isTooYoung() bool {
return time.Now().Sub(e.createTime) < minLifeTime
}

// HasIPInCooling returns true if an IP address was unassigned recently.
func (e *ENIIPPool) hasIPInCooling() bool {
return time.Now().Sub(e.lastUnassignedTime) < addressENICoolingPeriod
}

// HasPods returns true if the ENI has pods assigned to it.
func (e *ENIIPPool) hasPods() bool {
return e.AssignedIPv4Addresses != 0
}

// GetENINeedsIP finds an ENI in the datastore that needs more IP addresses allocated
func (ds *DataStore) GetENINeedsIP(maxIPperENI int, skipPrimary bool) *ENIIPPool {
for _, eni := range ds.eniIPPools {
Expand All @@ -362,11 +399,11 @@ func (ds *DataStore) GetENINeedsIP(maxIPperENI int, skipPrimary bool) *ENIIPPool
// RemoveUnusedENIFromStore removes a deletable ENI from the data store.
// It returns the name of the ENI which has been removed from the data store and needs to be deleted,
// or empty string if no ENI could be removed.
func (ds *DataStore) RemoveUnusedENIFromStore() string {
func (ds *DataStore) RemoveUnusedENIFromStore(warmIPTarget int) string {
ds.lock.Lock()
defer ds.lock.Unlock()

deletableENI := ds.getDeletableENI()
deletableENI := ds.getDeletableENI(warmIPTarget)
if deletableENI == nil {
log.Debugf("No ENI can be deleted at this time")
return ""
Expand Down
98 changes: 69 additions & 29 deletions ipamd/ipamd.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,11 @@ func (c *IPAMContext) nodeInit() error {

log.Debugf("Start node init")

instanceMaxENIs, _ := c.awsClient.GetENILimit()
instanceMaxENIs, err := c.awsClient.GetENILimit()
if err != nil {
log.Errorf("Failed to get ENI limit: %s")
}

maxENIs := getMaxENI(instanceMaxENIs)
if maxENIs >= 1 {
enisMax.Set(float64(maxENIs))
Expand Down Expand Up @@ -359,6 +363,10 @@ func (c *IPAMContext) updateIPPoolIfRequired() {
} else if c.nodeIPPoolTooHigh() {
c.decreaseIPPool(decreaseIPPoolInterval)
}

if c.shouldRemoveExtraENIs() {
c.tryFreeENI()
}
}

// decreaseIPPool runs every `interval` and attempts to return unused ENIs and IPs
Expand All @@ -375,7 +383,6 @@ func (c *IPAMContext) decreaseIPPool(interval time.Duration) {

log.Debugf("Starting to decrease IP pool")

c.tryFreeENI()
c.tryUnassignIPsFromAll()

c.lastDecreaseIPPool = now
Expand All @@ -387,7 +394,9 @@ func (c *IPAMContext) decreaseIPPool(interval time.Duration) {

// tryFreeENI always trys to free one ENI
func (c *IPAMContext) tryFreeENI() {
eni := c.dataStore.RemoveUnusedENIFromStore()
warmIPTarget := getWarmIPTarget()

eni := c.dataStore.RemoveUnusedENIFromStore(warmIPTarget)
if eni == "" {
log.Info("No ENI to remove, all ENIs have IPs in use")
return
Expand Down Expand Up @@ -495,23 +504,45 @@ func (c *IPAMContext) increaseIPPool() {
}

instanceMaxENIs, err := c.awsClient.GetENILimit()
if err != nil {
log.Errorf("Failed to get ENI limit: %s")
}

// instanceMaxENIs will be 0 if the instance type is unknown. In this case, getMaxENI returns 0 or will use
// MAX_ENI if it is set.
maxENIs := getMaxENI(instanceMaxENIs)
if maxENIs >= 1 {
enisMax.Set(float64(maxENIs))
}

if err == nil && maxENIs == c.dataStore.GetENIs() {
log.Debugf("Skipping increase IP pool due to max ENI already attached to the instance: %d", maxENIs)
// Unknown instance type and MAX_ENI is not set
if maxENIs == 0 {
log.Errorf("Unknown instance type and MAX_ENI is not set. Cannot increase IP pool.")
return
}
if (c.maxENI > 0) && (c.maxENI == c.dataStore.GetENIs()) {
log.Debugf("Skipping increase IP pool due to max ENI already attached to the instance: %d", c.maxENI)
return

if c.dataStore.GetENIs() < maxENIs {
// c.maxENI represent the discovered maximum number of ENIs
if (c.maxENI > 0) && (c.maxENI == c.dataStore.GetENIs()) {
log.Debugf("Skipping ENI allocation due to max ENI already attached to the instance: %d", c.maxENI)
} else {
c.tryAllocateENI()
c.updateLastNodeIPPoolAction()
}
} else {
log.Debugf("Skipping ENI allocation due to max ENI already attached to the instance: %d", maxENIs)
}

c.tryAllocateENI()
c.tryAssignIPs()
increasedPool, err := c.tryAssignIPs()
if err != nil {
log.Errorf(err.Error())
}
if increasedPool {
c.updateLastNodeIPPoolAction()
}
}

func (c *IPAMContext) updateLastNodeIPPoolAction() {
c.lastNodeIPPoolAction = time.Now()
total, used := c.dataStore.GetStats()
log.Debugf("Successfully increased IP pool")
Expand Down Expand Up @@ -585,40 +616,40 @@ func (c *IPAMContext) tryAllocateENI() {
}

// For an ENI, try to fill in missing IPs
func (c *IPAMContext) tryAssignIPs() {
func (c *IPAMContext) tryAssignIPs() (increasedPool bool, err error) {

// if WARM_IP_TARGET is set, only proceed if we are short of target
short, _, warmIPTargetDefined := c.ipTargetState()
if warmIPTargetDefined && short == 0 {
return
return false, nil
}

maxIPPerENI, err := c.awsClient.GetENIipLimit()
if err != nil {
log.Infof("Failed to retrieve ENI IP limit: %v", err)
return
return false, errors.Wrap(err, "failed to retrieve ENI IP limit during IP allocation")
}

eni := c.dataStore.GetENINeedsIP(maxIPPerENI, UseCustomNetworkCfg())


if len(eni.IPv4Addresses) < maxIPPerENI {
log.Debugf("Found ENI %s that has less than the maximum number of IP addresses allocated: cur=%d, max=%d",
eni.ID, len(eni.IPv4Addresses), maxIPPerENI)
log.Debugf("Found ENI %s that has less than the maximum number of IP addresses allocated: cur=%d, max=%d", eni.ID, len(eni.IPv4Addresses), maxIPPerENI)

err = c.awsClient.AllocIPAddresses(eni.ID, maxIPPerENI-len(eni.IPv4Addresses))
if err != nil {
log.Warnf("Failed to allocate all available ip addresses on an eni %s: %s", eni.ID, err)
return false, errors.Wrap(err,fmt.Sprintf("failed to allocate all available IP addresses on ENI %s", eni.ID))
}

ec2Addrs, _, err := c.getENIaddresses(eni.ID)
if err != nil {
ipamdErrInc("increaseIPPoolGetENIaddressesFailed", err)
log.Warn("During eni repair: failed to get ENI ip addresses", err)
return
return true, errors.Wrap(err, "failed to get ENI IP addresses during IP allocation")
}

c.addENIaddressesToDataStore(ec2Addrs, eni.ID)
return true, nil
}
return false, nil
}

// setupENI does following:
Expand Down Expand Up @@ -782,29 +813,38 @@ func (c *IPAMContext) nodeIPPoolTooLow() bool {
return short > 0
}

// If WARM-IP-TARGET not defined fallback using number of ENIs
// If WARM_IP_TARGET not defined fallback using number of ENIs
warmENITarget := getWarmENITarget()
total, used := c.dataStore.GetStats()
logPoolStats(total, used, c.currentMaxAddrsPerENI, c.maxAddrsPerENI)

available := total - used
return available < c.maxAddrsPerENI*warmENITarget
return available < c.maxAddrsPerENI * warmENITarget
}

// nodeIPPoolTooHigh returns true if IP pool is above high threshold
func (c *IPAMContext) nodeIPPoolTooHigh() bool {
warmENITarget := getWarmENITarget()
total, used := c.dataStore.GetStats()
logPoolStats(total, used, c.currentMaxAddrsPerENI, c.maxAddrsPerENI)
_, over, warmIPTargetDefined := c.ipTargetState()
if warmIPTargetDefined {
return over > 0
}

available := total - used
// We only ever report the pool being too high if WARM_IP_TARGET is set
return false
}

target := getWarmIPTarget()
if target != noWarmIPTarget {
return target > available
// shouldRemoveExtraENIs returns true if we should attempt to find an ENI to free. When WARM_IP_TARGET is set, we
// always check and do verification in getDeletableENI()
func (c *IPAMContext) shouldRemoveExtraENIs() bool {
_, _, warmIPTargetDefined := c.ipTargetState()
if warmIPTargetDefined {
return true
}

return available >= (warmENITarget + 1) * c.maxAddrsPerENI
warmENITarget := getWarmENITarget()
total, used := c.dataStore.GetStats()
available := total - used
return available > warmENITarget * c.maxAddrsPerENI
}

func ipamdErrInc(fn string, err error) {
Expand Down
3 changes: 1 addition & 2 deletions pkg/awsutils/awsutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -851,8 +851,7 @@ func (cache *EC2InstanceMetadataCache) GetENILimit() (int, error) {
eniLimit, ok := InstanceENIsAvailable[cache.instanceType]

if !ok {
log.Errorf("Failed to get ENI limit due to unknown instance type %s", cache.instanceType)
return 0, errors.New(UnknownInstanceType)
return 0, errors.New(fmt.Sprintf("%s: %s", UnknownInstanceType, cache.instanceType))
}
return eniLimit, nil
}
Expand Down

0 comments on commit 24ba739

Please sign in to comment.