Skip to content

Commit

Permalink
Add config option for number of warm ENIs get allocated
Browse files Browse the repository at this point in the history
  • Loading branch information
liwenwu-amazon committed May 18, 2018
1 parent d69852b commit 90c3243
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 20 deletions.
4 changes: 2 additions & 2 deletions ipamd/datastore/data_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ var (
enis = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "eni_allocated",
Help: "The number of ENI allocated",
Help: "The number of ENIs allocated",
},
)
totalIPs = prometheus.NewGauge(
Expand All @@ -62,7 +62,7 @@ var (
assignedIPs = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "assigned_ip_addresses",
Help: "The number of IP addresses assigned",
Help: "The number of IP addresses assigned to pods",
},
)
prometheusRegistered = false
Expand Down
75 changes: 60 additions & 15 deletions ipamd/ipamd.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ package ipamd

import (
"net"
"os"
"strconv"
"strings"
"time"

Expand All @@ -40,27 +42,34 @@ const (
ipPoolMonitorInterval = 5 * time.Second
maxRetryCheckENI = 5
eniAttachTime = 10 * time.Second
defaultWarmENITarget = 1
)

var (
ipamdErr = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "ipamd_error_count",
Help: "the number of errors encountered in ipamd",
Help: "The number of errors encountered in ipamd",
},
[]string{"fn", "error"},
)
ipamdActionsInprogress = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "ipamd_action_inprogress",
Help: "the number of ipamd actions inprogress",
Help: "The number of ipamd actions inprogress",
},
[]string{"fn"},
)
enisMax = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "eni_max",
Help: "The number of maximum ENIs can be attached to the instance",
Help: "The maximum number of ENIs that can be attached to the instance",
},
)
ipMax = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "ip_max",
Help: "The maximum number of IP addresses that can be allocated to the instance",
},
)
prometheusRegistered = false
Expand All @@ -86,6 +95,7 @@ func prometheusRegister() {
prometheus.MustRegister(ipamdErr)
prometheus.MustRegister(ipamdActionsInprogress)
prometheus.MustRegister(enisMax)
prometheus.MustRegister(ipMax)
prometheusRegistered = true
}
}
Expand Down Expand Up @@ -117,8 +127,16 @@ func New() (*IPAMContext, error) {

// TODO: break this function down (comment from CR)
func (c *IPAMContext) nodeInit() error {
ipamdActionsInprogress.WithLabelValues("nodeInit").Add(float64(1))
defer ipamdActionsInprogress.WithLabelValues("nodeInit").Sub(float64(1))
maxENIs, err := c.awsClient.GetENILimit()
enisMax.Set(float64(maxENIs))
if err == nil {
enisMax.Set(float64(maxENIs))
}
maxIPs, err := c.awsClient.GetENIipLimit()
if err == nil {
ipMax.Set(float64(maxIPs * int64(maxENIs)))
}
enis, err := c.awsClient.GetAttachedENIs()
if err != nil {
log.Error("Failed to retrive ENI info")
Expand Down Expand Up @@ -240,8 +258,8 @@ func (c *IPAMContext) decreaseIPPool() {
log.Debugf("Start freeing eni %s", eni)
c.awsClient.FreeENI(eni)
total, used := c.dataStore.GetStats()
log.Debugf("Successfully decreased IP Pool: total=%d, used=%d, c.currentMaxAddrsPerENI =%d, c.maxAddrsPerENI = %d",
total, used, c.currentMaxAddrsPerENI, c.maxAddrsPerENI)
log.Debugf("Successfully decreased IP Pool")
logPoolStats(total, used, c.currentMaxAddrsPerENI, c.maxAddrsPerENI)
}

func isAttachmentLimitExceededError(err error) bool {
Expand All @@ -260,6 +278,10 @@ func (c *IPAMContext) increaseIPPool() {
return
}
if (c.maxENI > 0) && (c.maxENI == c.dataStore.GetENIs()) {
if c.maxENI < maxENIs {
errString := "desired: " + strconv.FormatInt(int64(maxENIs), 10) + "current: " + strconv.FormatInt(int64(c.maxENI), 10)
ipamdErrInc("unExpectedMaxENIAttached", errors.New(errString))
}
log.Debugf("Skipping increase IPPOOL due to max ENI already attached to the instance : %d", c.maxENI)
return
}
Expand Down Expand Up @@ -297,8 +319,8 @@ func (c *IPAMContext) increaseIPPool() {
return
}
total, used := c.dataStore.GetStats()
log.Debugf("Successfully increased IP Pool: total=%d, used=%d, c.currentMaxAddrsPerENI =%d, c.maxAddrsPerENI = %d",
total, used, c.currentMaxAddrsPerENI, c.maxAddrsPerENI)
log.Debugf("Successfully increased IP Pool")
logPoolStats(total, used, c.currentMaxAddrsPerENI, c.maxAddrsPerENI)
}

// setupENI does the following:
Expand Down Expand Up @@ -400,23 +422,46 @@ func (c *IPAMContext) waitENIAttached(eni string) (awsutils.ENIMetadata, error)
}
}

// getWarmENITarget returns the warm ENI target configured through the
// WARM_ENI_TARGET environment variable.
//
// It returns defaultWarmENITarget when the variable is unset, cannot be
// parsed as a base-10 integer, or is negative. Zero is accepted as a valid
// target. Rejected values are logged so that a misconfiguration is visible
// instead of being silently replaced by the default.
func getWarmENITarget() int {
	const envName = "WARM_ENI_TARGET"

	inputStr, found := os.LookupEnv(envName)
	if !found {
		return defaultWarmENITarget
	}

	input, err := strconv.Atoi(inputStr)
	if err != nil {
		log.Debugf("Ignoring invalid %s %q (%v), using default %d",
			envName, inputStr, err, defaultWarmENITarget)
		return defaultWarmENITarget
	}
	if input < 0 {
		log.Debugf("Ignoring negative %s %d, using default %d",
			envName, input, defaultWarmENITarget)
		return defaultWarmENITarget
	}

	// Log the setting under the same name the operator has to export.
	log.Debugf("Using %s %d", envName, input)
	return input
}

// logPoolStats emits a single debug-level line with the IP pool counters
// passed in: the total and used address counts, plus the current and maximum
// addresses-per-ENI values.
func logPoolStats(total, used, currentMaxAddrsPerENI, maxAddrsPerENI int) {
	const statsFormat = "IP pool stats: total = %d, used = %d, c.currentMaxAddrsPerENI = %d, c.maxAddrsPerENI = %d"

	log.Debugf(statsFormat, total, used, currentMaxAddrsPerENI, maxAddrsPerENI)
}

// nodeIPPoolTooLow returns true if the IP pool is below the low threshold:
// the number of addresses in the data store that are not in use (total - used)
// is less than or equal to currentMaxAddrsPerENI multiplied by the warm ENI
// target returned by getWarmENITarget.
func (c *IPAMContext) nodeIPPoolTooLow() bool {
	warmENITarget := getWarmENITarget()
	total, used := c.dataStore.GetStats()
	logPoolStats(total, used, c.currentMaxAddrsPerENI, c.maxAddrsPerENI)

	// The threshold scales with the warm ENI target; a fixed one-ENI
	// threshold would ignore the configured value.
	available := total - used
	return available <= c.currentMaxAddrsPerENI*warmENITarget
}

// nodeIPPoolTooHigh returns true if the IP pool is above the high threshold:
// the number of addresses in the data store that are not in use (total - used)
// is greater than currentMaxAddrsPerENI multiplied by one more than the warm
// ENI target returned by getWarmENITarget.
func (c *IPAMContext) nodeIPPoolTooHigh() bool {
	warmENITarget := getWarmENITarget()
	total, used := c.dataStore.GetStats()
	logPoolStats(total, used, c.currentMaxAddrsPerENI, c.maxAddrsPerENI)

	// The high-water mark is (warmENITarget+1) ENIs' worth of addresses, so
	// with the default target of 1 it equals the previous 2*currentMaxAddrsPerENI.
	available := total - used
	return available > (warmENITarget+1)*c.currentMaxAddrsPerENI
}

Expand Down
5 changes: 4 additions & 1 deletion misc/cni_metrics_helper.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ spec:
spec:
serviceAccountName: cni-metrics-helper
containers:
- image: 694065802095.dkr.ecr.us-west-2.amazonaws.com/cni-metrics-helper:0.1.0
- image: 694065802095.dkr.ecr.us-west-2.amazonaws.com/cni-metrics-helper:0.1.1
imagePullPolicy: Always
name: cni-metrics-helper
env:
- name: USE_CLOUDWATCH
value: "no"
4 changes: 2 additions & 2 deletions pkg/awsutils/awsutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,14 @@ var (
awsAPIErr = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "aws_api_error_count",
Help: "the number of times AWS API returns an err",
Help: "The number of times AWS API returns an error",
},
[]string{"api", "error"},
)
awsUtilsErr = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "aws_utils_error_count",
Help: " the number of errors not handled in awsutils library",
Help: "The number of errors not handled in awsutils library",
},
[]string{"fn", "error"},
)
Expand Down

0 comments on commit 90c3243

Please sign in to comment.