Skip to content

Commit 1e82e2a

Browse files
authored
Add metrics for leaked ENI cleanup routine (#328)
Add metrics for leaked ENI cleanup routine
1 parent c43b62a commit 1e82e2a

File tree

3 files changed

+37
-2
lines changed

3 files changed

+37
-2
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ MAKEFILE_PATH = $(dir $(realpath -s $(firstword $(MAKEFILE_LIST))))
1212
VERSION ?= $(GIT_VERSION)
1313
IMAGE ?= $(REPO):$(VERSION)
1414
BASE_IMAGE ?= public.ecr.aws/eks-distro-build-tooling/eks-distro-minimal-base-nonroot:latest.2
15-
BUILD_IMAGE ?= public.ecr.aws/bitnami/golang:1.20.5
15+
BUILD_IMAGE ?= public.ecr.aws/bitnami/golang:1.21.3
1616
GOARCH ?= amd64
1717
PLATFORM ?= linux/amd64
1818

pkg/aws/ec2/api/eni_cleanup.go

+32
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ import (
2020

2121
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config"
2222
rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz"
23+
"github.com/prometheus/client_golang/prometheus"
24+
"golang.org/x/exp/slices"
2325

2426
"github.com/aws/aws-sdk-go/aws"
2527
"github.com/aws/aws-sdk-go/service/ec2"
@@ -39,6 +41,21 @@ type ENICleaner struct {
3941
ctx context.Context
4042
}
4143

44+
var (
45+
vpcCniLeakedENICleanupCnt = prometheus.NewCounter(
46+
prometheus.CounterOpts{
47+
Name: "vpc_cni_created_leaked_eni_cleanup_count",
48+
Help: "The number of leaked ENIs created by VPC-CNI that is cleaned up by the controller",
49+
},
50+
)
51+
vpcrcLeakedENICleanupCnt = prometheus.NewCounter(
52+
prometheus.CounterOpts{
53+
Name: "vpc_rc_created_leaked_eni_cleanup_count",
54+
Help: "The number of leaked ENIs created by VPC-RC that is cleaned up by the controller",
55+
},
56+
)
57+
)
58+
4259
func (e *ENICleaner) SetupWithManager(ctx context.Context, mgr ctrl.Manager, healthzHandler *rcHealthz.HealthzHandler) error {
4360
e.clusterNameTagKey = fmt.Sprintf(config.ClusterNameTagKeyFormat, e.ClusterName)
4461
e.availableENIs = make(map[string]struct{})
@@ -113,6 +130,21 @@ func (e *ENICleaner) cleanUpAvailableENIs() {
113130

114131
for _, networkInterface := range describeNetworkInterfaceOp.NetworkInterfaces {
115132
if _, exists := e.availableENIs[*networkInterface.NetworkInterfaceId]; exists {
133+
// Increment Prometheus metrics for the number of leaked ENIs cleaned up
134+
if tagIdx := slices.IndexFunc(networkInterface.TagSet, func(tag *ec2.Tag) bool {
135+
return *tag.Key == config.NetworkInterfaceOwnerTagKey
136+
}); tagIdx != -1 {
137+
switch *networkInterface.TagSet[tagIdx].Value {
138+
case config.NetworkInterfaceOwnerTagValue:
139+
vpcrcLeakedENICleanupCnt.Inc()
140+
case config.NetworkInterfaceOwnerVPCCNITagValue:
141+
vpcCniLeakedENICleanupCnt.Inc()
142+
default:
143+
// We will not hit this case, as we only filter for the above two tag values; it is added for any future use cases
144+
e.Log.Info("found available ENI not created by VPC-CNI/VPC-RC")
145+
}
146+
}
147+
116148
// The ENI in available state has been sitting for at least the eni clean up interval and it should
117149
// be removed
118150
_, err := e.EC2Wrapper.DeleteNetworkInterface(&ec2.DeleteNetworkInterfaceInput{

pkg/aws/ec2/api/wrapper.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,10 @@ func prometheusRegister() {
344344
ec2describeTrunkInterfaceAssociationAPIErrCnt,
345345
ec2modifyNetworkInterfaceAttributeAPICallCnt,
346346
ec2modifyNetworkInterfaceAttributeAPIErrCnt,
347-
ec2APICallLatencies)
347+
ec2APICallLatencies,
348+
vpcCniLeakedENICleanupCnt,
349+
vpcrcLeakedENICleanupCnt,
350+
)
348351

349352
prometheusRegistered = true
350353
}

0 commit comments

Comments
 (0)