diff --git a/main.go b/main.go index 5b867f29..888cac68 100644 --- a/main.go +++ b/main.go @@ -107,6 +107,7 @@ func main() { var healthCheckTimeout int var enableWindowsPrefixDelegation bool var region string + var vpcID string flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") @@ -141,6 +142,7 @@ func main() { flag.BoolVar(&enableWindowsPrefixDelegation, "enable-windows-prefix-delegation", false, "Enable the feature flag for Windows prefix delegation") flag.StringVar(®ion, "aws-region", "", "The aws region of the k8s cluster") + flag.StringVar(&vpcID, "vpc-id", "", "The vpc-id where EKS cluster is deployed") flag.Parse() @@ -183,6 +185,11 @@ func main() { os.Exit(1) } + if vpcID == "" { + setupLog.Error(fmt.Errorf("vpc-id is a required parameter"), "unable to start the controller") + os.Exit(1) + } + // Profiler disabled by default, to enable set the enableProfiling argument if enableProfiling { // To use the profiler - https://golang.org/pkg/net/http/pprof/ @@ -336,6 +343,7 @@ func main() { EC2Wrapper: ec2Wrapper, ClusterName: clusterName, Log: ctrl.Log.WithName("eni cleaner"), + VPCID: vpcID, }).SetupWithManager(ctx, mgr, healthzHandler); err != nil { setupLog.Error(err, "unable to start eni cleaner") os.Exit(1) diff --git a/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_apihelper.go b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_apihelper.go index 18f1c8f4..19f7e104 100644 --- a/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_apihelper.go +++ b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_apihelper.go @@ -196,18 +196,18 @@ func (mr *MockEC2APIHelperMockRecorder) DetachNetworkInterfaceFromInstance(arg0 } // GetBranchNetworkInterface mocks base method. -func (m *MockEC2APIHelper) GetBranchNetworkInterface(arg0 *string) ([]*ec2.NetworkInterface, error) { +func (m *MockEC2APIHelper) GetBranchNetworkInterface(arg0, arg1 *string) ([]*ec2.NetworkInterface, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetBranchNetworkInterface", arg0) + ret := m.ctrl.Call(m, "GetBranchNetworkInterface", arg0, arg1) ret0, _ := ret[0].([]*ec2.NetworkInterface) ret1, _ := ret[1].(error) return ret0, ret1 } // GetBranchNetworkInterface indicates an expected call of GetBranchNetworkInterface. -func (mr *MockEC2APIHelperMockRecorder) GetBranchNetworkInterface(arg0 interface{}) *gomock.Call { +func (mr *MockEC2APIHelperMockRecorder) GetBranchNetworkInterface(arg0, arg1 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetBranchNetworkInterface", reflect.TypeOf((*MockEC2APIHelper)(nil).GetBranchNetworkInterface), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetBranchNetworkInterface", reflect.TypeOf((*MockEC2APIHelper)(nil).GetBranchNetworkInterface), arg0, arg1) } // GetInstanceDetails mocks base method. diff --git a/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_wrapper.go b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_wrapper.go index f40d94c6..d89a5b9d 100644 --- a/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_wrapper.go +++ b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_wrapper.go @@ -182,6 +182,21 @@ func (mr *MockEC2WrapperMockRecorder) DescribeNetworkInterfaces(arg0 interface{} return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeNetworkInterfaces", reflect.TypeOf((*MockEC2Wrapper)(nil).DescribeNetworkInterfaces), arg0) } +// DescribeNetworkInterfacesPages mocks base method. +func (m *MockEC2Wrapper) DescribeNetworkInterfacesPages(arg0 *ec2.DescribeNetworkInterfacesInput) ([]*ec2.NetworkInterface, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DescribeNetworkInterfacesPages", arg0) + ret0, _ := ret[0].([]*ec2.NetworkInterface) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DescribeNetworkInterfacesPages indicates an expected call of DescribeNetworkInterfacesPages. +func (mr *MockEC2WrapperMockRecorder) DescribeNetworkInterfacesPages(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeNetworkInterfacesPages", reflect.TypeOf((*MockEC2Wrapper)(nil).DescribeNetworkInterfacesPages), arg0) +} + // DescribeSubnets mocks base method. func (m *MockEC2Wrapper) DescribeSubnets(arg0 *ec2.DescribeSubnetsInput) (*ec2.DescribeSubnetsOutput, error) { m.ctrl.T.Helper() diff --git a/pkg/aws/ec2/api/eni_cleanup.go b/pkg/aws/ec2/api/eni_cleanup.go index 6f3db155..e8f265e4 100644 --- a/pkg/aws/ec2/api/eni_cleanup.go +++ b/pkg/aws/ec2/api/eni_cleanup.go @@ -34,6 +34,7 @@ type ENICleaner struct { EC2Wrapper EC2Wrapper ClusterName string Log logr.Logger + VPCID string availableENIs map[string]struct{} shutdown bool @@ -42,16 +43,22 @@ type ENICleaner struct { } var ( - vpcCniLeakedENICleanupCnt = prometheus.NewCounter( - prometheus.CounterOpts{ - Name: "vpc_cni_created_leaked_eni_cleanup_count", - Help: "The number of leaked ENIs created by VPC-CNI that is cleaned up by the controller", + vpccniAvailableENICnt = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "vpc_cni_created_available_eni_count", + Help: "The number of available ENIs created by VPC-CNI that controller will try to delete in each cleanup cycle", }, ) - vpcrcLeakedENICleanupCnt = prometheus.NewCounter( - prometheus.CounterOpts{ - Name: "vpc_rc_created_leaked_eni_cleanup_count", - Help: "The number of leaked ENIs created by VPC-RC that is cleaned up by the controller", + vpcrcAvailableENICnt = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "vpc_rc_created_available_eni_count", + Help: "The number of available ENIs created by VPC-RC that controller will try to delete in each cleanup cycle", + }, + ) + leakedENICnt = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "leaked_eni_count", + Help: "The number of available ENIs that failed to be deleted by the controller in each cleanup cycle", }, ) ) @@ -101,6 +108,9 @@ func (e *ENICleaner) Start(ctx context.Context) error { // interval between cycle 1 and 2 and hence can be safely deleted. And we can also conclude that Interface 1 was // created but not attached at the the time when 1st cycle ran and hence it should not be deleted. func (e *ENICleaner) cleanUpAvailableENIs() { + vpcrcAvailableCount := 0 + vpccniAvailableCount := 0 + leakedENICount := 0 describeNetworkInterfaceIp := &ec2.DescribeNetworkInterfacesInput{ Filters: []*ec2.Filter{ { @@ -116,63 +126,65 @@ func (e *ENICleaner) cleanUpAvailableENIs() { Values: aws.StringSlice([]string{config.NetworkInterfaceOwnerTagValue, config.NetworkInterfaceOwnerVPCCNITagValue}), }, + { + Name: aws.String("vpc-id"), + Values: []*string{aws.String(e.VPCID)}, + }, }, } availableENIs := make(map[string]struct{}) - for { - describeNetworkInterfaceOp, err := e.EC2Wrapper.DescribeNetworkInterfaces(describeNetworkInterfaceIp) - if err != nil { - e.Log.Error(err, "failed to describe network interfaces, will retry") - return - } - - for _, networkInterface := range describeNetworkInterfaceOp.NetworkInterfaces { - if _, exists := e.availableENIs[*networkInterface.NetworkInterfaceId]; exists { - // Increment promethues metrics for number of leaked ENIs cleaned up - if tagIdx := slices.IndexFunc(networkInterface.TagSet, func(tag *ec2.Tag) bool { - return *tag.Key == config.NetworkInterfaceOwnerTagKey - }); tagIdx != -1 { - switch *networkInterface.TagSet[tagIdx].Value { - case config.NetworkInterfaceOwnerTagValue: - vpcrcLeakedENICleanupCnt.Inc() - case config.NetworkInterfaceOwnerVPCCNITagValue: - vpcCniLeakedENICleanupCnt.Inc() - default: - // We will not hit this case as we only filter for above two tag values, adding it for any future use cases - e.Log.Info("found available ENI not created by VPC-CNI/VPC-RC") - } - } + networkInterfaces, err := e.EC2Wrapper.DescribeNetworkInterfacesPages(describeNetworkInterfaceIp) + if err != nil { + e.Log.Error(err, "failed to describe network interfaces, cleanup will be retried in next cycle") + return + } - // The ENI in available state has been sitting for at least the eni clean up interval and it should - // be removed - _, err := e.EC2Wrapper.DeleteNetworkInterface(&ec2.DeleteNetworkInterfaceInput{ - NetworkInterfaceId: networkInterface.NetworkInterfaceId, - }) - if err != nil { - // Log and continue, if the ENI is still present it will be cleaned up in next 2 cycles - e.Log.Error(err, "failed to delete the dangling network interface", - "id", *networkInterface.NetworkInterfaceId) + for _, networkInterface := range networkInterfaces { + if _, exists := e.availableENIs[*networkInterface.NetworkInterfaceId]; exists { + // Increment promethues metrics for number of leaked ENIs cleaned up + if tagIdx := slices.IndexFunc(networkInterface.TagSet, func(tag *ec2.Tag) bool { + return *tag.Key == config.NetworkInterfaceOwnerTagKey + }); tagIdx != -1 { + switch *networkInterface.TagSet[tagIdx].Value { + case config.NetworkInterfaceOwnerTagValue: + vpcrcAvailableCount += 1 + case config.NetworkInterfaceOwnerVPCCNITagValue: + vpccniAvailableCount += 1 + default: + // We should not hit this case as we only filter for relevant tag values, log error and continue if unexpected ENIs found + e.Log.Error(fmt.Errorf("found available ENI not created by VPC-CNI/VPC-RC"), "eniID", *networkInterface.NetworkInterfaceId) continue } - e.Log.Info("deleted dangling ENI successfully", - "eni id", networkInterface.NetworkInterfaceId) - } else { - // Seeing the ENI for the first time, add it to the new list of available network interfaces - availableENIs[*networkInterface.NetworkInterfaceId] = struct{}{} - e.Log.V(1).Info("adding eni to to the map of available ENIs, will be removed if present in "+ - "next run too", "id", *networkInterface.NetworkInterfaceId) } - } - if describeNetworkInterfaceOp.NextToken == nil { - break + // The ENI in available state has been sitting for at least the eni clean up interval and it should + // be removed + _, err := e.EC2Wrapper.DeleteNetworkInterface(&ec2.DeleteNetworkInterfaceInput{ + NetworkInterfaceId: networkInterface.NetworkInterfaceId, + }) + if err != nil { + leakedENICount += 1 + // Log and continue, if the ENI is still present it will be cleaned up in next 2 cycles + e.Log.Error(err, "failed to delete the dangling network interface", + "id", *networkInterface.NetworkInterfaceId) + continue + } + e.Log.Info("deleted dangling ENI successfully", + "eni id", networkInterface.NetworkInterfaceId) + } else { + // Seeing the ENI for the first time, add it to the new list of available network interfaces + availableENIs[*networkInterface.NetworkInterfaceId] = struct{}{} + e.Log.V(1).Info("adding eni to to the map of available ENIs, will be removed if present in "+ + "next run too", "id", *networkInterface.NetworkInterfaceId) } - - describeNetworkInterfaceIp.NextToken = describeNetworkInterfaceOp.NextToken } + // Update leaked ENI metrics + vpcrcAvailableENICnt.Set(float64(vpcrcAvailableCount)) + vpccniAvailableENICnt.Set(float64(vpccniAvailableCount)) + leakedENICnt.Set(float64(leakedENICount)) // Set the available ENIs to the list of ENIs seen in the current cycle e.availableENIs = availableENIs } diff --git a/pkg/aws/ec2/api/eni_cleanup_test.go b/pkg/aws/ec2/api/eni_cleanup_test.go index 199f6368..484a0722 100644 --- a/pkg/aws/ec2/api/eni_cleanup_test.go +++ b/pkg/aws/ec2/api/eni_cleanup_test.go @@ -37,6 +37,8 @@ var ( mockNetworkInterfaceId2 = "eni-000000000000001" mockNetworkInterfaceId3 = "eni-000000000000002" + mockVPCID = "vpc-0000000000000000" + mockDescribeNetworkInterfaceIp = &ec2.DescribeNetworkInterfacesInput{ Filters: []*ec2.Filter{ { @@ -52,19 +54,19 @@ var ( Values: aws.StringSlice([]string{config.NetworkInterfaceOwnerTagValue, config.NetworkInterfaceOwnerVPCCNITagValue}), }, + { + Name: aws.String("vpc-id"), + Values: []*string{aws.String(mockVPCID)}, + }, }, } - mockDescribeInterfaceOpWith1And2 = &ec2.DescribeNetworkInterfacesOutput{ - NetworkInterfaces: []*ec2.NetworkInterface{ - {NetworkInterfaceId: &mockNetworkInterfaceId1}, - {NetworkInterfaceId: &mockNetworkInterfaceId2}, - }, + mockDescribeInterfaceOpWith1And2 = []*ec2.NetworkInterface{ + {NetworkInterfaceId: &mockNetworkInterfaceId1}, + {NetworkInterfaceId: &mockNetworkInterfaceId2}, } - mockDescribeInterfaceOpWith1And3 = &ec2.DescribeNetworkInterfacesOutput{ - NetworkInterfaces: []*ec2.NetworkInterface{ - {NetworkInterfaceId: &mockNetworkInterfaceId1}, - {NetworkInterfaceId: &mockNetworkInterfaceId3}, - }, + mockDescribeInterfaceOpWith1And3 = []*ec2.NetworkInterface{ + {NetworkInterfaceId: &mockNetworkInterfaceId1}, + {NetworkInterfaceId: &mockNetworkInterfaceId3}, } ) @@ -74,6 +76,7 @@ func getMockENICleaner(ctrl *gomock.Controller) (*ENICleaner, *mock_api.MockEC2W EC2Wrapper: mockEC2Wrapper, availableENIs: map[string]struct{}{}, Log: zap.New(zap.UseDevMode(true)), + VPCID: mockVPCID, clusterNameTagKey: mockClusterNameTagKey, ctx: context.Background(), }, mockEC2Wrapper @@ -85,10 +88,10 @@ func TestENICleaner_cleanUpAvailableENIs(t *testing.T) { gomock.InOrder( // Return network interface 1 and 2 in first cycle - mockWrapper.EXPECT().DescribeNetworkInterfaces(mockDescribeNetworkInterfaceIp). + mockWrapper.EXPECT().DescribeNetworkInterfacesPages(mockDescribeNetworkInterfaceIp). Return(mockDescribeInterfaceOpWith1And2, nil), // Return network interface 1 and 3 in the second cycle - mockWrapper.EXPECT().DescribeNetworkInterfaces(mockDescribeNetworkInterfaceIp). + mockWrapper.EXPECT().DescribeNetworkInterfacesPages(mockDescribeNetworkInterfaceIp). Return(mockDescribeInterfaceOpWith1And3, nil), // Expect to delete the network interface 1 mockWrapper.EXPECT().DeleteNetworkInterface( diff --git a/pkg/aws/ec2/api/helper.go b/pkg/aws/ec2/api/helper.go index 3a6cb3ea..c8d31544 100644 --- a/pkg/aws/ec2/api/helper.go +++ b/pkg/aws/ec2/api/helper.go @@ -79,7 +79,7 @@ type EC2APIHelper interface { ipResourceCount *config.IPResourceCount, interfaceType *string) (*ec2.NetworkInterface, error) DeleteNetworkInterface(interfaceId *string) error GetSubnet(subnetId *string) (*ec2.Subnet, error) - GetBranchNetworkInterface(trunkID *string) ([]*ec2.NetworkInterface, error) + GetBranchNetworkInterface(trunkID *string, subnetID *string) ([]*ec2.NetworkInterface, error) GetInstanceNetworkInterface(instanceId *string) ([]*ec2.InstanceNetworkInterface, error) DescribeNetworkInterfaces(nwInterfaceIds []*string) ([]*ec2.NetworkInterface, error) DescribeTrunkInterfaceAssociation(trunkInterfaceId *string) ([]*ec2.TrunkInterfaceAssociation, error) @@ -562,43 +562,20 @@ func (h *ec2APIHelper) UnassignIPv4Resources(eniID string, resourceType config.R return err } -func (h *ec2APIHelper) GetBranchNetworkInterface(trunkID *string) ([]*ec2.NetworkInterface, error) { - filters := []*ec2.Filter{{ - Name: aws.String("tag:" + config.TrunkENIIDTag), - Values: []*string{trunkID}, - }} - - describeNetworkInterfacesInput := &ec2.DescribeNetworkInterfacesInput{Filters: filters} - var nwInterfaces []*ec2.NetworkInterface - for { - describeNetworkInterfaceOutput, err := h.ec2Wrapper.DescribeNetworkInterfaces(describeNetworkInterfacesInput) - if err != nil { - return nil, err - } - - if describeNetworkInterfaceOutput == nil || describeNetworkInterfaceOutput.NetworkInterfaces == nil || - len(describeNetworkInterfaceOutput.NetworkInterfaces) == 0 { - // No more interface associated with the trunk, return the result - break - } - - // One or more interface associated with the trunk, return the result - for _, nwInterface := range describeNetworkInterfaceOutput.NetworkInterfaces { - // Only attach the required details to avoid consuming extra memory - nwInterfaces = append(nwInterfaces, &ec2.NetworkInterface{ - NetworkInterfaceId: nwInterface.NetworkInterfaceId, - TagSet: nwInterface.TagSet, - }) - } - - if describeNetworkInterfaceOutput.NextToken == nil { - break - } - - describeNetworkInterfacesInput.NextToken = describeNetworkInterfaceOutput.NextToken +func (h *ec2APIHelper) GetBranchNetworkInterface(trunkID *string, subnetID *string) ([]*ec2.NetworkInterface, error) { + filters := []*ec2.Filter{ + { + Name: aws.String("tag:" + config.TrunkENIIDTag), + Values: []*string{trunkID}, + }, + { + Name: aws.String("subnet-id"), + Values: []*string{subnetID}, + }, } - return nwInterfaces, nil + describeNetworkInterfacesInput := &ec2.DescribeNetworkInterfacesInput{Filters: filters} + return h.ec2Wrapper.DescribeNetworkInterfacesPages(describeNetworkInterfacesInput) } // DetachAndDeleteNetworkInterface detaches the network interface first and then deletes it diff --git a/pkg/aws/ec2/api/helper_test.go b/pkg/aws/ec2/api/helper_test.go index 971e8211..38cb16bc 100644 --- a/pkg/aws/ec2/api/helper_test.go +++ b/pkg/aws/ec2/api/helper_test.go @@ -179,27 +179,20 @@ var ( tokenID = "token" - describeTrunkInterfaceInput1 = &ec2.DescribeNetworkInterfacesInput{ - Filters: []*ec2.Filter{{ - Name: aws.String("tag:" + config.TrunkENIIDTag), - Values: []*string{&trunkInterfaceId}, - }}, - } - describeTrunkInterfaceInput2 = &ec2.DescribeNetworkInterfacesInput{ - Filters: []*ec2.Filter{{ - Name: aws.String("tag:" + config.TrunkENIIDTag), - Values: []*string{&trunkInterfaceId}, - }}, - NextToken: &tokenID, + describeTrunkInterfaceInput = &ec2.DescribeNetworkInterfacesInput{ + Filters: []*ec2.Filter{ + { + Name: aws.String("tag:" + config.TrunkENIIDTag), + Values: []*string{&trunkInterfaceId}, + }, + { + Name: aws.String("subnet-id"), + Values: aws.StringSlice([]string{subnetId}), + }, + }, } - describeTrunkInterfaceOutput1 = &ec2.DescribeNetworkInterfacesOutput{ - NetworkInterfaces: []*ec2.NetworkInterface{&networkInterface1}, - NextToken: &tokenID, - } - describeTrunkInterfaceOutput2 = &ec2.DescribeNetworkInterfacesOutput{ - NetworkInterfaces: []*ec2.NetworkInterface{&networkInterface2}, - } + describeTrunkInterfaceOutput = []*ec2.NetworkInterface{&networkInterface1, &networkInterface2} describeTrunkInterfaceAssociationsInput = &ec2.DescribeTrunkInterfaceAssociationsInput{ Filters: []*ec2.Filter{{ @@ -1178,16 +1171,15 @@ func TestEC2APIHelper_AssignIPv4ResourcesAndWaitTillReady_TypeIPv4Prefix_Describ } // TestEc2APIHelper_GetBranchNetworkInterface_PaginatedResults returns the branch interface when paginated results is returned -func TestEc2APIHelper_GetBranchNetworkInterface_PaginatedResults(t *testing.T) { +func TestEc2APIHelper_GetBranchNetworkInterface(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().DescribeNetworkInterfaces(describeTrunkInterfaceInput1).Return(describeTrunkInterfaceOutput1, nil) - mockWrapper.EXPECT().DescribeNetworkInterfaces(describeTrunkInterfaceInput2).Return(describeTrunkInterfaceOutput2, nil) + mockWrapper.EXPECT().DescribeNetworkInterfacesPages(describeTrunkInterfaceInput).Return(describeTrunkInterfaceOutput, nil) - branchInterfaces, err := ec2ApiHelper.GetBranchNetworkInterface(&trunkInterfaceId) + branchInterfaces, err := ec2ApiHelper.GetBranchNetworkInterface(&trunkInterfaceId, &subnetId) assert.NoError(t, err) assert.ElementsMatch(t, []*ec2.NetworkInterface{&networkInterface1, &networkInterface2}, branchInterfaces) } diff --git a/pkg/aws/ec2/api/wrapper.go b/pkg/aws/ec2/api/wrapper.go index bcf4cc74..7c7fdc78 100644 --- a/pkg/aws/ec2/api/wrapper.go +++ b/pkg/aws/ec2/api/wrapper.go @@ -21,6 +21,7 @@ import ( "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/utils" + "k8s.io/apimachinery/pkg/util/wait" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/credentials" @@ -52,6 +53,7 @@ type EC2Wrapper interface { AssignPrivateIPAddresses(input *ec2.AssignPrivateIpAddressesInput) (*ec2.AssignPrivateIpAddressesOutput, error) UnassignPrivateIPAddresses(input *ec2.UnassignPrivateIpAddressesInput) (*ec2.UnassignPrivateIpAddressesOutput, error) DescribeNetworkInterfaces(input *ec2.DescribeNetworkInterfacesInput) (*ec2.DescribeNetworkInterfacesOutput, error) + DescribeNetworkInterfacesPages(input *ec2.DescribeNetworkInterfacesInput) ([]*ec2.NetworkInterface, error) CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) DescribeSubnets(input *ec2.DescribeSubnetsInput) (*ec2.DescribeSubnetsOutput, error) AssociateTrunkInterface(input *ec2.AssociateTrunkInterfaceInput) (*ec2.AssociateTrunkInterfaceOutput, error) @@ -307,6 +309,19 @@ var ( }, ) + ec2DescribeNetworkInterfacesPagesAPICallCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "ec2_describe_network_interfaces_pages_api_call_count", + Help: "The number of calls made to describe network interfaces (paginated)", + }, + ) + ec2DescribeNetworkInterfacesPagesAPIErrCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "ec2_describe_network_interfaces_pages_api_err_count", + Help: "The number of errors encountered while making call to describe network interfaces (paginated)", + }, + ) + prometheusRegistered = false ) @@ -345,8 +360,11 @@ func prometheusRegister() { ec2modifyNetworkInterfaceAttributeAPICallCnt, ec2modifyNetworkInterfaceAttributeAPIErrCnt, ec2APICallLatencies, - vpcCniLeakedENICleanupCnt, - vpcrcLeakedENICleanupCnt, + vpccniAvailableENICnt, + vpcrcAvailableENICnt, + leakedENICnt, + ec2DescribeNetworkInterfacesPagesAPICallCnt, + ec2DescribeNetworkInterfacesPagesAPIErrCnt, ) prometheusRegistered = true @@ -639,6 +657,38 @@ func (e *ec2Wrapper) DescribeNetworkInterfaces(input *ec2.DescribeNetworkInterfa return describeNetworkInterfacesOutput, err } +// DescribeNetworkInterfacesPages returns network interfaces that match the filters specified in the input with MaxResult set to 1000(max value) +// This API is used during periodic ENI cleanup routine and trunk initialization to list all network interfaces that match the given filters (vpc-id or subnet-id, and tag) +// Only required fields, network interface ID and tag set, is populated to avoid consuming extra memory +func (e *ec2Wrapper) DescribeNetworkInterfacesPages(input *ec2.DescribeNetworkInterfacesInput) ([]*ec2.NetworkInterface, error) { + var networkInterfaces []*ec2.NetworkInterface + input.MaxResults = aws.Int64(config.DescribeNetworkInterfacesMaxResults) + + start := time.Now() + if err := e.userServiceClient.DescribeNetworkInterfacesPages(input, func(output *ec2.DescribeNetworkInterfacesOutput, _ bool) bool { + ec2APICallCnt.Inc() + ec2DescribeNetworkInterfacesPagesAPICallCnt.Inc() + //Currently only network interface ID and the tag set is require, only add required details to avoid consuming extra memory + for _, nwInterface := range output.NetworkInterfaces { + networkInterfaces = append(networkInterfaces, &ec2.NetworkInterface{ + NetworkInterfaceId: nwInterface.NetworkInterfaceId, + TagSet: nwInterface.TagSet, + }) + } + // Add jitter to avoid EC2 API throttling in the account + time.Sleep(wait.Jitter(500*time.Millisecond, 0.5)) + return true + + }); err != nil { + ec2APIErrCnt.Inc() + ec2DescribeNetworkInterfacesPagesAPIErrCnt.Inc() + return nil, err + } + ec2APICallLatencies.WithLabelValues("describe_network_interfaces_pages").Observe(timeSinceMs(start)) + + return networkInterfaces, nil +} + func (e *ec2Wrapper) AssignPrivateIPAddresses(input *ec2.AssignPrivateIpAddressesInput) (*ec2.AssignPrivateIpAddressesOutput, error) { start := time.Now() assignPrivateIPAddressesOutput, err := e.userServiceClient.AssignPrivateIpAddresses(input) diff --git a/pkg/config/type.go b/pkg/config/type.go index d7673640..ee419de5 100644 --- a/pkg/config/type.go +++ b/pkg/config/type.go @@ -86,6 +86,8 @@ const ( VpcCNIDaemonSetName = "aws-node" OldVPCControllerDeploymentName = "vpc-resource-controller" BranchENICooldownPeriodKey = "branch-eni-cooldown" + // DescribeNetworkInterfacesMaxResults defines the max number of requests to return for DescribeNetworkInterfaces API call + DescribeNetworkInterfacesMaxResults = int64(1000) ) type ResourceType string diff --git a/pkg/provider/branch/trunk/trunk.go b/pkg/provider/branch/trunk/trunk.go index 6a2eb5dc..ce4f2e27 100644 --- a/pkg/provider/branch/trunk/trunk.go +++ b/pkg/provider/branch/trunk/trunk.go @@ -232,7 +232,7 @@ func (t *trunkENI) InitTrunk(instance ec2.EC2Instance, podList []v1.Pod) error { } // Get the list of branch ENIs - branchInterfaces, err := t.ec2ApiHelper.GetBranchNetworkInterface(&t.trunkENIId) + branchInterfaces, err := t.ec2ApiHelper.GetBranchNetworkInterface(&t.trunkENIId, aws.String(t.instance.SubnetID())) if err != nil { return err } diff --git a/pkg/provider/branch/trunk/trunk_test.go b/pkg/provider/branch/trunk/trunk_test.go index cb766cad..2f0eed90 100644 --- a/pkg/provider/branch/trunk/trunk_test.go +++ b/pkg/provider/branch/trunk/trunk_test.go @@ -647,7 +647,8 @@ func TestTrunkENI_InitTrunk(t *testing.T) { f.mockInstance.EXPECT().InstanceID().Return(InstanceId) f.mockEC2APIHelper.EXPECT().GetInstanceNetworkInterface(&InstanceId).Return(instanceNwInterfaces, nil) f.mockEC2APIHelper.EXPECT().WaitForNetworkInterfaceStatusChange(&trunkId, awsEc2.AttachmentStatusAttached).Return(nil) - f.mockEC2APIHelper.EXPECT().GetBranchNetworkInterface(&trunkId).Return(branchInterfaces, nil) + f.mockInstance.EXPECT().SubnetID().Return(SubnetId) + f.mockEC2APIHelper.EXPECT().GetBranchNetworkInterface(&trunkId, &SubnetId).Return(branchInterfaces, nil) }, args: args{instance: FakeInstance, podList: []v1.Pod{*MockPod1, *MockPod2}}, wantErr: false, @@ -675,7 +676,8 @@ func TestTrunkENI_InitTrunk(t *testing.T) { f.mockInstance.EXPECT().InstanceID().Return(InstanceId) f.mockEC2APIHelper.EXPECT().GetInstanceNetworkInterface(&InstanceId).Return(instanceNwInterfaces, nil) f.mockEC2APIHelper.EXPECT().WaitForNetworkInterfaceStatusChange(&trunkId, awsEc2.AttachmentStatusAttached).Return(nil) - f.mockEC2APIHelper.EXPECT().GetBranchNetworkInterface(&trunkId).Return(branchInterfaces, nil) + f.mockInstance.EXPECT().SubnetID().Return(SubnetId) + f.mockEC2APIHelper.EXPECT().GetBranchNetworkInterface(&trunkId, &SubnetId).Return(branchInterfaces, nil) }, args: args{instance: FakeInstance, podList: []v1.Pod{*MockPod2}}, wantErr: false,