From e51e7ba33c98338c620300cd624d62e956c1e112 Mon Sep 17 00:00:00 2001 From: Jeffrey Nelson Date: Tue, 10 Jan 2023 14:49:34 -0500 Subject: [PATCH 1/5] refactor cni-metrics-helper chart for eks charts release (#2201) --- charts/aws-vpc-cni/README.md | 1 - charts/aws-vpc-cni/templates/daemonset.yaml | 4 +-- charts/cni-metrics-helper/Chart.yaml | 36 +++++++++---------- charts/cni-metrics-helper/README.md | 40 ++++++++++++++++----- 4 files changed, 49 insertions(+), 32 deletions(-) diff --git a/charts/aws-vpc-cni/README.md b/charts/aws-vpc-cni/README.md index 752eb4f154..70cd85d9be 100644 --- a/charts/aws-vpc-cni/README.md +++ b/charts/aws-vpc-cni/README.md @@ -91,7 +91,6 @@ WARNING: Substitute YOUR_HELM_RELEASE_NAME_HERE with the name of your helm relea set -euo pipefail -# don't import the crd. Helm cant manage the lifecycle of it anyway. for kind in daemonSet clusterRole clusterRoleBinding serviceAccount; do echo "setting annotations and labels on $kind/aws-node" kubectl -n kube-system annotate --overwrite $kind aws-node meta.helm.sh/release-name=YOUR_HELM_RELEASE_NAME_HERE diff --git a/charts/aws-vpc-cni/templates/daemonset.yaml b/charts/aws-vpc-cni/templates/daemonset.yaml index 261480ad5c..a78fdad500 100644 --- a/charts/aws-vpc-cni/templates/daemonset.yaml +++ b/charts/aws-vpc-cni/templates/daemonset.yaml @@ -91,8 +91,8 @@ spec: - mountPath: /host/etc/cni/net.d name: cni-net-dir {{- if .Values.cniConfig.enabled }} - # the dockerfile copies the baked in config to this location, lets overwrite it with ours - # the entrypoint.sh script will then copy our config to /host/etc/cni/net.d on boot + # The dockerfile copies the baked in config to this location, so overwrite it with ours. + # The entrypoint process will then copy our config to /host/etc/cni/net.d on boot. - name: cni-config mountPath: /app/10-aws.conflist subPath: 10-aws.conflist diff --git a/charts/cni-metrics-helper/Chart.yaml b/charts/cni-metrics-helper/Chart.yaml index 3e3fa37bee..2c010b3680 100644 --- a/charts/cni-metrics-helper/Chart.yaml +++ b/charts/cni-metrics-helper/Chart.yaml @@ -1,23 +1,19 @@ apiVersion: v2 name: cni-metrics-helper -description: A Helm chart for Kubernetes - -# A chart can be either an 'application' or a 'library' chart. -# -# Application charts are a collection of templates that can be packaged into versioned archives -# to be deployed. -# -# Library charts provide useful utilities or functions for the chart developer. They're included as -# a dependency of application charts to inject those utilities and functions into the rendering -# pipeline. Library charts do not define any templates and therefore cannot be deployed. -type: application - -# This is the chart version. This version number should be incremented each time you make changes -# to the chart and its templates, including the app version. -# Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.14 - -# This is the version number of the application being deployed. This version number should be -# incremented each time you make changes to the application. Versions are not expected to -# follow Semantic Versioning. They should reflect the version the application is using. +version: 0.1.15 appVersion: v1.12.1 +description: A Helm chart for the AWS VPC CNI Metrics Helper +icon: https://raw.githubusercontent.com/aws/eks-charts/master/docs/logo/aws.png +home: https://github.com/aws/amazon-vpc-cni-k8s +sources: + - https://github.com/aws/amazon-vpc-cni-k8s +keywords: + - eks + - cni + - networking + - vpc +maintainers: + - name: Jayanth Varavani + url: https://github.com/jayanthvn + email: jayanthvn@users.noreply.github.com +engine: gotpl diff --git a/charts/cni-metrics-helper/README.md b/charts/cni-metrics-helper/README.md index 254d3d7772..2637390660 100644 --- a/charts/cni-metrics-helper/README.md +++ b/charts/cni-metrics-helper/README.md @@ -9,15 +9,34 @@ This chart provides a Kubernetes deployment for the Amazon VPC CNI Metrics Helpe ## Installing the Chart -Clone the Amazon VPC CNI for Kubernetes repository to your local machine. +First add the EKS repository to Helm: + +```shell +helm repo add eks https://aws.github.io/eks-charts +``` + +To install the chart with the release name `cni-metrics-helper` and default configuration: + +```shell +$ helm install cni-metrics-helper --namespace kube-system eks/cni-metrics-helper +``` + +To install manually, clone the Amazon VPC CNI for Kubernetes repository to your local machine: ```shell $ git clone https://github.com/aws/amazon-vpc-cni-k8s.git ``` -Use the helm install command to install the chart into your Kubernetes cluster + +Use the helm install command to install the chart into your Kubernetes cluster: ```shell -$ helm install cni-metrics-helper ./amazon-vpc-cni-k8s/charts/cni-metrics-helper +$ helm install cni-metrics-helper --namespace kube-system ./charts/cni-metrics-helper +``` + +To uninstall: + +```shell +$ helm uninstall cni-metrics-helper --namespace kube-system ``` ## Configuration @@ -33,17 +52,21 @@ The following table lists the configurable parameters for this chart and their d | image.domain | ECR repository domain | amazonaws.com | | env.USE_CLOUDWATCH | Whether to export CNI metrics to CloudWatch | true | | env.AWS_CLUSTER_ID | ID of the cluster to use when exporting metrics to CloudWatch | default | -| env.METRIC_UPDATE_INTERVAL | Interval at which to update CloudWatch metrics, in seconds Metrics are published to CloudWatch at 2*METRIC_UPDATE_INTERVAL | 30 | +| env.METRIC_UPDATE_INTERVAL | Interval at which to update CloudWatch metrics, in seconds. | | +| | Metrics are published to CloudWatch at 2x the interval | 30 | | serviceAccount.name | The name of the ServiceAccount to use | nil | | serviceAccount.create | Specifies whether a ServiceAccount should be created | true | | serviceAccount.annotations | Specifies the annotations for ServiceAccount | {} | - Specify each parameter using the `--set key=value[,key=value]` argument to `helm install` or provide a YAML file containing the values for the above parameters: ```shell -$ helm install my-release ./amazon-vpc-cni-k8s/charts/cni-metrics-helper --set useCloudwatch=false --values values.yaml +$ helm install cni-metrics-handler --namespace kube-system eks/cni-metrics-handler --values values.yaml +``` +Manual install: +```shell +$ helm install cni-metrics-helper --namespace kube-system ./charts/cni-metrics-helper --values values.yaml ``` ## Resources @@ -52,9 +75,8 @@ $ helm install my-release ./amazon-vpc-cni-k8s/charts/cni-metrics-helper --set u |---------------------------|------------------------------------------------|---------| | resources | Resources for the pods. | `{}` | - -for example, to set a CPU limit of 200m and a memory limit of 256Mi for the cni-metrics-helper pods, you can use the following command: +For example, to set a CPU limit of 200m and a memory limit of 256Mi for the cni-metrics-helper pods, you can use the following command: ```shell -$ helm install my-release ./amazon-vpc-cni-k8s/charts/cni-metrics-helper --set resources.limits.cpu=200m,resources.limits.memory=256Mi +$ helm install cni-metrics-helper ./charts/cni-metrics-helper --namespace kube-system --set resources.limits.cpu=200m,resources.limits.memory=256Mi ``` From a81e07f8e9c642f9e7370b44ca02d0fd60b17ed1 Mon Sep 17 00:00:00 2001 From: Hao Zhou Date: Thu, 12 Jan 2023 14:23:47 -0800 Subject: [PATCH 2/5] enable node events for signalling SGP feature turned on from VPC CNI (#2149) merge issue updated CNI for security group event solution Resolving merge issues and removing duplicated sending events remove the node update permission remove unused func and improve some error handling clean up unit tests and format update with constants and changes suggested by comments update send events logic and unit tests cut the API calls for getNodeLabel to half changes regarding comments enable not-supported label value check change comment Co-authored-by: Jayanth Varavani <1111446+jayanthvn@users.noreply.github.com> --- charts/aws-vpc-cni/templates/clusterrole.yaml | 2 +- cmd/aws-k8s-agent/main.go | 2 +- config/master/aws-k8s-cni-cn.yaml | 2 +- config/master/aws-k8s-cni-us-gov-east-1.yaml | 2 +- config/master/aws-k8s-cni-us-gov-west-1.yaml | 2 +- config/master/aws-k8s-cni.yaml | 2 +- pkg/awsutils/awsutils.go | 7 +- pkg/awsutils/awsutils_test.go | 202 ++++++---- pkg/eniconfig/eniconfig.go | 19 +- pkg/ipamd/ipamd.go | 222 ++++++----- pkg/ipamd/ipamd_test.go | 367 +++++++++++++++--- pkg/sgpp/constants.go | 8 +- pkg/utils/eventrecorder/eventrecorder.go | 87 +++-- pkg/utils/eventrecorder/eventrecorder_test.go | 76 +++- test/framework/framework.go | 30 +- .../resources/k8s/resources/events.go | 8 +- 16 files changed, 754 insertions(+), 284 deletions(-) diff --git a/charts/aws-vpc-cni/templates/clusterrole.yaml b/charts/aws-vpc-cni/templates/clusterrole.yaml index fb096ef0f3..c62e5d5f5f 100644 --- a/charts/aws-vpc-cni/templates/clusterrole.yaml +++ b/charts/aws-vpc-cni/templates/clusterrole.yaml @@ -28,7 +28,7 @@ rules: - apiGroups: [""] resources: - nodes - verbs: ["list", "watch", "get", "update"] + verbs: ["list", "watch", "get"] - apiGroups: ["extensions"] resources: - '*' diff --git a/cmd/aws-k8s-agent/main.go b/cmd/aws-k8s-agent/main.go index 6368948a75..58d2c01451 100644 --- a/cmd/aws-k8s-agent/main.go +++ b/cmd/aws-k8s-agent/main.go @@ -59,7 +59,7 @@ func _main() int { return 1 } - eventrecorder.InitEventRecorder(rawK8SClient) + eventrecorder.New(rawK8SClient, cacheK8SClient) ipamContext, err := ipamd.New(rawK8SClient, cacheK8SClient) if err != nil { diff --git a/config/master/aws-k8s-cni-cn.yaml b/config/master/aws-k8s-cni-cn.yaml index a74e325081..b21d85329f 100644 --- a/config/master/aws-k8s-cni-cn.yaml +++ b/config/master/aws-k8s-cni-cn.yaml @@ -61,7 +61,7 @@ rules: - apiGroups: [""] resources: - nodes - verbs: ["list", "watch", "get", "update"] + verbs: ["list", "watch", "get"] - apiGroups: ["extensions"] resources: - '*' diff --git a/config/master/aws-k8s-cni-us-gov-east-1.yaml b/config/master/aws-k8s-cni-us-gov-east-1.yaml index a7f534324c..58ab644305 100644 --- a/config/master/aws-k8s-cni-us-gov-east-1.yaml +++ b/config/master/aws-k8s-cni-us-gov-east-1.yaml @@ -61,7 +61,7 @@ rules: - apiGroups: [""] resources: - nodes - verbs: ["list", "watch", "get", "update"] + verbs: ["list", "watch", "get"] - apiGroups: ["extensions"] resources: - '*' diff --git a/config/master/aws-k8s-cni-us-gov-west-1.yaml b/config/master/aws-k8s-cni-us-gov-west-1.yaml index 4f7e79f5b6..d81bff045b 100644 --- a/config/master/aws-k8s-cni-us-gov-west-1.yaml +++ b/config/master/aws-k8s-cni-us-gov-west-1.yaml @@ -61,7 +61,7 @@ rules: - apiGroups: [""] resources: - nodes - verbs: ["list", "watch", "get", "update"] + verbs: ["list", "watch", "get"] - apiGroups: ["extensions"] resources: - '*' diff --git a/config/master/aws-k8s-cni.yaml b/config/master/aws-k8s-cni.yaml index 28013abeda..2eac923085 100644 --- a/config/master/aws-k8s-cni.yaml +++ b/config/master/aws-k8s-cni.yaml @@ -61,7 +61,7 @@ rules: - apiGroups: [""] resources: - nodes - verbs: ["list", "watch", "get", "update"] + verbs: ["list", "watch", "get"] - apiGroups: ["extensions"] resources: - '*' diff --git a/pkg/awsutils/awsutils.go b/pkg/awsutils/awsutils.go index 3e9129087c..f6f712761f 100644 --- a/pkg/awsutils/awsutils.go +++ b/pkg/awsutils/awsutils.go @@ -248,6 +248,8 @@ type EC2InstanceMetadataCache struct { imds TypedIMDS ec2SVC ec2wrapper.EC2 + + eventRecorder *eventrecorder.EventRecorder } // ENIMetadata contains information about an ENI @@ -394,7 +396,7 @@ func (i instrumentedIMDS) GetMetadataWithContext(ctx context.Context, p string) } // New creates an EC2InstanceMetadataCache -func New(useCustomNetworking, disableENIProvisioning, v4Enabled, v6Enabled bool) (*EC2InstanceMetadataCache, error) { +func New(useCustomNetworking, disableENIProvisioning, v4Enabled, v6Enabled bool, eventRecorder *eventrecorder.EventRecorder) (*EC2InstanceMetadataCache, error) { //ctx is passed to initWithEC2Metadata func to cancel spawned go-routines when tests are run ctx := context.Background() @@ -408,6 +410,7 @@ func New(useCustomNetworking, disableENIProvisioning, v4Enabled, v6Enabled bool) cache.imds = TypedIMDS{instrumentedIMDS{ec2Metadata}} cache.clusterName = os.Getenv(clusterNameEnvVar) cache.additionalENITags = loadAdditionalENITags() + cache.eventRecorder = eventRecorder region, err := ec2Metadata.Region() if err != nil { @@ -432,8 +435,6 @@ func New(useCustomNetworking, disableENIProvisioning, v4Enabled, v6Enabled bool) if err != nil { return nil, err } - // event recorder to raise events for failed EC2 API calls - eventRecorder = eventrecorder.Get() // Clean up leaked ENIs in the background if !disableENIProvisioning { diff --git a/pkg/awsutils/awsutils_test.go b/pkg/awsutils/awsutils_test.go index 60e52481fa..0d2e3774d4 100644 --- a/pkg/awsutils/awsutils_test.go +++ b/pkg/awsutils/awsutils_test.go @@ -31,6 +31,13 @@ import ( "github.com/aws/aws-sdk-go/aws/awserr" mock_ec2wrapper "github.com/aws/amazon-vpc-cni-k8s/pkg/ec2wrapper/mocks" + "github.com/aws/amazon-vpc-cni-k8s/pkg/utils/eventrecorder" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/tools/events" + testclient "sigs.k8s.io/controller-runtime/pkg/client/fake" ) const ( @@ -73,6 +80,7 @@ const ( eni2v6Prefix = "2001:db8::/64" eni2ID = "eni-12341234" metadataVPCIPv4CIDRs = "192.168.0.0/16 100.66.0.0/1" + myNodeName = "testNodeName" ) func testMetadata(overrides map[string]interface{}) FakeIMDS { @@ -131,6 +139,28 @@ func setup(t *testing.T) (*gomock.Controller, mock_ec2wrapper.NewMockEC2(ctrl) } +func setupEventRecorder(t *testing.T) *eventrecorder.EventRecorder { + ctx := context.Background() + fakeRecorder := events.NewFakeRecorder(3) + k8sSchema := runtime.NewScheme() + clientgoscheme.AddToScheme(k8sSchema) + + mockEventRecorder := &eventrecorder.EventRecorder{ + Recorder: fakeRecorder, + RawK8SClient: testclient.NewClientBuilder().WithScheme(k8sSchema).Build(), + CachedK8SClient: testclient.NewClientBuilder().WithScheme(k8sSchema).Build(), + } + + fakeNode := v1.Node{ + TypeMeta: metav1.TypeMeta{Kind: "Node"}, + ObjectMeta: metav1.ObjectMeta{Name: myNodeName}, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{}, + } + _ = mockEventRecorder.CachedK8SClient.Create(ctx, &fakeNode) + return mockEventRecorder +} + func TestInitWithEC2metadata(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Millisecond) defer cancel() @@ -139,15 +169,15 @@ func TestInitWithEC2metadata(t *testing.T) { defer ctrl.Finish() mockMetadata := testMetadata(nil) - ins := &EC2InstanceMetadataCache{imds: TypedIMDS{mockMetadata}, ec2SVC: mockEC2} - err := ins.initWithEC2Metadata(ctx) + cache := &EC2InstanceMetadataCache{imds: TypedIMDS{mockMetadata}, ec2SVC: mockEC2} + err := cache.initWithEC2Metadata(ctx) if assert.NoError(t, err) { - assert.Equal(t, az, ins.availabilityZone) - assert.Equal(t, localIP, ins.localIPv4.String()) - assert.Equal(t, ins.instanceID, instanceID) - assert.Equal(t, ins.primaryENImac, primaryMAC) - assert.Equal(t, ins.primaryENI, primaryeniID) - assert.Equal(t, subnetID, ins.subnetID) + assert.Equal(t, az, cache.availabilityZone) + assert.Equal(t, localIP, cache.localIPv4.String()) + assert.Equal(t, cache.instanceID, instanceID) + assert.Equal(t, cache.primaryENImac, primaryMAC) + assert.Equal(t, cache.primaryENI, primaryeniID) + assert.Equal(t, subnetID, cache.subnetID) } } @@ -168,11 +198,11 @@ func TestInitWithEC2metadataErr(t *testing.T) { key: fmt.Errorf("An error with %s", key), }) - ins := &EC2InstanceMetadataCache{imds: TypedIMDS{mockMetadata}, ec2SVC: mockEC2} + cache := &EC2InstanceMetadataCache{imds: TypedIMDS{mockMetadata}, ec2SVC: mockEC2} // This test is a bit silly. We expect broken metadata to result in an err return here. But if the code is resilient and _succeeds_, then of course that's ok too. Mostly we just want it not to panic. assert.NotPanics(t, func() { - _ = ins.initWithEC2Metadata(ctx) + _ = cache.initWithEC2Metadata(ctx) }, "Broken metadata %s resulted in panic", key) } } @@ -186,8 +216,8 @@ func TestGetAttachedENIs(t *testing.T) { metadataMACPath + eni2MAC + metadataIPv4s: eni2PrivateIP, }) - ins := &EC2InstanceMetadataCache{imds: TypedIMDS{mockMetadata}} - ens, err := ins.GetAttachedENIs() + cache := &EC2InstanceMetadataCache{imds: TypedIMDS{mockMetadata}} + ens, err := cache.GetAttachedENIs() if assert.NoError(t, err) { assert.Equal(t, len(ens), 2) } @@ -203,8 +233,8 @@ func TestGetAttachedENIsWithPrefixes(t *testing.T) { metadataMACPath + eni2MAC + metadataIPv4Prefixes: eni2Prefix, }) - ins := &EC2InstanceMetadataCache{imds: TypedIMDS{mockMetadata}} - ens, err := ins.GetAttachedENIs() + cache := &EC2InstanceMetadataCache{imds: TypedIMDS{mockMetadata}} + ens, err := cache.GetAttachedENIs() if assert.NoError(t, err) { assert.Equal(t, len(ens), 2) } @@ -217,8 +247,8 @@ func TestAWSGetFreeDeviceNumberOnErr(t *testing.T) { // test error handling mockEC2.EXPECT().DescribeInstancesWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, errors.New("error on DescribeInstancesWithContext")) - ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2} - _, err := ins.awsGetFreeDeviceNumber() + cache := &EC2InstanceMetadataCache{ec2SVC: mockEC2} + _, err := cache.awsGetFreeDeviceNumber() assert.Error(t, err) } @@ -240,8 +270,8 @@ func TestAWSGetFreeDeviceNumberNoDevice(t *testing.T) { mockEC2.EXPECT().DescribeInstancesWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(result, nil) - ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2} - _, err := ins.awsGetFreeDeviceNumber() + cache := &EC2InstanceMetadataCache{ec2SVC: mockEC2} + _, err := cache.awsGetFreeDeviceNumber() assert.Error(t, err) } @@ -300,8 +330,8 @@ func TestGetENIAttachmentID(t *testing.T) { for _, tc := range testCases { mockEC2.EXPECT().DescribeNetworkInterfacesWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(tc.output, tc.awsErr) - ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2} - id, err := ins.getENIAttachmentID("test-eni") + cache := &EC2InstanceMetadataCache{ec2SVC: mockEC2} + id, err := cache.getENIAttachmentID("test-eni") assert.Equal(t, tc.expErr, err) assert.Equal(t, tc.expID, id) } @@ -343,8 +373,8 @@ func TestDescribeAllENIs(t *testing.T) { for _, tc := range testCases { mockEC2.EXPECT().DescribeNetworkInterfacesWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Times(tc.n).Return(result, tc.awsErr) - ins := &EC2InstanceMetadataCache{imds: TypedIMDS{mockMetadata}, ec2SVC: mockEC2} - metaData, err := ins.DescribeAllENIs() + cache := &EC2InstanceMetadataCache{imds: TypedIMDS{mockMetadata}, ec2SVC: mockEC2} + metaData, err := cache.DescribeAllENIs() assert.Equal(t, tc.expErr, err, tc.name) assert.Equal(t, tc.exptags, metaData.TagMap, tc.name) } @@ -380,11 +410,13 @@ func TestAllocENI(t *testing.T) { mockEC2.EXPECT().AttachNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(attachResult, nil) mockEC2.EXPECT().ModifyNetworkInterfaceAttributeWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) - ins := &EC2InstanceMetadataCache{ - ec2SVC: mockEC2, - imds: TypedIMDS{mockMetadata}, + cache := &EC2InstanceMetadataCache{ + ec2SVC: mockEC2, + imds: TypedIMDS{mockMetadata}, + eventRecorder: setupEventRecorder(t), } - _, err := ins.AllocENI(false, nil, "") + + _, err := cache.AllocENI(false, nil, "") assert.NoError(t, err) } @@ -413,11 +445,13 @@ func TestAllocENINoFreeDevice(t *testing.T) { mockEC2.EXPECT().DescribeInstancesWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(result, nil) mockEC2.EXPECT().DeleteNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) - ins := &EC2InstanceMetadataCache{ - ec2SVC: mockEC2, - imds: TypedIMDS{mockMetadata}, + cache := &EC2InstanceMetadataCache{ + ec2SVC: mockEC2, + imds: TypedIMDS{mockMetadata}, + eventRecorder: setupEventRecorder(t), } - _, err := ins.AllocENI(false, nil, "") + + _, err := cache.AllocENI(false, nil, "") assert.Error(t, err) } @@ -448,11 +482,13 @@ func TestAllocENIMaxReached(t *testing.T) { mockEC2.EXPECT().AttachNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, errors.New("AttachmentLimitExceeded")) mockEC2.EXPECT().DeleteNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) - ins := &EC2InstanceMetadataCache{ - ec2SVC: mockEC2, - imds: TypedIMDS{mockMetadata}, + cache := &EC2InstanceMetadataCache{ + ec2SVC: mockEC2, + imds: TypedIMDS{mockMetadata}, + eventRecorder: setupEventRecorder(t), } - _, err := ins.AllocENI(false, nil, "") + + _, err := cache.AllocENI(false, nil, "") assert.Error(t, err) } @@ -468,8 +504,12 @@ func TestFreeENI(t *testing.T) { mockEC2.EXPECT().DetachNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) mockEC2.EXPECT().DeleteNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) - ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2} - err := ins.freeENI("test-eni", time.Millisecond, time.Millisecond) + cache := &EC2InstanceMetadataCache{ + ec2SVC: mockEC2, + eventRecorder: setupEventRecorder(t), + } + + err := cache.freeENI("test-eni", time.Millisecond, time.Millisecond) assert.NoError(t, err) } @@ -488,8 +528,12 @@ func TestFreeENIRetry(t *testing.T) { mockEC2.EXPECT().DeleteNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, errors.New("testing retrying delete")) mockEC2.EXPECT().DeleteNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) - ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2} - err := ins.freeENI("test-eni", time.Millisecond, time.Millisecond) + cache := &EC2InstanceMetadataCache{ + ec2SVC: mockEC2, + eventRecorder: setupEventRecorder(t), + } + + err := cache.freeENI("test-eni", time.Millisecond, time.Millisecond) assert.NoError(t, err) } @@ -508,8 +552,12 @@ func TestFreeENIRetryMax(t *testing.T) { mockEC2.EXPECT().DeleteNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, errors.New("testing retrying delete")) } - ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2} - err := ins.freeENI("test-eni", time.Millisecond, time.Millisecond) + cache := &EC2InstanceMetadataCache{ + ec2SVC: mockEC2, + eventRecorder: setupEventRecorder(t), + } + + err := cache.freeENI("test-eni", time.Millisecond, time.Millisecond) assert.Error(t, err) } @@ -519,8 +567,12 @@ func TestFreeENIDescribeErr(t *testing.T) { mockEC2.EXPECT().DescribeNetworkInterfacesWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, errors.New("Error on DescribeNetworkInterfacesWithContext")) - ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2} - err := ins.FreeENI("test-eni") + cache := &EC2InstanceMetadataCache{ + ec2SVC: mockEC2, + eventRecorder: setupEventRecorder(t), + } + + err := cache.FreeENI("test-eni") assert.Error(t, err) } @@ -537,13 +589,13 @@ func TestDescribeInstanceTypes(t *testing.T) { NextToken: nil, }, nil) - ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2} - ins.instanceType = "not-there" - err := ins.FetchInstanceTypeLimits() + cache := &EC2InstanceMetadataCache{ec2SVC: mockEC2} + cache.instanceType = "not-there" + err := cache.FetchInstanceTypeLimits() assert.NoError(t, err) - value := ins.GetENILimit() + value := cache.GetENILimit() assert.Equal(t, 9, value) - pv4Limit := ins.GetENIIPv4Limit() + pv4Limit := cache.GetENIIPv4Limit() assert.Equal(t, 98, pv4Limit) } @@ -553,8 +605,8 @@ func TestAllocIPAddress(t *testing.T) { mockEC2.EXPECT().AssignPrivateIpAddressesWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(&ec2.AssignPrivateIpAddressesOutput{}, nil) - ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2} - err := ins.AllocIPAddress("eni-id") + cache := &EC2InstanceMetadataCache{ec2SVC: mockEC2} + err := cache.AllocIPAddress("eni-id") assert.NoError(t, err) } @@ -564,8 +616,8 @@ func TestAllocIPAddressOnErr(t *testing.T) { mockEC2.EXPECT().AssignPrivateIpAddressesWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, errors.New("Error on AssignPrivateIpAddressesWithContext")) - ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2} - err := ins.AllocIPAddress("eni-id") + cache := &EC2InstanceMetadataCache{ec2SVC: mockEC2} + err := cache.AllocIPAddress("eni-id") assert.Error(t, err) } @@ -580,8 +632,8 @@ func TestAllocIPAddresses(t *testing.T) { } mockEC2.EXPECT().AssignPrivateIpAddressesWithContext(gomock.Any(), input, gomock.Any()).Return(nil, nil) - ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2, instanceType: "c5n.18xlarge"} - _, err := ins.AllocIPAddresses(eniID, 5) + cache := &EC2InstanceMetadataCache{ec2SVC: mockEC2, instanceType: "c5n.18xlarge"} + _, err := cache.AllocIPAddresses(eniID, 5) assert.NoError(t, err) // when required IP numbers(50) is higher than ENI's limit(49) @@ -596,12 +648,12 @@ func TestAllocIPAddresses(t *testing.T) { } mockEC2.EXPECT().AssignPrivateIpAddressesWithContext(gomock.Any(), input, gomock.Any()).Return(&output, nil) - ins = &EC2InstanceMetadataCache{ec2SVC: mockEC2, instanceType: "c5n.18xlarge"} - _, err = ins.AllocIPAddresses(eniID, 50) + cache = &EC2InstanceMetadataCache{ec2SVC: mockEC2, instanceType: "c5n.18xlarge"} + _, err = cache.AllocIPAddresses(eniID, 50) assert.NoError(t, err) // Adding 0 should do nothing - _, err = ins.AllocIPAddresses(eniID, 0) + _, err = cache.AllocIPAddresses(eniID, 0) assert.NoError(t, err) } @@ -613,12 +665,12 @@ func TestAllocIPAddressesAlreadyFull(t *testing.T) { NetworkInterfaceId: aws.String(eniID), SecondaryPrivateIpAddressCount: aws.Int64(14), } - ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2, instanceType: "t3.xlarge"} + cache := &EC2InstanceMetadataCache{ec2SVC: mockEC2, instanceType: "t3.xlarge"} retErr := awserr.New("PrivateIpAddressLimitExceeded", "Too many IPs already allocated", nil) mockEC2.EXPECT().AssignPrivateIpAddressesWithContext(gomock.Any(), input, gomock.Any()).Return(nil, retErr) // If EC2 says that all IPs are already attached, we do nothing - _, err := ins.AllocIPAddresses(eniID, 14) + _, err := cache.AllocIPAddresses(eniID, 14) assert.NoError(t, err) } @@ -633,12 +685,12 @@ func TestAllocPrefixAddresses(t *testing.T) { } mockEC2.EXPECT().AssignPrivateIpAddressesWithContext(gomock.Any(), input, gomock.Any()).Return(nil, nil) - ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2, instanceType: "c5n.18xlarge", enablePrefixDelegation: true} - _, err := ins.AllocIPAddresses(eniID, 1) + cache := &EC2InstanceMetadataCache{ec2SVC: mockEC2, instanceType: "c5n.18xlarge", enablePrefixDelegation: true} + _, err := cache.AllocIPAddresses(eniID, 1) assert.NoError(t, err) // Adding 0 should do nothing - _, err = ins.AllocIPAddresses(eniID, 0) + _, err = cache.AllocIPAddresses(eniID, 0) assert.NoError(t, err) } @@ -650,12 +702,12 @@ func TestAllocPrefixesAlreadyFull(t *testing.T) { NetworkInterfaceId: aws.String(eniID), Ipv4PrefixCount: aws.Int64(1), } - ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2, instanceType: "t3.xlarge", enablePrefixDelegation: true} + cache := &EC2InstanceMetadataCache{ec2SVC: mockEC2, instanceType: "t3.xlarge", enablePrefixDelegation: true} retErr := awserr.New("PrivateIpAddressLimitExceeded", "Too many IPs already allocated", nil) mockEC2.EXPECT().AssignPrivateIpAddressesWithContext(gomock.Any(), input, gomock.Any()).Return(nil, retErr) // If EC2 says that all IPs are already attached, we do nothing - _, err := ins.AllocIPAddresses(eniID, 1) + _, err := cache.AllocIPAddresses(eniID, 1) assert.NoError(t, err) } @@ -853,14 +905,14 @@ func TestEC2InstanceMetadataCache_waitForENIAndPrefixesAttached(t *testing.T) { func TestEC2InstanceMetadataCache_SetUnmanagedENIs(t *testing.T) { mockMetadata := testMetadata(nil) - ins := &EC2InstanceMetadataCache{imds: TypedIMDS{mockMetadata}} - ins.SetUnmanagedENIs(nil) - assert.False(t, ins.IsUnmanagedENI("eni-1")) - ins.SetUnmanagedENIs([]string{"eni-1", "eni-2"}) - assert.True(t, ins.IsUnmanagedENI("eni-1")) - assert.False(t, ins.IsUnmanagedENI("eni-99")) - ins.SetUnmanagedENIs(nil) - assert.False(t, ins.IsUnmanagedENI("eni-1")) + cache := &EC2InstanceMetadataCache{imds: TypedIMDS{mockMetadata}} + cache.SetUnmanagedENIs(nil) + assert.False(t, cache.IsUnmanagedENI("eni-1")) + cache.SetUnmanagedENIs([]string{"eni-1", "eni-2"}) + assert.True(t, cache.IsUnmanagedENI("eni-1")) + assert.False(t, cache.IsUnmanagedENI("eni-99")) + cache.SetUnmanagedENIs(nil) + assert.False(t, cache.IsUnmanagedENI("eni-1")) } func TestEC2InstanceMetadataCache_cleanUpLeakedENIsInternal(t *testing.T) { @@ -878,9 +930,9 @@ func TestEC2InstanceMetadataCache_cleanUpLeakedENIsInternal(t *testing.T) { setupDescribeNetworkInterfacesPagesWithContextMock(t, mockEC2, interfaces, nil, 1) mockEC2.EXPECT().CreateTagsWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) - ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2} + cache := &EC2InstanceMetadataCache{ec2SVC: mockEC2} // Test checks that both mocks gets called. - ins.cleanUpLeakedENIsInternal(time.Millisecond) + cache.cleanUpLeakedENIsInternal(time.Millisecond) } func setupDescribeNetworkInterfacesPagesWithContextMock( @@ -1382,8 +1434,8 @@ func TestEC2InstanceMetadataCache_getLeakedENIs(t *testing.T) { return nil }) } - ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2, clusterName: tt.fields.clusterName} - got, err := ins.getLeakedENIs() + cache := &EC2InstanceMetadataCache{ec2SVC: mockEC2, clusterName: tt.fields.clusterName} + got, err := cache.getLeakedENIs() if tt.wantErr != nil { assert.EqualError(t, err, tt.wantErr.Error()) } else { diff --git a/pkg/eniconfig/eniconfig.go b/pkg/eniconfig/eniconfig.go index d16d55ab6c..fb7dc951db 100644 --- a/pkg/eniconfig/eniconfig.go +++ b/pkg/eniconfig/eniconfig.go @@ -31,8 +31,7 @@ import ( const ( defaultEniConfigAnnotationDef = "k8s.amazonaws.com/eniConfig" defaultEniConfigLabelDef = "k8s.amazonaws.com/eniConfig" - eniConfigDefault = "default" - eniConfigLabel = "vpc.amazonaws.com/eniConfig" + EniConfigDefault = "default" // when this is defined, it is to be treated as the source of truth for the eniconfig. // it is meant to be used for out-of-band mananagement of the eniConfig - i.e. on the kubelet or elsewhere @@ -129,23 +128,17 @@ func GetNodeSpecificENIConfigName(ctx context.Context, k8sClient client.Client) } //Derive ENIConfig Name from either externally managed label, Node Annotations or Labels - val, ok := node.GetLabels()[externalEniConfigLabel] + labels := node.GetLabels() + eniConfigName, ok := labels[externalEniConfigLabel] if !ok { - val, ok = node.GetAnnotations()[getEniConfigAnnotationDef()] + eniConfigName, ok = node.GetAnnotations()[getEniConfigAnnotationDef()] if !ok { - val, ok = node.GetLabels()[getEniConfigLabelDef()] + eniConfigName, ok = node.GetLabels()[getEniConfigLabelDef()] if !ok { - val = eniConfigDefault + eniConfigName = EniConfigDefault } } } - eniConfigName = val - if val != eniConfigDefault { - labels := node.GetLabels() - labels["vpc.amazonaws.com/eniConfig"] = eniConfigName - node.SetLabels(labels) - } - return eniConfigName, nil } diff --git a/pkg/ipamd/ipamd.go b/pkg/ipamd/ipamd.go index 74884b81fd..abd52faa70 100644 --- a/pkg/ipamd/ipamd.go +++ b/pkg/ipamd/ipamd.go @@ -41,6 +41,8 @@ import ( "github.com/aws/amazon-vpc-cni-k8s/pkg/eniconfig" "github.com/aws/amazon-vpc-cni-k8s/pkg/ipamd/datastore" "github.com/aws/amazon-vpc-cni-k8s/pkg/networkutils" + "github.com/aws/amazon-vpc-cni-k8s/pkg/sgpp" + "github.com/aws/amazon-vpc-cni-k8s/pkg/utils/eventrecorder" "github.com/aws/amazon-vpc-cni-k8s/pkg/utils/logger" ) @@ -130,6 +132,11 @@ const ( // vpcENIConfigLabel is used by the VPC resource controller to pick the right ENI config. vpcENIConfigLabel = "vpc.amazonaws.com/eniConfig" + // trunkInterfaceLabel is used by CNI to check if correct label was added by the VPC resource controller + trunkInterfaceLabel = "vpc.amazonaws.com/has-trunk-attached" + // VPC resource controller label nodes as not-supported if the EC2 instance doesn't support ENI trunking/branching feature + unsupportedInstanceValue = "not-supported" + //envEnableIpv4PrefixDelegation is used to allocate /28 prefix instead of secondary IP for an ENI. envEnableIpv4PrefixDelegation = "ENABLE_PREFIX_DELEGATION" @@ -261,6 +268,7 @@ type IPAMContext struct { lastInsufficientCidrError time.Time enableManageUntaggedMode bool enablePodIPAnnotation bool + eventRecorder *eventrecorder.EventRecorder } // setUnmanagedENIs will rebuild the set of ENI IDs for ENIs tagged as "no_manage" @@ -374,9 +382,15 @@ func New(rawK8SClient client.Client, cachedK8SClient client.Client) (*IPAMContex c.enableIPv4 = isIPv4Enabled() c.enableIPv6 = isIPv6Enabled() + eventrecorder, err := eventrecorder.New(rawK8SClient, cachedK8SClient) + if err != nil { + return nil, errors.Wrap(err, "ipamd: unable to initialize event recorder") + } + c.eventRecorder = eventrecorder + c.disableENIProvisioning = disablingENIProvisioning() - client, err := awsutils.New(c.useCustomNetworking, c.disableENIProvisioning, c.enableIPv4, c.enableIPv6) + client, err := awsutils.New(c.useCustomNetworking, c.disableENIProvisioning, c.enableIPv4, c.enableIPv6, c.eventRecorder) if err != nil { return nil, errors.Wrap(err, "ipamd: can not initialize with AWS SDK interface") } @@ -407,7 +421,7 @@ func New(rawK8SClient client.Client, cachedK8SClient client.Client) (*IPAMContex } c.awsClient.InitCachedPrefixDelegation(c.enablePrefixDelegation) - c.myNodeName = os.Getenv("MY_NODE_NAME") + c.myNodeName = os.Getenv(envNodeName) checkpointer := datastore.NewJSONFile(dsBackingStorePath()) c.dataStore = datastore.NewDataStore(log, checkpointer, c.enablePrefixDelegation) @@ -443,7 +457,10 @@ func (c *IPAMContext) nodeInit() error { log.Debugf("Start node init") primaryV4IP := c.awsClient.GetLocalIPv4() - err = c.initENIAndIPLimits() + if err = c.initENIAndIPLimits(); err != nil { + return err + } + if c.enableIPv4 { //Subnets currently will have both v4 and v6 CIDRs. Once EC2 launches v6 only Subnets, that will no longer //be true and so it is safe (and only required) to get the v4 CIDR info only when IPv4 mode is enabled. @@ -538,37 +555,34 @@ func (c *IPAMContext) nodeInit() error { vpcV4CIDRs = c.updateCIDRsRulesOnChange(vpcV4CIDRs) }, 30*time.Second) - eniConfigName, err := eniconfig.GetNodeSpecificENIConfigName(ctx, c.cachedK8SClient) - if err == nil && c.useCustomNetworking && eniConfigName != "default" { - // Signal to VPC Resource Controller that the node is using custom networking - err := c.SetNodeLabel(ctx, vpcENIConfigLabel, eniConfigName) + if c.enablePodENI { + // call GET labels only once to avoid duplicate the call from labelling trunk and eniconfig + labels, err := c.getNodeLabels(ctx) if err != nil { - log.Errorf("Failed to set eniConfig node label", err) - podENIErrInc("nodeInit") return err } - } else { - // Remove the custom networking label - err := c.SetNodeLabel(ctx, vpcENIConfigLabel, "") - if err != nil { - log.Errorf("Failed to delete eniConfig node label", err) - podENIErrInc("nodeInit") - return err + + unsupported := false + + // if node labels is nil in rare cases, we will log the info instead of failing VPC CNI startup + if labels == nil { + // if labels is nil, ipamd logs the warning for debugging + log.Warn("node labels are nil during ipamd init, may need investigation before using the feature of security group for pods") + } else { + if value, ok := labels[trunkInterfaceLabel]; ok { + unsupported = value == unsupportedInstanceValue + } } - } - if metadataResult.TrunkENI != "" { - // Signal to VPC Resource Controller that the node has a trunk already - err := c.SetNodeLabel(ctx, "vpc.amazonaws.com/has-trunk-attached", "true") - if err != nil { - log.Errorf("Failed to set node label", err) - podENIErrInc("nodeInit") - // If this fails, we probably can't talk to the API server. Let the pod restart - return err + // if the node instance is supported, we proceed. Otherwise we stop sending events. + if !unsupported { + if c.useCustomNetworking { + // not doing retry here, don't have to handle error since error has been logged in the called method + c.askForENIConfigIfNeeded(ctx, labels) + } + // we want to ask VPC RC once during node init to cover special cases + c.askForTrunkENIIfNeeded(ctx, labels) } - } else { - // Check if we want to ask for one - c.askForTrunkENIIfNeeded(ctx) } if !c.disableENIProvisioning { @@ -655,7 +669,31 @@ func (c *IPAMContext) StartNodeIPPoolManager() { } func (c *IPAMContext) updateIPPoolIfRequired(ctx context.Context) { - c.askForTrunkENIIfNeeded(ctx) + if c.enablePodENI { + if labels, err := c.getNodeLabels(ctx); err == nil { + unsupported := false + // if node labels is nil in rare cases, we will log the info instead of failing VPC CNI startup + if labels == nil { + // if labels is nil, ipamd logs the warning for debugging + log.Warn("labels are nil during ipamd reconciling resources, may need attention and investigation before using the feature of security group for pods") + } else { + if value, ok := labels[trunkInterfaceLabel]; ok { + unsupported = value == unsupportedInstanceValue + } + } + + // if the node instance is supported, we proceed. Otherwise we stop sending events. + if !unsupported { + c.askForTrunkENIIfNeeded(ctx, labels) + + // if custom networking is enabled and SGP is enabled, we should also check for eniConfig + if c.useCustomNetworking { + c.askForENIConfigIfNeeded(ctx, labels) + } + } + } + } + if c.isDatastorePoolTooLow() { c.increaseDatastorePool(ctx) } else if c.isDatastorePoolTooHigh() { @@ -707,6 +745,7 @@ func (c *IPAMContext) tryFreeENI() { log.Errorf("Failed to free ENI %s, err: %v", eni, err) return } + } // tryUnassignIPsorPrefixesFromAll determines if there are IPs to free when we have extra IPs beyond the target and warmIPTargetDefined @@ -1197,20 +1236,70 @@ func (c *IPAMContext) logPoolStats(dataStoreStats *datastore.DataStoreStats) { log.Debugf("%s: %s, c.maxIPsPerENI = %d", prefix, dataStoreStats, c.maxIPsPerENI) } -func (c *IPAMContext) askForTrunkENIIfNeeded(ctx context.Context) { - if c.enablePodENI && c.dataStore.GetTrunkENI() == "" { +func (c *IPAMContext) askForTrunkENIIfNeeded(ctx context.Context, labels map[string]string) error { + var err error + hasTrunkLabel := false + + // if node labels is nil map which is rare, ipamd still sends the event and lets resource controller to handle this case + if labels != nil { + _, hasTrunkLabel = labels[trunkInterfaceLabel] + } + + // check if trunk interface is not present OR trunk label is not present that covers special use case + // some special settings can boot up nodes with trunk interface attached already + // we still need to signal VPC RC to manage the nodes + if c.dataStore.GetTrunkENI() == "" || !hasTrunkLabel { // Check that there is room for a trunk ENI to be attached: if c.dataStore.GetENIs() >= (c.maxENI - c.unmanagedENI) { - log.Debug("No slot available for a trunk ENI to be attached. Not labeling the node") - return + log.Debug("No slot available for a trunk ENI to be attached. Will not create a node event for resource controller") + return nil } // We need to signal that VPC Resource Controller needs to attach a trunk ENI - err := c.SetNodeLabel(ctx, "vpc.amazonaws.com/has-trunk-attached", "false") + log.Debug("VPC CNI is asking RC to initialize trunk interface") + err = c.eventRecorder.SendNodeEvent(corev1.EventTypeNormal, eventrecorder.EventReason, sgpp.VpcCNINodeEventActionForTrunk, sgpp.TrunkEventNote) if err != nil { - podENIErrInc("askForTrunkENIIfNeeded") - log.Errorf("Failed to set node label", err) + log.Error("Failed sending a node event to VPC RC for enabling trunk interface") } } + + return err +} + +func (c *IPAMContext) getNodeLabels(ctx context.Context) (map[string]string, error) { + var node corev1.Node + if err := c.cachedK8SClient.Get(ctx, types.NamespacedName{Name: c.myNodeName}, &node); err != nil { + return nil, err + } else { + return node.GetLabels(), err + } +} + +func (c *IPAMContext) askForENIConfigIfNeeded(ctx context.Context, labels map[string]string) error { + var err error + hasLabelled := false + + // if node labels is nil map which is rare, ipamd still sends the event and lets resource controller to handle this case + if labels != nil { + _, hasLabelled = labels[vpcENIConfigLabel] + } + + if !hasLabelled { + var eniConfigName string + if eniConfigName, err = eniconfig.GetNodeSpecificENIConfigName(ctx, c.cachedK8SClient); err != nil { + return err + } + + if eniConfigName != eniconfig.EniConfigDefault { + // Signal event to VPC RC that the custom networking is enabled + err = c.eventRecorder.SendNodeEvent(corev1.EventTypeNormal, eventrecorder.EventReason, sgpp.VpcCNINodeEventActionForEniConfig, vpcENIConfigLabel+"="+eniConfigName) + if err == nil { + log.Infof("Send an event to notify RC custom networking is enabled. Config name should be labelled on node as %s", eniConfigName) + } else { + log.Errorf("Failed sending an event to notify RC custom networking is enabled. Error is %s", err.Error()) + } + } + } + return err } // shouldRemoveExtraENIs returns true if we should attempt to find an ENI to free. When WARM_IP_TARGET is set, we @@ -1319,13 +1408,7 @@ func (c *IPAMContext) nodeIPPoolReconcile(ctx context.Context, interval time.Dur } if c.enablePodENI && metadataResult.TrunkENI != "" { - // Label the node that we have a trunk - err = c.SetNodeLabel(ctx, "vpc.amazonaws.com/has-trunk-attached", "true") - if err != nil { - podENIErrInc("askForTrunkENIIfNeeded") - log.Errorf("Failed to set node label for trunk. Aborting reconcile", err) - return - } + log.Debugf("Trunk interface (%s) has been added to the node already.", metadataResult.TrunkENI) } // Update trunk ENI trunkENI = metadataResult.TrunkENI @@ -1882,49 +1965,6 @@ func (c *IPAMContext) getTrunkLinkIndex() (int, error) { return -1, errors.New("no trunk found") } -// SetNodeLabel sets or deletes a node label -func (c *IPAMContext) SetNodeLabel(ctx context.Context, key, value string) error { - request := types.NamespacedName{ - Name: c.myNodeName, - } - - err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - node := &corev1.Node{} - // Find my node - err := c.cachedK8SClient.Get(ctx, request, node) - if err != nil { - log.Errorf("Failed to get node: %v", err) - return err - } - log.Debugf("Node found %q - no of labels - %d", node.Name, len(node.Labels)) - - if labelValue, ok := node.Labels[key]; ok && labelValue == value { - log.Debugf("Node label %q is already %q", key, labelValue) - return nil - } - - // Make deep copy for modification - updateNode := node.DeepCopy() - - // Set node label - if value != "" { - updateNode.Labels[key] = value - } else { - // Empty value, delete the label - log.Debugf("Deleting label %q", key) - delete(updateNode.Labels, key) - } - - if err = c.cachedK8SClient.Update(ctx, updateNode); err != nil { - log.Errorf("Failed to patch node %s with label %q: %q, error: %v", c.myNodeName, key, value, err) - return err - } - log.Debugf("Updated node %s with label %q: %q", c.myNodeName, key, value) - return nil - }) - return err -} - // GetPod returns the pod matching the name and namespace func (c *IPAMContext) GetPod(podName, namespace string) (*corev1.Pod, error) { ctx := context.TODO() @@ -1936,7 +1976,7 @@ func (c *IPAMContext) GetPod(podName, namespace string) (*corev1.Pod, error) { } err := c.rawK8SClient.Get(ctx, podKey, &pod) if err != nil { - return nil, fmt.Errorf("Error while trying to retrieve Pod Info: %s", err) + return nil, fmt.Errorf("error while trying to retrieve pod info: %s", err.Error()) } return &pod, nil } @@ -1947,8 +1987,12 @@ func (c *IPAMContext) AnnotatePod(podName, podNamespace, key, val string) error var err error err = retry.RetryOnConflict(retry.DefaultBackoff, func() error { - pod := &corev1.Pod{} - if pod, err = c.GetPod(podName, podNamespace); err != nil { + var pod *corev1.Pod + if pod, err = c.GetPod(podName, podNamespace); err != nil || pod == nil { + // if pod is nil and err is nil for any reason, this is not retriable case, returning a nil error to not-retry + if err == nil && pod == nil { + log.Warnf("get a nil pod for pod name %s and namespace %s", podName, podNamespace) + } return err } diff --git a/pkg/ipamd/ipamd_test.go b/pkg/ipamd/ipamd_test.go index 3f3263709a..b8b5f92f91 100644 --- a/pkg/ipamd/ipamd_test.go +++ b/pkg/ipamd/ipamd_test.go @@ -28,11 +28,9 @@ import ( "github.com/golang/mock/gomock" "github.com/stretchr/testify/assert" "github.com/vishvananda/netlink" - corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" clientgoscheme "k8s.io/client-go/kubernetes/scheme" "sigs.k8s.io/controller-runtime/pkg/client" testclient "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -44,37 +42,41 @@ import ( mock_eniconfig "github.com/aws/amazon-vpc-cni-k8s/pkg/eniconfig/mocks" "github.com/aws/amazon-vpc-cni-k8s/pkg/ipamd/datastore" mock_networkutils "github.com/aws/amazon-vpc-cni-k8s/pkg/networkutils/mocks" + "github.com/aws/amazon-vpc-cni-k8s/pkg/sgpp" + "github.com/aws/amazon-vpc-cni-k8s/pkg/utils/eventrecorder" + "k8s.io/client-go/tools/events" ) const ( - primaryENIid = "eni-00000000" - secENIid = "eni-00000001" - terENIid = "eni-00000002" - primaryMAC = "12:ef:2a:98:e5:5a" - secMAC = "12:ef:2a:98:e5:5b" - terMAC = "12:ef:2a:98:e5:5c" - primaryDevice = 0 - secDevice = 2 - terDevice = 3 - primarySubnet = "10.10.10.0/24" - secSubnet = "10.10.20.0/24" - terSubnet = "10.10.30.0/24" - ipaddr01 = "10.10.10.11" - ipaddr02 = "10.10.10.12" - ipaddr03 = "10.10.10.13" - ipaddr11 = "10.10.20.11" - ipaddr12 = "10.10.20.12" - ipaddr21 = "10.10.30.11" - ipaddr22 = "10.10.30.12" - vpcCIDR = "10.10.0.0/16" - myNodeName = "testNodeName" - prefix01 = "10.10.30.0/28" - prefix02 = "10.10.40.0/28" - ipaddrPD01 = "10.10.30.0" - ipaddrPD02 = "10.10.40.0" - v6ipaddr01 = "2001:db8::1/128" - v6prefix01 = "2001:db8::/64" - instanceID = "i-0e1f3b9eb950e4980" + primaryENIid = "eni-00000000" + secENIid = "eni-00000001" + terENIid = "eni-00000002" + primaryMAC = "12:ef:2a:98:e5:5a" + secMAC = "12:ef:2a:98:e5:5b" + terMAC = "12:ef:2a:98:e5:5c" + primaryDevice = 0 + secDevice = 2 + terDevice = 3 + primarySubnet = "10.10.10.0/24" + secSubnet = "10.10.20.0/24" + terSubnet = "10.10.30.0/24" + ipaddr01 = "10.10.10.11" + ipaddr02 = "10.10.10.12" + ipaddr03 = "10.10.10.13" + ipaddr11 = "10.10.20.11" + ipaddr12 = "10.10.20.12" + ipaddr21 = "10.10.30.11" + ipaddr22 = "10.10.30.12" + vpcCIDR = "10.10.0.0/16" + myNodeName = "testNodeName" + prefix01 = "10.10.30.0/28" + prefix02 = "10.10.40.0/28" + ipaddrPD01 = "10.10.30.0" + ipaddrPD02 = "10.10.40.0" + v6ipaddr01 = "2001:db8::1/128" + v6prefix01 = "2001:db8::/64" + instanceID = "i-0e1f3b9eb950e4980" + externalEniConfigLabel = "vpc.amazonaws.com/externalEniConfig" ) type testMocks struct { @@ -1850,6 +1852,26 @@ func TestIPAMContext_askForTrunkENIIfNeeded(t *testing.T) { defer m.ctrl.Finish() ctx := context.Background() + fakeRecorder := events.NewFakeRecorder(3) + k8sSchema := runtime.NewScheme() + clientgoscheme.AddToScheme(k8sSchema) + + mockEventRecorder := &eventrecorder.EventRecorder{ + Recorder: fakeRecorder, + RawK8SClient: testclient.NewClientBuilder().WithScheme(k8sSchema).Build(), + CachedK8SClient: testclient.NewClientBuilder().WithScheme(k8sSchema).Build(), + } + eventrecorder.MyNodeName = myNodeName //constant set for node in tests + + fakeNode := v1.Node{ + TypeMeta: metav1.TypeMeta{Kind: "Node"}, + ObjectMeta: metav1.ObjectMeta{Name: myNodeName}, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{}, + } + + _ = mockEventRecorder.CachedK8SClient.Create(ctx, &fakeNode) + mockContext := &IPAMContext{ rawK8SClient: m.rawK8SClient, cachedK8SClient: m.cachedK8SClient, @@ -1860,12 +1882,13 @@ func TestIPAMContext_askForTrunkENIIfNeeded(t *testing.T) { terminating: int32(0), maxENI: 1, myNodeName: myNodeName, + eventRecorder: mockEventRecorder, } labels := map[string]string{ "testKey": "testValue", } - fakeNode := v1.Node{ + fakeNode = v1.Node{ TypeMeta: metav1.TypeMeta{Kind: "Node"}, ObjectMeta: metav1.ObjectMeta{Name: myNodeName, Labels: labels}, Spec: v1.NodeSpec{}, @@ -1875,31 +1898,279 @@ func TestIPAMContext_askForTrunkENIIfNeeded(t *testing.T) { _ = mockContext.dataStore.AddENI("eni-1", 1, true, false, false) // If ENABLE_POD_ENI is not set, nothing happens - mockContext.askForTrunkENIIfNeeded(ctx) + mockContext.askForTrunkENIIfNeeded(ctx, labels) + assert.Len(t, fakeRecorder.Events, 0) // No events mockContext.enablePodENI = true // Enabled, we should try to set the label if there is room - mockContext.askForTrunkENIIfNeeded(ctx) - var notUpdatedNode corev1.Node - var updatedNode corev1.Node - NodeKey := types.NamespacedName{ - Namespace: "", - Name: myNodeName, - } - err := m.cachedK8SClient.Get(ctx, NodeKey, ¬UpdatedNode) + mockContext.askForTrunkENIIfNeeded(ctx, labels) + // Since there was no room, no label should be added - assert.NoError(t, err) - assert.Equal(t, 1, len(notUpdatedNode.Labels)) + assert.Len(t, fakeRecorder.Events, 0) // No events mockContext.maxENI = 4 // Now there is room! - mockContext.askForTrunkENIIfNeeded(ctx) + mockContext.askForTrunkENIIfNeeded(ctx, labels) - // Fetch the updated node and verify that the label is set - //updatedNode, err := m.clientset.CoreV1().Nodes().Get(myNodeName, metav1.GetOptions{}) - err = m.cachedK8SClient.Get(ctx, NodeKey, &updatedNode) - assert.NoError(t, err) - assert.Equal(t, "false", updatedNode.Labels["vpc.amazonaws.com/has-trunk-attached"]) + assert.Len(t, fakeRecorder.Events, 1) + + expected := fmt.Sprintf("%s %s %s", v1.EventTypeNormal, sgpp.VpcCNIEventReason, sgpp.TrunkEventNote) + got := <-fakeRecorder.Events + assert.Equal(t, expected, got) + + // has trunk but not label, send the event + _ = mockContext.dataStore.AddENI("eni-2", 2, true, true, false) + mockContext.askForTrunkENIIfNeeded(ctx, labels) + assert.Len(t, fakeRecorder.Events, 1) + expected = fmt.Sprintf("%s %s %s", v1.EventTypeNormal, sgpp.VpcCNIEventReason, sgpp.TrunkEventNote) + assert.Equal(t, expected, <-fakeRecorder.Events) +} + +func TestIPAMContext_askForENIConfigIfNeeded(t *testing.T) { + os.Setenv("MY_NODE_NAME", myNodeName) + var m *testMocks + + eniConfigName := "testConfig" + + tests := []struct { + node v1.Node + events int + hasError bool + msg string + }{ + { + node: v1.Node{ + TypeMeta: metav1.TypeMeta{Kind: "Node"}, + ObjectMeta: metav1.ObjectMeta{ + Name: myNodeName, + Labels: map[string]string{externalEniConfigLabel: eniConfigName}, + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{}, + }, + events: 1, + hasError: false, + msg: "Having external EniCnnfig Label but not having EniConfig Label on node", + }, + { + node: v1.Node{ + TypeMeta: metav1.TypeMeta{Kind: "Node"}, + ObjectMeta: metav1.ObjectMeta{ + Name: myNodeName, + Labels: map[string]string{vpcENIConfigLabel: eniConfigName}, + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{}, + }, + events: 0, + hasError: false, + msg: "Having EniConfig Label on node", + }, + { + node: v1.Node{ + TypeMeta: metav1.TypeMeta{Kind: "Node"}, + ObjectMeta: metav1.ObjectMeta{ + Name: myNodeName, + Labels: map[string]string{"labelKey": "value"}, + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{}, + }, + events: 0, + hasError: false, + msg: "Labels map doesn't have any required labels", + }, + { + node: v1.Node{ + TypeMeta: metav1.TypeMeta{Kind: "Node"}, + ObjectMeta: metav1.ObjectMeta{ + Name: myNodeName, + Labels: make(map[string]string), + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{}, + }, + events: 0, + hasError: false, + msg: "Having empty map", + }, + { + node: v1.Node{ + TypeMeta: metav1.TypeMeta{Kind: "Node"}, + ObjectMeta: metav1.ObjectMeta{ + Name: myNodeName, + Labels: nil, + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{}, + }, + events: 0, + hasError: false, + msg: "Having nil map", + }, + } + + for _, test := range tests { + m = setup(t) + ctx := context.Background() + fakeRecorder := events.NewFakeRecorder(3) + k8sSchema := runtime.NewScheme() + clientgoscheme.AddToScheme(k8sSchema) + mockEventRecorder := &eventrecorder.EventRecorder{ + Recorder: fakeRecorder, + RawK8SClient: testclient.NewClientBuilder().WithScheme(k8sSchema).Build(), + CachedK8SClient: testclient.NewClientBuilder().WithScheme(k8sSchema).Build(), + } + fakeNode := v1.Node{ + TypeMeta: metav1.TypeMeta{Kind: "Node"}, + ObjectMeta: metav1.ObjectMeta{Name: myNodeName}, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{}, + } + _ = mockEventRecorder.CachedK8SClient.Create(ctx, &fakeNode) + eventrecorder.MyNodeName = myNodeName + mockContext := &IPAMContext{ + rawK8SClient: m.rawK8SClient, + cachedK8SClient: m.cachedK8SClient, + dataStore: datastore.NewDataStore(log, datastore.NewTestCheckpoint(datastore.CheckpointData{Version: datastore.CheckpointFormatVersion}), false), + awsClient: m.awsutils, + networkClient: m.network, + primaryIP: make(map[string]string), + terminating: int32(0), + maxENI: 1, + myNodeName: myNodeName, + eventRecorder: mockEventRecorder, + } + + _ = m.cachedK8SClient.Create(ctx, &test.node) + + err := mockContext.askForENIConfigIfNeeded(ctx, test.node.Labels) + assert.Equal(t, test.hasError, err != nil, test.msg) + assert.Len(t, fakeRecorder.Events, test.events, test.msg) + + if test.events > 0 { + expected := fmt.Sprintf("%s %s %s", v1.EventTypeNormal, sgpp.VpcCNIEventReason, vpcENIConfigLabel+"="+eniConfigName) + got := <-fakeRecorder.Events + assert.Equal(t, expected, got) + } + } + + m.ctrl.Finish() +} + +func TestGetNodeLabels(t *testing.T) { + os.Setenv("MY_NODE_NAME", myNodeName) + var m *testMocks + + eniConfigName := "testConfig" + + tests := []struct { + node v1.Node + hasError bool + labelsLen int + hasLabel bool + msg string + }{ + { + node: v1.Node{ + TypeMeta: metav1.TypeMeta{Kind: "Node"}, + ObjectMeta: metav1.ObjectMeta{ + Name: myNodeName, + Labels: map[string]string{vpcENIConfigLabel: eniConfigName}, + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{}, + }, + hasError: false, + hasLabel: true, + labelsLen: 1, + msg: "Having EniConfig Label on node", + }, + { + node: v1.Node{ + TypeMeta: metav1.TypeMeta{Kind: "Node"}, + ObjectMeta: metav1.ObjectMeta{ + Name: myNodeName, + Labels: map[string]string{"key_1": "value_1", "key_2": "value_2"}, + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{}, + }, + hasError: false, + hasLabel: true, + labelsLen: 2, + msg: "Having irrelevant labels map", + }, + { + node: v1.Node{ + TypeMeta: metav1.TypeMeta{Kind: "Node"}, + ObjectMeta: metav1.ObjectMeta{ + Name: myNodeName, + Labels: make(map[string]string), + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{}, + }, + hasError: false, + hasLabel: false, + msg: "Having empty map", + }, + { + node: v1.Node{ + TypeMeta: metav1.TypeMeta{Kind: "Node"}, + ObjectMeta: metav1.ObjectMeta{ + Name: myNodeName, + Labels: nil, + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{}, + }, + hasError: false, + hasLabel: false, + msg: "Having nil map", + }, + { + node: v1.Node{ + TypeMeta: metav1.TypeMeta{Kind: "Node"}, + ObjectMeta: metav1.ObjectMeta{ + Name: "foo", + Labels: map[string]string{vpcENIConfigLabel: eniConfigName}, + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{}, + }, + hasError: true, + hasLabel: false, + labelsLen: 0, + msg: "Wrong node name", + }, + } + + for _, test := range tests { + m = setup(t) + ctx := context.Background() + k8sSchema := runtime.NewScheme() + clientgoscheme.AddToScheme(k8sSchema) + + mockContext := &IPAMContext{ + rawK8SClient: m.rawK8SClient, + cachedK8SClient: m.cachedK8SClient, + dataStore: datastore.NewDataStore(log, datastore.NewTestCheckpoint(datastore.CheckpointData{Version: datastore.CheckpointFormatVersion}), false), + awsClient: m.awsutils, + networkClient: m.network, + primaryIP: make(map[string]string), + terminating: int32(0), + myNodeName: myNodeName, + } + + _ = m.cachedK8SClient.Create(ctx, &test.node) + labels, err := mockContext.getNodeLabels(ctx) + assert.Equal(t, test.hasError, err != nil, test.msg) + assert.Equal(t, test.hasLabel, labels != nil, test.msg) + assert.Equal(t, test.labelsLen, len(labels), test.msg) + } + + m.ctrl.Finish() } func TestIsConfigValid(t *testing.T) { diff --git a/pkg/sgpp/constants.go b/pkg/sgpp/constants.go index 5895bf01f3..fcfd7abcef 100644 --- a/pkg/sgpp/constants.go +++ b/pkg/sgpp/constants.go @@ -3,8 +3,12 @@ package sgpp type EnforcingMode string const ( - EnforcingModeStrict EnforcingMode = "strict" - EnforcingModeStandard EnforcingMode = "standard" + EnforcingModeStrict EnforcingMode = "strict" + EnforcingModeStandard EnforcingMode = "standard" + VpcCNINodeEventActionForTrunk string = "NeedTrunk" + TrunkEventNote string = "vpc.amazonaws.com/has-trunk-attached=false" + VpcCNINodeEventActionForEniConfig string = "NeedEniConfig" + VpcCNIEventReason string = "AwsNodeNotificationToRc" ) const ( diff --git a/pkg/utils/eventrecorder/eventrecorder.go b/pkg/utils/eventrecorder/eventrecorder.go index 9ebe831bc5..123fe02bd0 100644 --- a/pkg/utils/eventrecorder/eventrecorder.go +++ b/pkg/utils/eventrecorder/eventrecorder.go @@ -16,63 +16,56 @@ package eventrecorder import ( "context" - "errors" "os" "github.com/aws/amazon-vpc-cni-k8s/pkg/k8sapi" + "github.com/aws/amazon-vpc-cni-k8s/pkg/sgpp" "github.com/aws/amazon-vpc-cni-k8s/pkg/utils/logger" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" clientgoscheme "k8s.io/client-go/kubernetes/scheme" - typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" - "k8s.io/client-go/tools/record" + "k8s.io/client-go/tools/events" "sigs.k8s.io/controller-runtime/pkg/client" ) var log = logger.Get() -var myNodeName = os.Getenv("MY_NODE_NAME") -var eventRecorder *EventRecorder +var MyNodeName = os.Getenv("MY_NODE_NAME") const ( awsNode = "aws-node" specNodeName = "spec.nodeName" labelK8sapp = "k8s-app" + EventReason = sgpp.VpcCNIEventReason ) type EventRecorder struct { - recorder record.EventRecorder - k8sClient client.Client + Recorder events.EventRecorder + RawK8SClient client.Client + CachedK8SClient client.Client } -func InitEventRecorder(k8sClient client.Client) error { +func New(rawK8SClient, cachedK8SClient client.Client) (*EventRecorder, error) { clientSet, err := k8sapi.GetKubeClientSet() if err != nil { log.Fatalf("Error Fetching Kubernetes Client: %s", err) - return err + return nil, err } - eventBroadcaster := record.NewBroadcaster() - eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{ - Interface: clientSet.CoreV1().Events(""), + eventBroadcaster := events.NewBroadcaster(&events.EventSinkImpl{ + Interface: clientSet.EventsV1(), }) + stopCh := make(chan struct{}) + eventBroadcaster.StartRecordingToSink(stopCh) - recorder := &EventRecorder{} - recorder.recorder = eventBroadcaster.NewRecorder(clientgoscheme.Scheme, corev1.EventSource{ - Component: awsNode, - Host: myNodeName, - }) - recorder.k8sClient = k8sClient - eventRecorder = recorder - return nil -} + eventRecorder := &EventRecorder{} + eventRecorder.Recorder = eventBroadcaster.NewRecorder(clientgoscheme.Scheme, "aws-node") + eventRecorder.RawK8SClient = rawK8SClient + eventRecorder.CachedK8SClient = cachedK8SClient + + return eventRecorder, nil -func Get() *EventRecorder { - if eventRecorder == nil { - err := errors.New("error fetching event recoder, not initialized") - panic(err.Error()) - } - return eventRecorder } // BroadcastEvent will raise event on aws-node with given type, reason, & message @@ -80,20 +73,54 @@ func (e *EventRecorder) BroadcastEvent(eventType, reason, message string) { // Get aws-node pod objects with label & field selectors labelSelector := labels.SelectorFromSet(labels.Set{labelK8sapp: awsNode}) - fieldSelector := fields.SelectorFromSet(fields.Set{specNodeName: myNodeName}) + fieldSelector := fields.SelectorFromSet(fields.Set{specNodeName: MyNodeName}) listOptions := client.ListOptions{ LabelSelector: labelSelector, FieldSelector: fieldSelector, } var podList corev1.PodList - err := e.k8sClient.List(context.TODO(), &podList, &listOptions) + err := e.RawK8SClient.List(context.TODO(), &podList, &listOptions) if err != nil { log.Errorf("Failed to get pods, cannot broadcast events: %v", err) return } for _, pod := range podList.Items { log.Debugf("Broadcasting event on pod %s", pod.Name) - e.recorder.Event(&pod, eventType, reason, message) + e.Recorder.Eventf(&pod, nil, eventType, reason, "", message) + } +} + +func (e *EventRecorder) findMyNode(ctx context.Context) (corev1.Node, error) { + var node corev1.Node + // Find my node + err := e.CachedK8SClient.Get(ctx, types.NamespacedName{Name: MyNodeName}, &node) + if err != nil { + log.Errorf("Cached client failed GET node (%s)", MyNodeName) + } else { + log.Debugf("Node found %s - labels - %d", node.Name, len(node.Labels)) } + return node, err +} + +// SendNodeEvent sends an event regarding node object +func (e *EventRecorder) SendNodeEvent(eventType, reason, action, message string) error { + // Find my node + node, err := e.findMyNode(context.TODO()) + if err != nil { + log.Errorf("Failed to get node: %v", err) + return err + } + + // make a copy before modifying the UID + // Note: kubectl uses the filter involvedObject.uid=NodeName to fetch the events + // that are listed in 'kubectl describe node' output. So setting the node UID to + // nodename before sending the event + nodeCopy := node.DeepCopy() + nodeCopy.SetUID(types.UID(MyNodeName)) + + e.Recorder.Eventf(nodeCopy, nil, eventType, reason, action, message) + log.Debugf("Sent node event: eventType: %s, reason: %s, message: %s, action %s", eventType, reason, message, action) + + return nil } diff --git a/pkg/utils/eventrecorder/eventrecorder_test.go b/pkg/utils/eventrecorder/eventrecorder_test.go index 314ae9e103..3d37d15a01 100644 --- a/pkg/utils/eventrecorder/eventrecorder_test.go +++ b/pkg/utils/eventrecorder/eventrecorder_test.go @@ -16,36 +16,41 @@ package eventrecorder import ( "context" "fmt" + "strings" "testing" + "github.com/aws/amazon-vpc-cni-k8s/pkg/sgpp" "github.com/golang/mock/gomock" "github.com/stretchr/testify/assert" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" - "k8s.io/client-go/tools/record" + "k8s.io/client-go/tools/events" "sigs.k8s.io/controller-runtime/pkg/client" testclient "sigs.k8s.io/controller-runtime/pkg/client/fake" ) var ctrl *gomock.Controller -var fakeRecorder *record.FakeRecorder +var fakeRecorder *events.FakeRecorder type testMocks struct { - ctrl *gomock.Controller - mockK8sClient client.Client + ctrl *gomock.Controller + mockK8SClient client.Client + mockcachedK8SClient client.Client } func setup(t *testing.T) *testMocks { ctrl = gomock.NewController(t) k8sSchema := runtime.NewScheme() - k8sClient := testclient.NewFakeClientWithScheme(k8sSchema) + K8SClient := testclient.NewClientBuilder().WithScheme(k8sSchema).Build() + cachedK8SClient := testclient.NewClientBuilder().WithScheme(k8sSchema).Build() clientgoscheme.AddToScheme(k8sSchema) return &testMocks{ - ctrl: ctrl, - mockK8sClient: k8sClient, + ctrl: ctrl, + mockK8SClient: K8SClient, + mockcachedK8SClient: cachedK8SClient, } } @@ -54,10 +59,11 @@ func TestBroadcastEvents(t *testing.T) { defer m.ctrl.Finish() ctx := context.Background() - fakeRecorder = record.NewFakeRecorder(3) + fakeRecorder = events.NewFakeRecorder(3) mockEventRecorder := &EventRecorder{ - recorder: fakeRecorder, - k8sClient: m.mockK8sClient, + Recorder: fakeRecorder, + RawK8SClient: m.mockK8SClient, + CachedK8SClient: m.mockcachedK8SClient, } labels := map[string]string{"k8s-app": "aws-node"} @@ -69,7 +75,7 @@ func TestBroadcastEvents(t *testing.T) { Labels: labels, }, Spec: v1.PodSpec{ - NodeName: myNodeName, + NodeName: MyNodeName, }, }, { @@ -77,7 +83,7 @@ func TestBroadcastEvents(t *testing.T) { Name: "mockPodWithSpec", }, Spec: v1.PodSpec{ - NodeName: myNodeName, + NodeName: MyNodeName, }, }, { @@ -90,7 +96,7 @@ func TestBroadcastEvents(t *testing.T) { //Create above fake pods for _, mockPod := range pods { - _ = mockEventRecorder.k8sClient.Create(ctx, &mockPod) + _ = mockEventRecorder.RawK8SClient.Create(ctx, &mockPod) } // Testing missing permissions event case: failed to call @@ -103,3 +109,47 @@ func TestBroadcastEvents(t *testing.T) { got := <-fakeRecorder.Events assert.Equal(t, expected, got) } + +func TestSendNodeEvent(t *testing.T) { + m := setup(t) + defer m.ctrl.Finish() + ctx := context.Background() + MyNodeName = "test-node" + + fakeRecorder = events.NewFakeRecorder(3) + mockEventRecorder := &EventRecorder{ + Recorder: fakeRecorder, + RawK8SClient: m.mockK8SClient, + CachedK8SClient: m.mockcachedK8SClient, + } + + node := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: MyNodeName, + }, + } + + labels := map[string]string{"k8s-app": "aws-node"} + + pod := v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mockPodWithLabelAndSpec", + Labels: labels, + }, + Spec: v1.PodSpec{ + NodeName: MyNodeName, + }, + } + + mockEventRecorder.CachedK8SClient.Create(ctx, &node) + mockEventRecorder.RawK8SClient.Create(ctx, &pod) + reason := sgpp.VpcCNIEventReason + msg := sgpp.TrunkEventNote + action := sgpp.VpcCNINodeEventActionForTrunk + mockEventRecorder.SendNodeEvent(v1.EventTypeNormal, reason, action, msg) + assert.Len(t, fakeRecorder.Events, 1) + + sgpEvent := <-fakeRecorder.Events + assert.True(t, strings.Contains(sgpEvent, reason) && strings.Contains(sgpEvent, msg)) + +} diff --git a/test/framework/framework.go b/test/framework/framework.go index fe69b5cd4e..14717473c1 100644 --- a/test/framework/framework.go +++ b/test/framework/framework.go @@ -14,6 +14,9 @@ package framework import ( + "context" + "log" + eniConfig "github.com/aws/amazon-vpc-cni-k8s/pkg/apis/crd/v1alpha1" "github.com/aws/amazon-vpc-cni-k8s/test/framework/controller" "github.com/aws/amazon-vpc-cni-k8s/test/framework/helm" @@ -23,9 +26,13 @@ import ( sgp "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1beta1" "github.com/go-logr/logr" . "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + eventsv1 "k8s.io/api/events/v1" "k8s.io/apimachinery/pkg/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/clientcmd" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -53,9 +60,30 @@ func New(options Options) *Framework { eniConfig.AddToScheme(k8sSchema) sgp.AddToScheme(k8sSchema) - k8sClient, err := client.New(config, client.Options{Scheme: k8sSchema}) + cache, err := cache.New(config, cache.Options{Scheme: k8sSchema}) + Expect(err).NotTo(HaveOccurred()) + err = cache.IndexField(context.TODO(), &eventsv1.Event{}, "reason", func(o client.Object) []string { + return []string{o.(*v1.Event).Reason} + }) // default indexing only on ns, need this for ipamd_event_test + Expect(err).NotTo(HaveOccurred()) + + // set up a context for the signals in tests + stopChan := ctrl.SetupSignalHandler() + go func() { + cache.Start(stopChan) + }() + cache.WaitForCacheSync(stopChan) + realClient, err := client.New(config, client.Options{Scheme: k8sSchema}) Expect(err).NotTo(HaveOccurred()) + k8sClient, err := client.NewDelegatingClient(client.NewDelegatingClientInput{ + CacheReader: cache, + Client: realClient, + }) + if err != nil { + log.Fatalf("failed to create delegation client: %v", err) + } + cloudConfig := aws.CloudConfig{Region: options.AWSRegion, VpcID: options.AWSVPCID, EKSEndpoint: options.EKSEndpoint} diff --git a/test/framework/resources/k8s/resources/events.go b/test/framework/resources/k8s/resources/events.go index 9592c3b1b5..2517903e7b 100644 --- a/test/framework/resources/k8s/resources/events.go +++ b/test/framework/resources/k8s/resources/events.go @@ -16,12 +16,12 @@ package resources import ( "context" - v1 "k8s.io/api/core/v1" + eventsv1 "k8s.io/api/events/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) type EventManager interface { - GetEventsWithOptions(opts *client.ListOptions) (v1.EventList, error) + GetEventsWithOptions(opts *client.ListOptions) (eventsv1.EventList, error) } type defaultEventManager struct { @@ -32,8 +32,8 @@ func NewEventManager(k8sClient client.Client) EventManager { return &defaultEventManager{k8sClient: k8sClient} } -func (d defaultEventManager) GetEventsWithOptions(opts *client.ListOptions) (v1.EventList, error) { - eventList := v1.EventList{} +func (d defaultEventManager) GetEventsWithOptions(opts *client.ListOptions) (eventsv1.EventList, error) { + eventList := eventsv1.EventList{} err := d.k8sClient.List(context.Background(), &eventList, opts) return eventList, err } From bc1afd3923b60711f63124131997599de0d93918 Mon Sep 17 00:00:00 2001 From: Jeffrey Nelson Date: Fri, 13 Jan 2023 11:56:22 -0600 Subject: [PATCH 3/5] restore node update permission in manifest (#2213) --- config/master/aws-k8s-cni-cn.yaml | 2 +- config/master/aws-k8s-cni-us-gov-east-1.yaml | 2 +- config/master/aws-k8s-cni-us-gov-west-1.yaml | 2 +- config/master/aws-k8s-cni.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config/master/aws-k8s-cni-cn.yaml b/config/master/aws-k8s-cni-cn.yaml index b21d85329f..a74e325081 100644 --- a/config/master/aws-k8s-cni-cn.yaml +++ b/config/master/aws-k8s-cni-cn.yaml @@ -61,7 +61,7 @@ rules: - apiGroups: [""] resources: - nodes - verbs: ["list", "watch", "get"] + verbs: ["list", "watch", "get", "update"] - apiGroups: ["extensions"] resources: - '*' diff --git a/config/master/aws-k8s-cni-us-gov-east-1.yaml b/config/master/aws-k8s-cni-us-gov-east-1.yaml index 58ab644305..a7f534324c 100644 --- a/config/master/aws-k8s-cni-us-gov-east-1.yaml +++ b/config/master/aws-k8s-cni-us-gov-east-1.yaml @@ -61,7 +61,7 @@ rules: - apiGroups: [""] resources: - nodes - verbs: ["list", "watch", "get"] + verbs: ["list", "watch", "get", "update"] - apiGroups: ["extensions"] resources: - '*' diff --git a/config/master/aws-k8s-cni-us-gov-west-1.yaml b/config/master/aws-k8s-cni-us-gov-west-1.yaml index d81bff045b..4f7e79f5b6 100644 --- a/config/master/aws-k8s-cni-us-gov-west-1.yaml +++ b/config/master/aws-k8s-cni-us-gov-west-1.yaml @@ -61,7 +61,7 @@ rules: - apiGroups: [""] resources: - nodes - verbs: ["list", "watch", "get"] + verbs: ["list", "watch", "get", "update"] - apiGroups: ["extensions"] resources: - '*' diff --git a/config/master/aws-k8s-cni.yaml b/config/master/aws-k8s-cni.yaml index 2eac923085..28013abeda 100644 --- a/config/master/aws-k8s-cni.yaml +++ b/config/master/aws-k8s-cni.yaml @@ -61,7 +61,7 @@ rules: - apiGroups: [""] resources: - nodes - verbs: ["list", "watch", "get"] + verbs: ["list", "watch", "get", "update"] - apiGroups: ["extensions"] resources: - '*' From daf5ccd3cfbbd078e2fa5640bf22d123e5c43b2f Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Fri, 13 Jan 2023 15:33:00 -0600 Subject: [PATCH 4/5] test: fix event type (#2215) --- test/framework/framework.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/framework/framework.go b/test/framework/framework.go index 14717473c1..e293fb788c 100644 --- a/test/framework/framework.go +++ b/test/framework/framework.go @@ -17,23 +17,23 @@ import ( "context" "log" - eniConfig "github.com/aws/amazon-vpc-cni-k8s/pkg/apis/crd/v1alpha1" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/controller" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/helm" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/aws" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" sgp "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1beta1" "github.com/go-logr/logr" . "github.com/onsi/gomega" v1 "k8s.io/api/core/v1" - eventsv1 "k8s.io/api/events/v1" "k8s.io/apimachinery/pkg/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/clientcmd" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client" + + eniConfig "github.com/aws/amazon-vpc-cni-k8s/pkg/apis/crd/v1alpha1" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/controller" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/helm" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/aws" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" ) type Framework struct { @@ -62,7 +62,7 @@ func New(options Options) *Framework { cache, err := cache.New(config, cache.Options{Scheme: k8sSchema}) Expect(err).NotTo(HaveOccurred()) - err = cache.IndexField(context.TODO(), &eventsv1.Event{}, "reason", func(o client.Object) []string { + err = cache.IndexField(context.TODO(), &v1.Event{}, "reason", func(o client.Object) []string { return []string{o.(*v1.Event).Reason} }) // default indexing only on ns, need this for ipamd_event_test Expect(err).NotTo(HaveOccurred()) From ebbdcad058433207f4bf8c164f61867c19cbaf00 Mon Sep 17 00:00:00 2001 From: Jeffrey Nelson Date: Tue, 17 Jan 2023 12:01:28 -0600 Subject: [PATCH 5/5] login to ECR public before docker-build step (#2217) --- .github/workflows/pr-automated-tests.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/pr-automated-tests.yaml b/.github/workflows/pr-automated-tests.yaml index dbe12882fb..5b6f2e4093 100644 --- a/.github/workflows/pr-automated-tests.yaml +++ b/.github/workflows/pr-automated-tests.yaml @@ -42,6 +42,17 @@ jobs: steps: - name: Checkout latest commit in the PR uses: actions/checkout@v3 + - name: Set up AWS credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + role-to-assume: ${{ secrets.OSS_TEST_ROLE_ARN }} + role-duration-seconds: 14400 # 4 hours + aws-region: ${{ secrets.AWS_DEFAULT_REGION }} + - name: Login to Amazon ECR Public + id: login-ecr-public + uses: aws-actions/amazon-ecr-login@v1 + with: + registry-type: public - name: Set up QEMU uses: docker/setup-qemu-action@v2 - name: Set up Docker Buildx