diff --git a/.github/workflows/build-multi-arch.yml b/.github/workflows/build-multi-arch.yml index 664b3a3157..03019a6b31 100644 --- a/.github/workflows/build-multi-arch.yml +++ b/.github/workflows/build-multi-arch.yml @@ -29,26 +29,27 @@ jobs: - name: Build run: make docker all - - build_arm: - name: Build ARM64 - runs-on: [self-hosted, linux, arm64] - steps: - - - name: Set up Go 1.18 - uses: actions/setup-go@v2 - with: - go-version: '1.18' - id: go - - - name: Check out code into the Go module directory - uses: actions/checkout@v2 - - - name: Clean up docker - run: docker system prune -a --volumes -f - - - name: Clean up stale docker images - run: sudo docker image prune -f - - - name: Build - run: make docker all + + # Disable arm-build + # build_arm: + # name: Build ARM64 + # runs-on: [self-hosted, linux, arm64] + # steps: + + # - name: Set up Go 1.18 + # uses: actions/setup-go@v2 + # with: + # go-version: '1.18' + # id: go + + # - name: Check out code into the Go module directory + # uses: actions/checkout@v2 + + # - name: Clean up docker + # run: docker system prune -a --volumes -f + + # - name: Clean up stale docker images + # run: sudo docker image prune -f + + # - name: Build + # run: make docker all diff --git a/.github/workflows/stale_issue_pr.yaml b/.github/workflows/stale_issue_pr.yaml index f69a7f71ed..6547f33beb 100644 --- a/.github/workflows/stale_issue_pr.yaml +++ b/.github/workflows/stale_issue_pr.yaml @@ -23,5 +23,3 @@ jobs: operations-per-run: 100 stale-issue-message: 'This issue is stale because it has been open 60 days with no activity. Remove stale label or comment or this will be closed in 14 days' stale-pr-message: 'This pull request is stale because it has been open 60 days with no activity. Remove stale label or comment or this will be closed in 14 days' - - name: Print outputs - run: echo ${{ join(steps.stale.outputs.*, ',') }} diff --git a/Makefile b/Makefile index 375f29c630..36bf61833d 100644 --- a/Makefile +++ b/Makefile @@ -99,6 +99,8 @@ MULTI_PLATFORM_BUILD_TARGETS = linux/amd64,linux/arm64 # Default to building an executable using the host's Go toolchain. .DEFAULT_GOAL = build-linux +##@ Building + # Build both CNI and metrics helper container images. all: docker docker-init docker-metrics ## Builds Init, CNI and metrics helper container images. @@ -137,6 +139,23 @@ docker-func-test: docker ## Run the built CNI container image to use in func docker run $(DOCKER_RUN_FLAGS) \ "$(IMAGE_NAME)" +multi-arch-cni-build-push: ## Build multi-arch VPC CNI container image. + docker buildx build $(DOCKER_BUILD_FLAGS) \ + -f scripts/dockerfiles/Dockerfile.release \ + --platform "$(MULTI_PLATFORM_BUILD_TARGETS)"\ + -t "$(IMAGE_NAME)" \ + --push \ + . + +multi-arch-cni-init-build-push: ## Build VPC CNI plugin Init container image. + docker buildx build $(DOCKER_BUILD_FLAGS) \ + -f scripts/dockerfiles/Dockerfile.init \ + --platform "$(MULTI_PLATFORM_BUILD_TARGETS)"\ + -t "$(INIT_IMAGE_NAME)" \ + --push \ + . + +##@ Run Unit Tests # Run unit tests unit-test: export AWS_VPC_K8S_CNI_LOG_FILE=stdout unit-test: ## Run unit tests @@ -155,6 +174,7 @@ unit-test-race: ## Run unit tests with race detection (can only be run nativ go test -v -cover -race -timeout 10s ./pkg/eniconfig/... go test -v -cover -race -timeout 10s ./pkg/ipamd/... +##@ Build and Run Unit Tests # Build the unit test driver container image. build-docker-test: ## Build the unit test driver container image. docker build $(DOCKER_BUILD_FLAGS) \ @@ -167,6 +187,8 @@ docker-unit-tests: build-docker-test ## Run unit tests inside of the testing docker run $(DOCKER_RUN_ARGS) \ $(TEST_IMAGE_NAME) \ make unit-test + +##@ Build metrics helper agent # Build metrics helper agent. build-metrics: ## Build metrics helper agent. @@ -180,6 +202,8 @@ docker-metrics: ## Build metrics helper agent Docker image. . @echo "Built Docker image \"$(METRICS_IMAGE_NAME)\"" +##@ Run metrics helper Unit Tests + # Run metrics helper unit test suite (must be run natively). metrics-unit-test: CGO_ENABLED=1 metrics-unit-test: GOARCH= @@ -195,31 +219,6 @@ docker-metrics-test: ## Run metrics helper unit test suite in a container. $(GOLANG_IMAGE) \ make metrics-unit-test -generate: - PATH=$(CURDIR)/scripts:$(PATH) go generate -x ./... - $(MAKE) format - -# Generate limit file go code -# Generate eni-max-pods.txt file for EKS AMI -generate-limits: GOOS= -generate-limits: ## Generate limit file go code - go run $(VENDOR_OVERRIDE_FLAG) scripts/gen_vpc_ip_limits.go - -multi-arch-cni-build-push: ## Build multi-arch VPC CNI container image. - docker buildx build $(DOCKER_BUILD_FLAGS) \ - -f scripts/dockerfiles/Dockerfile.release \ - --platform "$(MULTI_PLATFORM_BUILD_TARGETS)"\ - -t "$(IMAGE_NAME)" \ - --push \ - . - -multi-arch-cni-init-build-push: ## Build VPC CNI plugin Init container image. - docker buildx build $(DOCKER_BUILD_FLAGS) \ - -f scripts/dockerfiles/Dockerfile.init \ - --platform "$(MULTI_PLATFORM_BUILD_TARGETS)"\ - -t "$(INIT_IMAGE_NAME)" \ - --push \ - . # Fetch the CNI plugins plugins: FETCH_VERSION=1.1.1 plugins: FETCH_URL=https://github.com/containernetworking/plugins/releases/download/v$(FETCH_VERSION)/cni-plugins-$(GOOS)-$(GOARCH)-v$(FETCH_VERSION).tgz @@ -232,6 +231,8 @@ plugins: ## Fetch the CNI plugins @echo curl -L $(FETCH_URL) | tar -zx $(PLUGIN_BINS) +##@ Debug script + debug-script: FETCH_URL=https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-log-collector.sh debug-script: VISIT_URL=https://github.com/awslabs/amazon-eks-ami/tree/master/log-collector-script/linux debug-script: ## Fetching debug script from awslabs/amazon-eks-ami @@ -243,6 +244,8 @@ debug-script: ## Fetching debug script from awslabs/amazon-eks-ami curl -L $(FETCH_URL) -o ./aws-cni-support.sh chmod +x ./aws-cni-support.sh +##@ Formatting + # Run all source code checks. check: check-format lint vet ## Run all source code checks. @@ -289,6 +292,18 @@ check-format: format version: @echo ${VERSION} +##@ Generate ENI/IP limits + +generate: + PATH=$(CURDIR)/scripts:$(PATH) go generate -x ./... + $(MAKE) format + +# Generate limit file go code +# Generate eni-max-pods.txt file for EKS AMI +generate-limits: GOOS= +generate-limits: ## Generate limit file go code + go run $(VENDOR_OVERRIDE_FLAG) scripts/gen_vpc_ip_limits.go + ekscharts-sync: ${MAKEFILE_PATH}/scripts/sync-to-eks-charts.sh -b ${HELM_CHART_NAME} -r ${REPO_FULL_NAME} @@ -315,6 +330,8 @@ cleanup-ec2-sdk-override: @if [ "$(EC2_SDK_OVERRIDE)" = "y" ] ; then \ ./scripts/ec2_model_override/cleanup.sh ; \ fi + +##@ Cleanup # Clean temporary files and build artifacts from the project. clean: ## Clean temporary files and build artifacts from the project. @@ -322,6 +339,7 @@ clean: ## Clean temporary files and build artifacts from the project. @rm -f -- $(PLUGIN_BINS) @rm -f -- coverage.txt -help: ## Show this help. - @grep -F -h "##" $(MAKEFILE_LIST) | grep -F -v grep | sed -e 's/\\$$//' \ - | awk -F'[:#]' '{print $$1 = sprintf("%-30s", $$1), $$4}' +##@ Helpers + +help: ## Display this help + @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) diff --git a/charts/aws-vpc-cni/Chart.yaml b/charts/aws-vpc-cni/Chart.yaml index 2590623425..96994598eb 100644 --- a/charts/aws-vpc-cni/Chart.yaml +++ b/charts/aws-vpc-cni/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v1 name: aws-vpc-cni -version: 1.1.19 +version: 1.1.20 appVersion: "v1.11.3" description: A Helm chart for the AWS VPC CNI icon: https://raw.githubusercontent.com/aws/eks-charts/master/docs/logo/aws.png diff --git a/charts/aws-vpc-cni/templates/clusterrole.yaml b/charts/aws-vpc-cni/templates/clusterrole.yaml index c7102ae444..fb096ef0f3 100644 --- a/charts/aws-vpc-cni/templates/clusterrole.yaml +++ b/charts/aws-vpc-cni/templates/clusterrole.yaml @@ -36,4 +36,4 @@ rules: - apiGroups: ["", "events.k8s.io"] resources: - events - verbs: ["create", "patch", "list", "get"] + verbs: ["create", "patch", "list"] diff --git a/config/master/aws-k8s-cni-cn.yaml b/config/master/aws-k8s-cni-cn.yaml index a920ab6239..131e90eaa8 100644 --- a/config/master/aws-k8s-cni-cn.yaml +++ b/config/master/aws-k8s-cni-cn.yaml @@ -73,7 +73,7 @@ rules: - apiGroups: ["", "events.k8s.io"] resources: - events - verbs: ["create", "patch", "list", "get"] + verbs: ["create", "patch", "list"] --- # Source: aws-vpc-cni/templates/clusterrolebinding.yaml apiVersion: rbac.authorization.k8s.io/v1 diff --git a/config/master/aws-k8s-cni-us-gov-east-1.yaml b/config/master/aws-k8s-cni-us-gov-east-1.yaml index f2bf3891b8..aa9c8d6b4f 100644 --- a/config/master/aws-k8s-cni-us-gov-east-1.yaml +++ b/config/master/aws-k8s-cni-us-gov-east-1.yaml @@ -73,7 +73,7 @@ rules: - apiGroups: ["", "events.k8s.io"] resources: - events - verbs: ["create", "patch", "list", "get"] + verbs: ["create", "patch", "list"] --- # Source: aws-vpc-cni/templates/clusterrolebinding.yaml apiVersion: rbac.authorization.k8s.io/v1 diff --git a/config/master/aws-k8s-cni-us-gov-west-1.yaml b/config/master/aws-k8s-cni-us-gov-west-1.yaml index 1f9b784c5b..79eca90deb 100644 --- a/config/master/aws-k8s-cni-us-gov-west-1.yaml +++ b/config/master/aws-k8s-cni-us-gov-west-1.yaml @@ -73,7 +73,7 @@ rules: - apiGroups: ["", "events.k8s.io"] resources: - events - verbs: ["create", "patch", "list", "get"] + verbs: ["create", "patch", "list"] --- # Source: aws-vpc-cni/templates/clusterrolebinding.yaml apiVersion: rbac.authorization.k8s.io/v1 diff --git a/config/master/aws-k8s-cni.yaml b/config/master/aws-k8s-cni.yaml index 53a1f82b57..4684af9638 100644 --- a/config/master/aws-k8s-cni.yaml +++ b/config/master/aws-k8s-cni.yaml @@ -73,7 +73,7 @@ rules: - apiGroups: ["", "events.k8s.io"] resources: - events - verbs: ["create", "patch", "list", "get"] + verbs: ["create", "patch", "list"] --- # Source: aws-vpc-cni/templates/clusterrolebinding.yaml apiVersion: rbac.authorization.k8s.io/v1 diff --git a/config/multus/v3.9.0-eksbuild.2/Readme.md b/config/multus/v3.9.0-eksbuild.2/Readme.md new file mode 100644 index 0000000000..0995327b3f --- /dev/null +++ b/config/multus/v3.9.0-eksbuild.2/Readme.md @@ -0,0 +1,2 @@ +## Changelog +Addressed CVE's \ No newline at end of file diff --git a/config/multus/v3.9.0-eksbuild.2/aws-k8s-multus-cn.yaml b/config/multus/v3.9.0-eksbuild.2/aws-k8s-multus-cn.yaml new file mode 100644 index 0000000000..a4accedb33 --- /dev/null +++ b/config/multus/v3.9.0-eksbuild.2/aws-k8s-multus-cn.yaml @@ -0,0 +1,165 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: network-attachment-definitions.k8s.cni.cncf.io +spec: + group: k8s.cni.cncf.io + scope: Namespaced + names: + plural: network-attachment-definitions + singular: network-attachment-definition + kind: NetworkAttachmentDefinition + shortNames: + - net-attach-def + versions: + - name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + description: 'NetworkAttachmentDefinition is a CRD schema specified by the Network Plumbing + Working Group to express the intent for attaching pods to one or more logical or physical + networks. More information available at: https://github.com/k8snetworkplumbingwg/multi-net-spec' + type: object + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this represen + tation of an object. Servers should convert recognized schemas to the + latest internal value, and may reject unrecognized values. More info: + https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: 'NetworkAttachmentDefinition spec defines the desired state of a network attachment' + type: object + properties: + config: + description: 'NetworkAttachmentDefinition config is a JSON-formatted CNI configuration' + type: string +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: multus +rules: + - apiGroups: ["k8s.cni.cncf.io"] + resources: + - '*' + verbs: + - '*' + - apiGroups: + - "" + resources: + - pods + - pods/status + verbs: + - get + - update + - apiGroups: + - "" + - events.k8s.io + resources: + - events + verbs: + - create + - patch + - update +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: multus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: multus +subjects: +- kind: ServiceAccount + name: multus + namespace: kube-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: multus + namespace: kube-system +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kube-multus-ds + namespace: kube-system + labels: + tier: node + app: multus + name: multus +spec: + selector: + matchLabels: + name: multus + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + tier: node + app: multus + name: multus + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/os + operator: In + values: + - linux + - key: eks.amazonaws.com/compute-type + operator: NotIn + values: + - fargate + hostNetwork: true + tolerations: + - operator: Exists + effect: NoSchedule + serviceAccountName: multus + containers: + - name: kube-multus + image: 961992271922.dkr.ecr.cn-northwest-1.amazonaws.com.cn/eks/multus-cni:v3.9.0-eksbuild.2 + command: ["/entrypoint.sh"] + args: + - "--multus-conf-file=auto" + - "--cni-version=0.4.0" + - "--multus-master-cni-file-name=10-aws.conflist" + - "--multus-log-level=error" + - "--multus-log-file=/var/log/aws-routed-eni/multus.log" + resources: + requests: + cpu: "100m" + memory: "50Mi" + limits: + cpu: "100m" + memory: "50Mi" + securityContext: + privileged: true + volumeMounts: + - name: cni + mountPath: /host/etc/cni/net.d + - name: cnibin + mountPath: /host/opt/cni/bin + terminationGracePeriodSeconds: 10 + volumes: + - name: cni + hostPath: + path: /etc/cni/net.d + - name: cnibin + hostPath: + path: /opt/cni/bin \ No newline at end of file diff --git a/config/multus/v3.9.0-eksbuild.2/aws-k8s-multus-us-gov-east-1.yaml b/config/multus/v3.9.0-eksbuild.2/aws-k8s-multus-us-gov-east-1.yaml new file mode 100644 index 0000000000..2275364eaf --- /dev/null +++ b/config/multus/v3.9.0-eksbuild.2/aws-k8s-multus-us-gov-east-1.yaml @@ -0,0 +1,165 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: network-attachment-definitions.k8s.cni.cncf.io +spec: + group: k8s.cni.cncf.io + scope: Namespaced + names: + plural: network-attachment-definitions + singular: network-attachment-definition + kind: NetworkAttachmentDefinition + shortNames: + - net-attach-def + versions: + - name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + description: 'NetworkAttachmentDefinition is a CRD schema specified by the Network Plumbing + Working Group to express the intent for attaching pods to one or more logical or physical + networks. More information available at: https://github.com/k8snetworkplumbingwg/multi-net-spec' + type: object + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this represen + tation of an object. Servers should convert recognized schemas to the + latest internal value, and may reject unrecognized values. More info: + https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: 'NetworkAttachmentDefinition spec defines the desired state of a network attachment' + type: object + properties: + config: + description: 'NetworkAttachmentDefinition config is a JSON-formatted CNI configuration' + type: string +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: multus +rules: + - apiGroups: ["k8s.cni.cncf.io"] + resources: + - '*' + verbs: + - '*' + - apiGroups: + - "" + resources: + - pods + - pods/status + verbs: + - get + - update + - apiGroups: + - "" + - events.k8s.io + resources: + - events + verbs: + - create + - patch + - update +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: multus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: multus +subjects: +- kind: ServiceAccount + name: multus + namespace: kube-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: multus + namespace: kube-system +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kube-multus-ds + namespace: kube-system + labels: + tier: node + app: multus + name: multus +spec: + selector: + matchLabels: + name: multus + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + tier: node + app: multus + name: multus + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/os + operator: In + values: + - linux + - key: eks.amazonaws.com/compute-type + operator: NotIn + values: + - fargate + hostNetwork: true + tolerations: + - operator: Exists + effect: NoSchedule + serviceAccountName: multus + containers: + - name: kube-multus + image: 151742754352.dkr.ecr.us-gov-east-1.amazonaws.com/eks/multus-cni:v3.9.0-eksbuild.2 + command: ["/entrypoint.sh"] + args: + - "--multus-conf-file=auto" + - "--cni-version=0.4.0" + - "--multus-master-cni-file-name=10-aws.conflist" + - "--multus-log-level=error" + - "--multus-log-file=/var/log/aws-routed-eni/multus.log" + resources: + requests: + cpu: "100m" + memory: "50Mi" + limits: + cpu: "100m" + memory: "50Mi" + securityContext: + privileged: true + volumeMounts: + - name: cni + mountPath: /host/etc/cni/net.d + - name: cnibin + mountPath: /host/opt/cni/bin + terminationGracePeriodSeconds: 10 + volumes: + - name: cni + hostPath: + path: /etc/cni/net.d + - name: cnibin + hostPath: + path: /opt/cni/bin diff --git a/config/multus/v3.9.0-eksbuild.2/aws-k8s-multus-us-gov-west-1.yaml b/config/multus/v3.9.0-eksbuild.2/aws-k8s-multus-us-gov-west-1.yaml new file mode 100644 index 0000000000..52371404c2 --- /dev/null +++ b/config/multus/v3.9.0-eksbuild.2/aws-k8s-multus-us-gov-west-1.yaml @@ -0,0 +1,165 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: network-attachment-definitions.k8s.cni.cncf.io +spec: + group: k8s.cni.cncf.io + scope: Namespaced + names: + plural: network-attachment-definitions + singular: network-attachment-definition + kind: NetworkAttachmentDefinition + shortNames: + - net-attach-def + versions: + - name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + description: 'NetworkAttachmentDefinition is a CRD schema specified by the Network Plumbing + Working Group to express the intent for attaching pods to one or more logical or physical + networks. More information available at: https://github.com/k8snetworkplumbingwg/multi-net-spec' + type: object + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this represen + tation of an object. Servers should convert recognized schemas to the + latest internal value, and may reject unrecognized values. More info: + https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: 'NetworkAttachmentDefinition spec defines the desired state of a network attachment' + type: object + properties: + config: + description: 'NetworkAttachmentDefinition config is a JSON-formatted CNI configuration' + type: string +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: multus +rules: + - apiGroups: ["k8s.cni.cncf.io"] + resources: + - '*' + verbs: + - '*' + - apiGroups: + - "" + resources: + - pods + - pods/status + verbs: + - get + - update + - apiGroups: + - "" + - events.k8s.io + resources: + - events + verbs: + - create + - patch + - update +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: multus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: multus +subjects: +- kind: ServiceAccount + name: multus + namespace: kube-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: multus + namespace: kube-system +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kube-multus-ds + namespace: kube-system + labels: + tier: node + app: multus + name: multus +spec: + selector: + matchLabels: + name: multus + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + tier: node + app: multus + name: multus + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/os + operator: In + values: + - linux + - key: eks.amazonaws.com/compute-type + operator: NotIn + values: + - fargate + hostNetwork: true + tolerations: + - operator: Exists + effect: NoSchedule + serviceAccountName: multus + containers: + - name: kube-multus + image: 013241004608.dkr.ecr.us-gov-west-1.amazonaws.com/eks/multus-cni:v3.9.0-eksbuild.2 + command: ["/entrypoint.sh"] + args: + - "--multus-conf-file=auto" + - "--cni-version=0.4.0" + - "--multus-master-cni-file-name=10-aws.conflist" + - "--multus-log-level=error" + - "--multus-log-file=/var/log/aws-routed-eni/multus.log" + resources: + requests: + cpu: "100m" + memory: "50Mi" + limits: + cpu: "100m" + memory: "50Mi" + securityContext: + privileged: true + volumeMounts: + - name: cni + mountPath: /host/etc/cni/net.d + - name: cnibin + mountPath: /host/opt/cni/bin + terminationGracePeriodSeconds: 10 + volumes: + - name: cni + hostPath: + path: /etc/cni/net.d + - name: cnibin + hostPath: + path: /opt/cni/bin diff --git a/config/multus/v3.9.0-eksbuild.2/aws-k8s-multus.yaml b/config/multus/v3.9.0-eksbuild.2/aws-k8s-multus.yaml new file mode 100644 index 0000000000..9023f9cbc3 --- /dev/null +++ b/config/multus/v3.9.0-eksbuild.2/aws-k8s-multus.yaml @@ -0,0 +1,165 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: network-attachment-definitions.k8s.cni.cncf.io +spec: + group: k8s.cni.cncf.io + scope: Namespaced + names: + plural: network-attachment-definitions + singular: network-attachment-definition + kind: NetworkAttachmentDefinition + shortNames: + - net-attach-def + versions: + - name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + description: 'NetworkAttachmentDefinition is a CRD schema specified by the Network Plumbing + Working Group to express the intent for attaching pods to one or more logical or physical + networks. More information available at: https://github.com/k8snetworkplumbingwg/multi-net-spec' + type: object + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this represen + tation of an object. Servers should convert recognized schemas to the + latest internal value, and may reject unrecognized values. More info: + https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: 'NetworkAttachmentDefinition spec defines the desired state of a network attachment' + type: object + properties: + config: + description: 'NetworkAttachmentDefinition config is a JSON-formatted CNI configuration' + type: string +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: multus +rules: + - apiGroups: ["k8s.cni.cncf.io"] + resources: + - '*' + verbs: + - '*' + - apiGroups: + - "" + resources: + - pods + - pods/status + verbs: + - get + - update + - apiGroups: + - "" + - events.k8s.io + resources: + - events + verbs: + - create + - patch + - update +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: multus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: multus +subjects: +- kind: ServiceAccount + name: multus + namespace: kube-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: multus + namespace: kube-system +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kube-multus-ds + namespace: kube-system + labels: + tier: node + app: multus + name: multus +spec: + selector: + matchLabels: + name: multus + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + tier: node + app: multus + name: multus + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/os + operator: In + values: + - linux + - key: eks.amazonaws.com/compute-type + operator: NotIn + values: + - fargate + hostNetwork: true + tolerations: + - operator: Exists + effect: NoSchedule + serviceAccountName: multus + containers: + - name: kube-multus + image: 602401143452.dkr.ecr.us-west-2.amazonaws.com/eks/multus-cni:v3.9.0-eksbuild.2 + command: ["/entrypoint.sh"] + args: + - "--multus-conf-file=auto" + - "--cni-version=0.4.0" + - "--multus-master-cni-file-name=10-aws.conflist" + - "--multus-log-level=error" + - "--multus-log-file=/var/log/aws-routed-eni/multus.log" + resources: + requests: + cpu: "100m" + memory: "50Mi" + limits: + cpu: "100m" + memory: "50Mi" + securityContext: + privileged: true + volumeMounts: + - name: cni + mountPath: /host/etc/cni/net.d + - name: cnibin + mountPath: /host/opt/cni/bin + terminationGracePeriodSeconds: 10 + volumes: + - name: cni + hostPath: + path: /etc/cni/net.d + - name: cnibin + hostPath: + path: /opt/cni/bin diff --git a/docs/prefix-and-ip-target.md b/docs/prefix-and-ip-target.md index cc83c84073..be6bc1ebe0 100644 --- a/docs/prefix-and-ip-target.md +++ b/docs/prefix-and-ip-target.md @@ -24,7 +24,7 @@ The reason for this is because prefixes are allocated as /28 CIDR block or 16 co | t3.small | 1 | - | - | 58 | 2 | 48,2 | 5 | 16,16,16,10,0 | 1 | 3,2 | 22 | | | | | | | | | | | | | | | t3.small | - | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 1 | 16 | -| t3.small | - | 1 | 1 | 5 | 1 | 5 | 3 | 5 | 0 | 1 | 11 | +| t3.small | - | 1 | 1 | 5 | 1 | 5 | 1 | 5 | 0 | 1 | 11 | | t3.small | - | 1 | 1 | 17 | 1 | 17 | 2 | 16,1 | 0 | 2 | 15 | | t3.small | - | 1 | 1 | 58 | 2 | 48,10 | 4 | 16,16,16,10 | 0 | 3,1 | 6 | | | | | | | | | | | | | | diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 6f8c7cfcdf..7b48b3c056 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -215,7 +215,7 @@ created by main._main {"level":"info","ts":"2022-02-04T22:24:59.030Z","caller":"entrypoint.sh","msg":"Retrying waiting for IPAM-D"} ``` -cni v1.10.x introduced 2 new env varibales - ENABLE_IPv4 and ENABLE_IPv6. The above error can be caused if you miss adding these env variables to your cni daemonset. So the recommendation is to apply the entire manifest file corresponding to the correct [release](https://github.com/aws/amazon-vpc-cni-k8s/releases) instead of just updating the image value in existing cni daemonset. For instance, to apply the latest v1.10.x, use the below command +cni v1.10.x introduced 2 new env variables - ENABLE_IPv4 and ENABLE_IPv6. The above error can be caused if you miss adding these env variables to your cni daemonset. So the recommendation is to apply the entire manifest file corresponding to the correct [release](https://github.com/aws/amazon-vpc-cni-k8s/releases) instead of just updating the image value in existing cni daemonset. For instance, to apply the latest v1.10.x, use the below command ``` kubectl apply -f https://raw.githubusercontent.com/aws/amazon-vpc-cni-k8s/release-1.10/config/master/aws-k8s-cni.yaml ``` diff --git a/misc/eni-max-pods.txt b/misc/eni-max-pods.txt index 76a8525999..8981c9c0bf 100644 --- a/misc/eni-max-pods.txt +++ b/misc/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. # -# This file was generated at 2022-06-29T20:20:11Z +# This file was generated at 2022-08-15T23:21:13Z # # The regions queried were: # - ap-northeast-1 @@ -394,6 +394,7 @@ m6id.large 29 m6id.metal 737 m6id.xlarge 58 mac1.metal 234 +mac2.metal 234 p2.16xlarge 234 p2.8xlarge 234 p2.xlarge 58 @@ -475,6 +476,17 @@ r5n.8xlarge 234 r5n.large 29 r5n.metal 737 r5n.xlarge 58 +r6a.12xlarge 234 +r6a.16xlarge 737 +r6a.24xlarge 737 +r6a.2xlarge 58 +r6a.32xlarge 737 +r6a.48xlarge 737 +r6a.4xlarge 234 +r6a.8xlarge 234 +r6a.large 29 +r6a.metal 737 +r6a.xlarge 58 r6g.12xlarge 234 r6g.16xlarge 737 r6g.2xlarge 58 diff --git a/pkg/awsutils/awsutils.go b/pkg/awsutils/awsutils.go index d8c297b579..be4274b33f 100644 --- a/pkg/awsutils/awsutils.go +++ b/pkg/awsutils/awsutils.go @@ -138,7 +138,7 @@ type APIs interface { AllocIPAddress(eniID string) error // AllocIPAddresses allocates numIPs IP addresses on a ENI - AllocIPAddresses(eniID string, numIPs int) error + AllocIPAddresses(eniID string, numIPs int) (*ec2.AssignPrivateIpAddressesOutput, error) // DeallocIPAddresses deallocates the list of IP addresses from a ENI DeallocIPAddresses(eniID string, ips []string) error @@ -1439,7 +1439,7 @@ func (cache *EC2InstanceMetadataCache) IsPrefixDelegationSupported() bool { } // AllocIPAddresses allocates numIPs of IP address on an ENI -func (cache *EC2InstanceMetadataCache) AllocIPAddresses(eniID string, numIPs int) error { +func (cache *EC2InstanceMetadataCache) AllocIPAddresses(eniID string, numIPs int) (*ec2.AssignPrivateIpAddressesOutput, error) { var needIPs = numIPs ipLimit := cache.GetENIIPv4Limit() @@ -1450,7 +1450,7 @@ func (cache *EC2InstanceMetadataCache) AllocIPAddresses(eniID string, numIPs int // If we don't need any more IPs, exit if needIPs < 1 { - return nil + return nil, nil } log.Infof("Trying to allocate %d IP addresses on ENI %s", needIPs, eniID) @@ -1479,11 +1479,11 @@ func (cache *EC2InstanceMetadataCache) AllocIPAddresses(eniID string, numIPs int if containsPrivateIPAddressLimitExceededError(err) { log.Debug("AssignPrivateIpAddresses returned PrivateIpAddressLimitExceeded. This can happen if the data store is out of sync." + "Returning without an error here since we will verify the actual state by calling EC2 to see what addresses have already assigned to this ENI.") - return nil + return nil, nil } log.Errorf("Failed to allocate a private IP/Prefix addresses on ENI %v: %v", eniID, err) awsAPIErrInc("AssignPrivateIpAddresses", err) - return err + return nil, err } if output != nil { if cache.enablePrefixDelegation { @@ -1492,7 +1492,7 @@ func (cache *EC2InstanceMetadataCache) AllocIPAddresses(eniID string, numIPs int log.Infof("Allocated %d private IP addresses", len(output.AssignedPrivateIpAddresses)) } } - return nil + return output, nil } func (cache *EC2InstanceMetadataCache) AllocIPv6Prefixes(eniID string) ([]*string, error) { diff --git a/pkg/awsutils/awsutils_test.go b/pkg/awsutils/awsutils_test.go index c190402a99..60e52481fa 100644 --- a/pkg/awsutils/awsutils_test.go +++ b/pkg/awsutils/awsutils_test.go @@ -581,7 +581,7 @@ func TestAllocIPAddresses(t *testing.T) { mockEC2.EXPECT().AssignPrivateIpAddressesWithContext(gomock.Any(), input, gomock.Any()).Return(nil, nil) ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2, instanceType: "c5n.18xlarge"} - err := ins.AllocIPAddresses(eniID, 5) + _, err := ins.AllocIPAddresses(eniID, 5) assert.NoError(t, err) // when required IP numbers(50) is higher than ENI's limit(49) @@ -597,11 +597,11 @@ func TestAllocIPAddresses(t *testing.T) { mockEC2.EXPECT().AssignPrivateIpAddressesWithContext(gomock.Any(), input, gomock.Any()).Return(&output, nil) ins = &EC2InstanceMetadataCache{ec2SVC: mockEC2, instanceType: "c5n.18xlarge"} - err = ins.AllocIPAddresses(eniID, 50) + _, err = ins.AllocIPAddresses(eniID, 50) assert.NoError(t, err) // Adding 0 should do nothing - err = ins.AllocIPAddresses(eniID, 0) + _, err = ins.AllocIPAddresses(eniID, 0) assert.NoError(t, err) } @@ -618,7 +618,7 @@ func TestAllocIPAddressesAlreadyFull(t *testing.T) { retErr := awserr.New("PrivateIpAddressLimitExceeded", "Too many IPs already allocated", nil) mockEC2.EXPECT().AssignPrivateIpAddressesWithContext(gomock.Any(), input, gomock.Any()).Return(nil, retErr) // If EC2 says that all IPs are already attached, we do nothing - err := ins.AllocIPAddresses(eniID, 14) + _, err := ins.AllocIPAddresses(eniID, 14) assert.NoError(t, err) } @@ -634,11 +634,11 @@ func TestAllocPrefixAddresses(t *testing.T) { mockEC2.EXPECT().AssignPrivateIpAddressesWithContext(gomock.Any(), input, gomock.Any()).Return(nil, nil) ins := &EC2InstanceMetadataCache{ec2SVC: mockEC2, instanceType: "c5n.18xlarge", enablePrefixDelegation: true} - err := ins.AllocIPAddresses(eniID, 1) + _, err := ins.AllocIPAddresses(eniID, 1) assert.NoError(t, err) // Adding 0 should do nothing - err = ins.AllocIPAddresses(eniID, 0) + _, err = ins.AllocIPAddresses(eniID, 0) assert.NoError(t, err) } @@ -655,7 +655,7 @@ func TestAllocPrefixesAlreadyFull(t *testing.T) { retErr := awserr.New("PrivateIpAddressLimitExceeded", "Too many IPs already allocated", nil) mockEC2.EXPECT().AssignPrivateIpAddressesWithContext(gomock.Any(), input, gomock.Any()).Return(nil, retErr) // If EC2 says that all IPs are already attached, we do nothing - err := ins.AllocIPAddresses(eniID, 1) + _, err := ins.AllocIPAddresses(eniID, 1) assert.NoError(t, err) } diff --git a/pkg/awsutils/mocks/awsutils_mocks.go b/pkg/awsutils/mocks/awsutils_mocks.go index 60b909a312..2c87fc55fb 100644 --- a/pkg/awsutils/mocks/awsutils_mocks.go +++ b/pkg/awsutils/mocks/awsutils_mocks.go @@ -80,11 +80,12 @@ func (mr *MockAPIsMockRecorder) AllocIPAddress(arg0 interface{}) *gomock.Call { } // AllocIPAddresses mocks base method -func (m *MockAPIs) AllocIPAddresses(arg0 string, arg1 int) error { +func (m *MockAPIs) AllocIPAddresses(arg0 string, arg1 int) (*ec2.AssignPrivateIpAddressesOutput, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "AllocIPAddresses", arg0, arg1) - ret0, _ := ret[0].(error) - return ret0 + ret0, _ := ret[0].(*ec2.AssignPrivateIpAddressesOutput) + ret1, _ := ret[1].(error) + return ret0, ret1 } // AllocIPAddresses indicates an expected call of AllocIPAddresses diff --git a/pkg/awsutils/vpc_ip_resource_limit.go b/pkg/awsutils/vpc_ip_resource_limit.go index ff2aa8d01c..99b6b877dd 100644 --- a/pkg/awsutils/vpc_ip_resource_limit.go +++ b/pkg/awsutils/vpc_ip_resource_limit.go @@ -12,7 +12,7 @@ // permissions and limitations under the License. // Code generated by go generate; DO NOT EDIT. -// This file was generated at 2022-06-29T20:20:11Z +// This file was generated at 2022-08-15T23:21:13Z // // The regions queried were: // - ap-northeast-1 @@ -391,6 +391,7 @@ var InstanceNetworkingLimits = map[string]InstanceTypeLimits{ "m6id.metal": {ENILimit: 15, IPv4Limit: 50, HypervisorType: "", IsBareMetal: true}, "m6id.xlarge": {ENILimit: 4, IPv4Limit: 15, HypervisorType: "nitro", IsBareMetal: false}, "mac1.metal": {ENILimit: 8, IPv4Limit: 30, HypervisorType: "", IsBareMetal: true}, + "mac2.metal": {ENILimit: 8, IPv4Limit: 30, HypervisorType: "", IsBareMetal: true}, "p2.16xlarge": {ENILimit: 8, IPv4Limit: 30, HypervisorType: "xen", IsBareMetal: false}, "p2.8xlarge": {ENILimit: 8, IPv4Limit: 30, HypervisorType: "xen", IsBareMetal: false}, "p2.xlarge": {ENILimit: 4, IPv4Limit: 15, HypervisorType: "xen", IsBareMetal: false}, @@ -472,6 +473,17 @@ var InstanceNetworkingLimits = map[string]InstanceTypeLimits{ "r5n.large": {ENILimit: 3, IPv4Limit: 10, HypervisorType: "nitro", IsBareMetal: false}, "r5n.metal": {ENILimit: 15, IPv4Limit: 50, HypervisorType: "", IsBareMetal: true}, "r5n.xlarge": {ENILimit: 4, IPv4Limit: 15, HypervisorType: "nitro", IsBareMetal: false}, + "r6a.12xlarge": {ENILimit: 8, IPv4Limit: 30, HypervisorType: "nitro", IsBareMetal: false}, + "r6a.16xlarge": {ENILimit: 15, IPv4Limit: 50, HypervisorType: "nitro", IsBareMetal: false}, + "r6a.24xlarge": {ENILimit: 15, IPv4Limit: 50, HypervisorType: "nitro", IsBareMetal: false}, + "r6a.2xlarge": {ENILimit: 4, IPv4Limit: 15, HypervisorType: "nitro", IsBareMetal: false}, + "r6a.32xlarge": {ENILimit: 15, IPv4Limit: 50, HypervisorType: "nitro", IsBareMetal: false}, + "r6a.48xlarge": {ENILimit: 15, IPv4Limit: 50, HypervisorType: "nitro", IsBareMetal: false}, + "r6a.4xlarge": {ENILimit: 8, IPv4Limit: 30, HypervisorType: "nitro", IsBareMetal: false}, + "r6a.8xlarge": {ENILimit: 8, IPv4Limit: 30, HypervisorType: "nitro", IsBareMetal: false}, + "r6a.large": {ENILimit: 3, IPv4Limit: 10, HypervisorType: "nitro", IsBareMetal: false}, + "r6a.metal": {ENILimit: 15, IPv4Limit: 50, HypervisorType: "", IsBareMetal: true}, + "r6a.xlarge": {ENILimit: 4, IPv4Limit: 15, HypervisorType: "nitro", IsBareMetal: false}, "r6g.12xlarge": {ENILimit: 8, IPv4Limit: 30, HypervisorType: "nitro", IsBareMetal: false}, "r6g.16xlarge": {ENILimit: 15, IPv4Limit: 50, HypervisorType: "nitro", IsBareMetal: false}, "r6g.2xlarge": {ENILimit: 4, IPv4Limit: 15, HypervisorType: "nitro", IsBareMetal: false}, diff --git a/pkg/ipamd/ipamd.go b/pkg/ipamd/ipamd.go index ea798ecb2b..656d4ad9c7 100644 --- a/pkg/ipamd/ipamd.go +++ b/pkg/ipamd/ipamd.go @@ -16,7 +16,6 @@ package ipamd import ( "context" "fmt" - "math" "net" "os" "strconv" @@ -859,7 +858,7 @@ func (c *IPAMContext) tryAllocateENI(ctx context.Context) error { resourcesToAllocate := c.GetENIResourcesToAllocate() - err = c.awsClient.AllocIPAddresses(eni, resourcesToAllocate) + _, err = c.awsClient.AllocIPAddresses(eni, resourcesToAllocate) if err != nil { log.Warnf("Failed to allocate %d IP addresses on an ENI: %v", resourcesToAllocate, err) // Continue to process the allocated IP addresses @@ -932,24 +931,29 @@ func (c *IPAMContext) tryAssignIPs() (increasedPool bool, err error) { if eni != nil && len(eni.AvailableIPv4Cidrs) < c.maxIPsPerENI { currentNumberOfAllocatedIPs := len(eni.AvailableIPv4Cidrs) // Try to allocate all available IPs for this ENI - err = c.awsClient.AllocIPAddresses(eni.ID, int(math.Min(float64(c.maxIPsPerENI-currentNumberOfAllocatedIPs), float64(toAllocate)))) + resourcesToAllocate := min((c.maxIPsPerENI - currentNumberOfAllocatedIPs), toAllocate) + output, err := c.awsClient.AllocIPAddresses(eni.ID, resourcesToAllocate) if err != nil { log.Warnf("failed to allocate all available IP addresses on ENI %s, err: %v", eni.ID, err) // Try to just get one more IP - err = c.awsClient.AllocIPAddresses(eni.ID, 1) + output, err = c.awsClient.AllocIPAddresses(eni.ID, 1) if err != nil { ipamdErrInc("increaseIPPoolAllocIPAddressesFailed") return false, errors.Wrap(err, fmt.Sprintf("failed to allocate one IP addresses on ENI %s, err ", eni.ID)) } } - // This call to EC2 is needed to verify which IPs got attached to this ENI. - ec2Addrs, err := c.awsClient.GetIPv4sFromEC2(eni.ID) - if err != nil { + + if output == nil { ipamdErrInc("increaseIPPoolGetENIaddressesFailed") return true, errors.Wrap(err, "failed to get ENI IP addresses during IP allocation") } - c.addENIsecondaryIPsToDataStore(ec2Addrs, eni.ID) + var ec2ip4s []*ec2.NetworkInterfacePrivateIpAddress + ec2Addrs := output.AssignedPrivateIpAddresses + for _, ec2Addr := range ec2Addrs { + ec2ip4s = append(ec2ip4s, &ec2.NetworkInterfacePrivateIpAddress{PrivateIpAddress: aws.String(aws.StringValue(ec2Addr.PrivateIpAddress))}) + } + c.addENIsecondaryIPsToDataStore(ec2ip4s, eni.ID) return true, nil } return false, nil @@ -1001,21 +1005,22 @@ func (c *IPAMContext) tryAssignPrefixes() (increasedPool bool, err error) { eni := c.dataStore.GetENINeedsIP(c.maxPrefixesPerENI, c.useCustomNetworking) if eni != nil { currentNumberOfAllocatedPrefixes := len(eni.AvailableIPv4Cidrs) - err = c.awsClient.AllocIPAddresses(eni.ID, min((c.maxPrefixesPerENI-currentNumberOfAllocatedPrefixes), toAllocate)) + resourcesToAllocate := min((c.maxPrefixesPerENI - currentNumberOfAllocatedPrefixes), toAllocate) + output, err := c.awsClient.AllocIPAddresses(eni.ID, resourcesToAllocate) if err != nil { log.Warnf("failed to allocate all available IPv4 Prefixes on ENI %s, err: %v", eni.ID, err) // Try to just get one more prefix - err = c.awsClient.AllocIPAddresses(eni.ID, 1) + output, err = c.awsClient.AllocIPAddresses(eni.ID, 1) if err != nil { ipamdErrInc("increaseIPPoolAllocIPAddressesFailed") return false, errors.Wrap(err, fmt.Sprintf("failed to allocate one IPv4 prefix on ENI %s, err: %v", eni.ID, err)) } } - ec2Prefixes, err := c.awsClient.GetIPv4PrefixesFromEC2(eni.ID) - if err != nil { + if output == nil { ipamdErrInc("increaseIPPoolGetENIprefixedFailed") return true, errors.Wrap(err, "failed to get ENI Prefix addresses during IPv4 Prefix allocation") } + ec2Prefixes := output.AssignedIpv4Prefixes c.addENIv4prefixesToDataStore(ec2Prefixes, eni.ID) return true, nil } @@ -2043,10 +2048,10 @@ func (c *IPAMContext) GetENIResourcesToAllocate() int { resourcesToAllocate = c.maxIPsPerENI short, _, warmTargetDefined := c.datastoreTargetState() if warmTargetDefined { - resourcesToAllocate = short + resourcesToAllocate = min(short, c.maxIPsPerENI) } } else { - resourcesToAllocate = c.getPrefixesNeeded() + resourcesToAllocate = min(c.getPrefixesNeeded(), c.maxPrefixesPerENI) } return resourcesToAllocate } diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index 01dc61468d..40a2aeb37b 100755 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -102,6 +102,7 @@ AGENT_LOG_PATH=${AGENT_LOG_PATH:-"aws-k8s-agent.log"} HOST_CNI_BIN_PATH=${HOST_CNI_BIN_PATH:-"/host/opt/cni/bin"} HOST_CNI_CONFDIR_PATH=${HOST_CNI_CONFDIR_PATH:-"/host/etc/cni/net.d"} AWS_VPC_K8S_CNI_VETHPREFIX=${AWS_VPC_K8S_CNI_VETHPREFIX:-"eni"} +AWS_VPC_K8S_CNI_RANDOMIZESNAT=${AWS_VPC_K8S_CNI_RANDOMIZESNAT:-"prng"} AWS_VPC_ENI_MTU=${AWS_VPC_ENI_MTU:-"9001"} POD_SECURITY_GROUP_ENFORCING_MODE=${POD_SECURITY_GROUP_ENFORCING_MODE:-"strict"} AWS_VPC_K8S_PLUGIN_LOG_FILE=${AWS_VPC_K8S_PLUGIN_LOG_FILE:-"/var/log/aws-routed-eni/plugin.log"} diff --git a/scripts/lib/canary.sh b/scripts/lib/canary.sh index 110cf41beb..58f6157050 100644 --- a/scripts/lib/canary.sh +++ b/scripts/lib/canary.sh @@ -17,7 +17,7 @@ fi if [[ -z "${SKIP_MAKE_TEST_BINARIES}" ]]; then echo "making ginkgo test binaries" - (cd $SCRIPT_DIR/.. && make build-test-binaries) + (cd $SCRIPT_DIR/../test && make build-test-binaries) else echo "skipping making ginkgo test binaries" fi diff --git a/test/framework/resources/aws/services/iam.go b/test/framework/resources/aws/services/iam.go index 2a990faccc..d7d8d7e14e 100644 --- a/test/framework/resources/aws/services/iam.go +++ b/test/framework/resources/aws/services/iam.go @@ -37,6 +37,7 @@ type IAM interface { CreatePolicy(policyName string, policyDocument string) (*iam.CreatePolicyOutput, error) DeletePolicy(policyARN string) error GetInstanceProfile(instanceProfileName string) (*iam.GetInstanceProfileOutput, error) + ListPolicies(scope string) (*iam.ListPoliciesOutput, error) } type defaultIAM struct { @@ -84,6 +85,13 @@ func (d *defaultIAM) GetInstanceProfile(instanceProfileName string) (*iam.GetIns return d.IAMAPI.GetInstanceProfile(getInstanceProfileInput) } +func (d *defaultIAM) ListPolicies(scope string) (*iam.ListPoliciesOutput, error) { + listPolicyInput := &iam.ListPoliciesInput{ + Scope: aws.String(scope), + } + return d.IAMAPI.ListPolicies(listPolicyInput) +} + func NewIAM(session *session.Session) IAM { return &defaultIAM{ IAMAPI: iam.New(session), diff --git a/test/helm/charts/cni-metrics-helper/values.yaml b/test/helm/charts/cni-metrics-helper/values.yaml index 60591b6f37..ef5852f5a8 100644 --- a/test/helm/charts/cni-metrics-helper/values.yaml +++ b/test/helm/charts/cni-metrics-helper/values.yaml @@ -7,7 +7,7 @@ replicaCount: 1 image: repository: 602401143452.dkr.ecr.us-west-2.amazonaws.com/cni-metrics-helper pullPolicy: Always - tag: "v1.7.10" + tag: "v1.11.2" imagePullSecrets: [] nameOverride: "" @@ -56,7 +56,8 @@ autoscaling: targetCPUUtilizationPercentage: 80 # targetMemoryUtilizationPercentage: 80 -nodeSelector: {} +nodeSelector: + kubernetes.io/os: "linux" tolerations: [] diff --git a/test/integration/README.md b/test/integration/README.md index 41e88d2708..3355fa9339 100644 --- a/test/integration/README.md +++ b/test/integration/README.md @@ -38,6 +38,26 @@ ginkgo -v --failOnPending -- \ ``` ### cni-metrics-helper + +> #### Prerequisites: +> +> This test expects CNIMetricsHelperPolicy to be present in the test account. Create the policy with below permissions in the test account: + +``` +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "cloudwatch:PutMetricData" + ], + "Resource": "*" + } + ] +} +``` + The CNI Metrics Helper Integration test uses helm to install the cni-metrics-helper. The helm charts are present in local test directory and if needed can be published to a repository. In order to test a custom image you need pass the following tags along with the tags discussed above. @@ -120,6 +140,7 @@ This section is written to give a high level overview for the process of develop ### Organization of test folders The test folders are located at ```amazon-vpc-cni-k8s/tree/master/test/integration``` It has the following sub-folders ```cni```, ```ipamd```, ```ipv6``` and ```metrics-helper```. +<<<<<<< HEAD The ginkgo test for any component has generally two main components: - ```ginkgo suite file```: Every ginkgo suite file will have ```RegisterFailHandler``` and ```RunSpecs```. A Ginkgo test signals failure by calling Ginkgo’s Fail function passed to RegisterFailHandler. RunSpec tells Ginkgo to start the test suite. Running ginkgo inside the sub-folder containing the test suite should trigger the ```RunSpecs``` function in the suite. @@ -338,3 +359,224 @@ Test Suite Failed ``` For additional [Debugging Help](./Troubleshooting.md) +======= + +The ginkgo test for any component has generally two main components: +- ```ginkgo suite file```: Every ginkgo suite file will have ```RegisterFailHandler``` and ```RunSpecs```. A Ginkgo test signals failure by calling Ginkgo’s Fail function passed to RegisterFailHandler. RunSpec tells Ginkgo to start the test suite. Running ginkgo inside the sub-folder containing the test suite should trigger the ```RunSpecs``` function in the suite. + +- ```ginkgo test files```: By default, test files in the same folder as ginkgo suite file will be run on the trigger of the ```RunSpecs``` function in the ginkgo test suite. + +### Adding new test folder + +Say for instance, the cni test and suite files in the cni folder has functionality related to CNI component in VPC CNI as you would expect. If you want to add a test that does not belong to any of the modules in the integration folder, you will have to create a new folder structure as below +- ```integration``` + - ```new_component_test_xyz``` + - ```new_component_test_xyz/new_component_xyz_suite_test.go```) + - ```new_component_test_xyz/xyz_test_1.go``` + - ```new_component_test_xyz/xyz_test_2.go```) + - ```...``` + - ```cni``` + - ```...``` + +### Structure of sample test suite: +```cni/pod_networking_suite_test.go``` + +#### Logic Components + +- ```BeforeSuite``` : All common steps that should be performed before the suite are added here. In the sample BeforeSuite below, we can see a few prerequistes for the tests that run under the suite, like namespace creation and setting of env variables like WARM_IP_TARGET. +- ```AfterSuite``` : All common steps that should be performed after the suite are added here. In the sample AfterSuite below, we can see cleanup to be followed after running the tests under the suite like namespace deletion and resetting of env variables. + +```go +package cni + +import ( + // fmt is imported for printing + "fmt" + // testing is imported as it is the original go testing module used by ginkgo + "testing" + // The below folders are similar to the ones discussed above + "github.com/aws/amazon-vpc-cni-k8s/test/framework" + k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" + //ginkgo and the assertion library: gomega are imported below + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + //v1 imported for Node libraries + v1 "k8s.io/api/core/v1" +) + +//Global variables for the suite are defined here +const InstanceTypeNodeLabelKey = "beta.kubernetes.io/instance-type" + +var f *framework.Framework +... + +//The function below is the starter function for running tests +//for each suite and attaching a fail handler for the same +func TestCNIPodNetworking(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "CNI Pod Networking Suite") +} + +//The following function has checks and setup needed before running the suite. +var _ = BeforeSuite(func() { + f = framework.New(framework.GlobalOptions) + + // The Sequence of By and Expect are provided by the omega package and + // ensure the correct functionality by providing assertions at every step + + By("creating test namespace") + f.K8sResourceManagers.NamespaceManager(). + CreateNamespace(utils.DefaultTestNamespace) + + ... + ... + + // Set the WARM_ENI_TARGET to 0 to prevent all pods being scheduled on secondary ENI + k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, "aws-node", "kube-system", + "aws-node", map[string]string{"WARM_IP_TARGET": "3", "WARM_ENI_TARGET": "0"}) +}) + +//The following function has checks and setup needed after running the suite. +var _ = AfterSuite(func() { + By("deleting test namespace") + f.K8sResourceManagers.NamespaceManager(). + DeleteAndWaitTillNamespaceDeleted(utils.DefaultTestNamespace) + + k8sUtils.UpdateEnvVarOnDaemonSetAndWaitUntilReady(f, "aws-node", "kube-system", + "aws-node", map[string]string{ + AWS_VPC_ENI_MTU: "9001", + AWS_VPC_K8S_CNI_VETHPREFIX: "eni", + }, + map[string]struct{}{ + "WARM_IP_TARGET": {}, + "WARM_ENI_TARGET": {}, + }) +}) + +``` + +### Structure of sample test corresponding to a suite: +```cni/pod_traffic_test_PD_enabled.go``` + +#### Logic Components + +- ```It```: Individual spec specified by It. It is the innermost component that holds the core testing logic. The other components listed below are hierarchically arranged before and after It in order to provision/deprovision the setup required to run the individual spec (It). In the sample test below, our It tests for 99+% traffic success rate between client and server pods. +- ```Describe``` : This block is used describe the individual behaviors of code. In the sample test below, we try to describe a behaviour of pod traffic with PD (Prefix delegation) enabled. +- ```Context``` : Context block is used to execute the behavior used by Describe block under different scenarios. We can have different Context or scenarios for our sample test below, like testing TCP pod traffic or UDP pod traffic. +- ```JustBeforeEach``` : Executed immediately before each test, however following the execution order from outside blocks to inside blocks before an It(spec) in case of multipe JustBeforeEach blocks. We can see that in the JustBeforeEach function below, we setup server deployment and enable PD just before we run the It. +- ```JustAfterEach``` : Executed immediately after each test, however following the execution order from inside blocks to outside blocks after an It(spec) in case of multipe JustAfterEach blocks. We can see that in the JustAfterEach function below, we reset PD to false after running It. +- ```BeforeEach``` : Executed (not immediately) before each test, however following the execution order from outside blocks to inside blocks before an It(spec) in case of multipe BeforeEach blocks. +- ```AfterEach``` : Executed (not immediately) after each test, however following the execution order from inside blocks to outside blocks after an It(spec) in case of multipe AfterEach blocks. + +Each of the above components are arranged hierarchically in a way that makes most sense for abstracting the common logic from the rest of the code. + +Every ```BeforeEach``` precedes every ```JustBeforeEach``` in execution before execution of an It. +Every ```JustAfterEach``` precedes every ```AfterEach``` in execution after execution of an It. + + +```go +package cni + +import ( + // Imports similar to above test suite found here + ... +) + +// This blocks is used describe the individual behaviors of code +var _ = Describe("Test pod networking with prefix delegation enabled", func() { + var ( + // List of global variables used for the tests below + // The Pod labels for client and server in order to retrieve the + // client and server Pods belonging to a Deployment/Jobs + labelKey = "app" + serverPodLabelVal = "server-pod" + clientPodLabelVal = "client-pod" + serverDeploymentBuilder *manifest.DeploymentBuilder + // Value for the Environment variable ENABLE_PREFIX_DELEGATION + enableIPv4PrefixDelegation string + ) + + JustBeforeEach(func() { + + By("creating deployment") + serverDeploymentBuilder = manifest.NewDefaultDeploymentBuilder(). + Name("traffic-server"). + NodeSelector(f.Options.NgNameLabelKey, f.Options.NgNameLabelVal) + + By("Set PD") + k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, + utils.AwsNodeNamespace, utils.AwsNodeName, + map[string]string{"ENABLE_PREFIX_DELEGATION": enableIPv4PrefixDelegation}) + }) + + JustAfterEach(func() { + + k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, + utils.AwsNodeNamespace, utils.AwsNodeName, + map[string]string{"ENABLE_PREFIX_DELEGATION": "false"}) + }) + // Context block is used to execute the behavior used + // by Describe block under different scenarios + Context("when testing TCP traffic between client and server pods", func() { + BeforeEach(func() { + enableIPv4PrefixDelegation = "true" + }) + + // Below is example of individual spec specified by It + It("should have 99+% success rate", func() { + trafficTester := agent.TrafficTest{ + Framework: f, + TrafficServerDeploymentBuilder: serverDeploymentBuilder, + ... + ClientPodLabelKey: labelKey, + ClientPodLabelVal: clientPodLabelVal, + } + + successRate, err := trafficTester.TestTraffic() + Expect(err).ToNot(HaveOccurred()) + Expect(successRate).Should(BeNumerically(">=", float64(99))) + }) + }) + + // Similarly we can also test for UDP traffic between + // client and server pods in another context here +}) +``` + +More info can be found here https://github.com/onsi/ginkgo + + +### Troubleshooting Test Failure + +Everytime you run a ginkgo test suite, you will get stats on number of tests passed/failed/pending/skipped as follows: + +``` +! -- <> Passed | <> Failed | <> Pending | <> Skipped +``` + + +In case of an error, the error message will be printed in ginkgo error stack. For instance, in case kubeconfig is not correctly set, you will get an error message similar to below: + +``` + Unexpected error: + <*errors.fundamental | xxxx>: { + msg: "kubeconfig must be set!", + stack: [xxx,xxx,...], + } + kubeconfig must be set! + occurred + + ... + Test Panicked + runtime error: invalid memory address or nil pointer dereference + .. + + Full Stack Trace + .. +Test Suite Failed +``` + +For additional [Debugging Help](./Troubleshooting.md) + +>>>>>>> master diff --git a/test/integration/ipamd/eni_ip_leak_test.go b/test/integration/ipamd/eni_ip_leak_test.go index 4446e0fc48..1c5947de0a 100644 --- a/test/integration/ipamd/eni_ip_leak_test.go +++ b/test/integration/ipamd/eni_ip_leak_test.go @@ -22,11 +22,6 @@ var numOfNodes int var _ = Describe("[CANARY] ENI/IP Leak Test", func() { Context("ENI/IP Released on Pod Deletion", func() { - BeforeEach(func() { - By("creating test namespace") - f.K8sResourceManagers.NamespaceManager(). - CreateNamespace(utils.DefaultTestNamespace) - }) It("Verify that on Pod Deletion, ENI/IP State is restored", func() { // Set the WARM_ENI_TARGET to 0 to prevent all pods being scheduled on secondary ENI @@ -72,9 +67,6 @@ var _ = Describe("[CANARY] ENI/IP Leak Test", func() { }) AfterEach(func() { - By("deleting test namespace") - f.K8sResourceManagers.NamespaceManager(). - DeleteAndWaitTillNamespaceDeleted(utils.DefaultTestNamespace) By("Restoring WARM ENI Target value") k8sUtils.RemoveVarFromDaemonSetAndWaitTillUpdated(f, "aws-node", "kube-system", diff --git a/test/integration/ipamd/eni_tag_test.go b/test/integration/ipamd/eni_tag_test.go index c8ab5a7e83..3bfa75fb90 100644 --- a/test/integration/ipamd/eni_tag_test.go +++ b/test/integration/ipamd/eni_tag_test.go @@ -39,9 +39,6 @@ var _ = Describe("test tags are created on Secondary ENI", func() { // sets the desired environment variables and gets the list of new ENIs created after setting // the environment variables JustBeforeEach(func() { - By("creating test namespace") - f.K8sResourceManagers.NamespaceManager(). - CreateNamespace(utils.DefaultTestNamespace) // To re-initialize for each test case newENIs = []string{} @@ -84,9 +81,6 @@ var _ = Describe("test tags are created on Secondary ENI", func() { }) JustAfterEach(func() { - By("deleting test namespace") - f.K8sResourceManagers.NamespaceManager(). - DeleteAndWaitTillNamespaceDeleted(utils.DefaultTestNamespace) envVarToRemove := map[string]struct{}{} for key, _ := range environmentVariables { diff --git a/test/integration/ipamd/introspection_test.go b/test/integration/ipamd/introspection_test.go index d31375a7be..178ee6b899 100644 --- a/test/integration/ipamd/introspection_test.go +++ b/test/integration/ipamd/introspection_test.go @@ -34,9 +34,6 @@ var _ = Describe("test Environment Variables for IPAMD Introspection ", func() { var curlJob *v1.Job JustBeforeEach(func() { - By("creating test namespace") - f.K8sResourceManagers.NamespaceManager(). - CreateNamespace(utils.DefaultTestNamespace) // Initially the host networking job pod should succeed curlContainer = manifest.NewCurlContainer(). @@ -67,12 +64,6 @@ var _ = Describe("test Environment Variables for IPAMD Introspection ", func() { Expect(err).ToNot(HaveOccurred()) }) - JustAfterEach(func() { - By("deleting test namespace") - f.K8sResourceManagers.NamespaceManager(). - DeleteAndWaitTillNamespaceDeleted(utils.DefaultTestNamespace) - }) - Context("when disabling introspection by setting DISABLE_INTROSPECTION to true", func() { It("introspection should not work anymore", func() { diff --git a/test/integration/ipamd/ipamd_suite_test.go b/test/integration/ipamd/ipamd_suite_test.go index 441302f4d6..c4b25b9e57 100644 --- a/test/integration/ipamd/ipamd_suite_test.go +++ b/test/integration/ipamd/ipamd_suite_test.go @@ -15,9 +15,11 @@ package ipamd import ( "testing" + "time" "github.com/aws/amazon-vpc-cni-k8s/test/framework" k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) @@ -30,6 +32,10 @@ func TestIPAMD(t *testing.T) { var _ = BeforeSuite(func() { f = framework.New(framework.GlobalOptions) + By("creating test namespace") + f.K8sResourceManagers.NamespaceManager(). + CreateNamespace(utils.DefaultTestNamespace) + nodeList, err := f.K8sResourceManagers.NodeManager().GetNodes(f.Options.NgNameLabelKey, f.Options.NgNameLabelVal) Expect(err).ToNot(HaveOccurred()) @@ -43,4 +49,17 @@ var _ = BeforeSuite(func() { instanceID := k8sUtils.GetInstanceIDFromNode(primaryNode) primaryInstance, err = f.CloudServices.EC2().DescribeInstance(instanceID) Expect(err).ToNot(HaveOccurred()) + + // Remove WARM_ENI_TARGET, WARM_IP_TARGET, MINIMUM_IP_TARGET and WARM_PREFIX_TARGET before running IPAMD tests + k8sUtils.RemoveVarFromDaemonSetAndWaitTillUpdated(f, "aws-node", "kube-system", + "aws-node", map[string]struct{}{"WARM_ENI_TARGET": {}, "WARM_IP_TARGET": {}, "MINIMUM_IP_TARGET": {}, "WARM_PREFIX_TARGET": {}}) + + // Allow reconciler to free up ENIs if any + time.Sleep(utils.PollIntervalLong) +}) + +var _ = AfterSuite(func() { + By("deleting test namespace") + f.K8sResourceManagers.NamespaceManager(). + DeleteAndWaitTillNamespaceDeleted(utils.DefaultTestNamespace) }) diff --git a/test/integration/ipamd/metrics_test.go b/test/integration/ipamd/metrics_test.go index facc6278ef..d967374956 100644 --- a/test/integration/ipamd/metrics_test.go +++ b/test/integration/ipamd/metrics_test.go @@ -32,18 +32,6 @@ var _ = Describe("test IPAMD metric environment variable", func() { // Job's output determines if the API is reachable or not var curlJob *v1.Job - JustBeforeEach(func() { - By("creating test namespace") - f.K8sResourceManagers.NamespaceManager(). - CreateNamespace(utils.DefaultTestNamespace) - }) - - JustAfterEach(func() { - By("deleting test namespace") - f.K8sResourceManagers.NamespaceManager(). - DeleteAndWaitTillNamespaceDeleted(utils.DefaultTestNamespace) - }) - Context("when metrics is disabled", func() { metricAddr := "127.0.0.1:61678/metrics" It("should not be accessible anymore", func() { diff --git a/test/integration/ipamd/warm_target_test_PD_enabled.go b/test/integration/ipamd/warm_target_test_PD_enabled.go index e115c588a7..7618ceac41 100644 --- a/test/integration/ipamd/warm_target_test_PD_enabled.go +++ b/test/integration/ipamd/warm_target_test_PD_enabled.go @@ -73,7 +73,6 @@ var _ = Describe("test warm target variables", func() { *primaryInstance.PrivateDnsName, pod.Status.PodIP, pod.Spec.NodeName)) if pod.Spec.NodeName == *primaryInstance.PrivateDnsName { assigned++ - break } } diff --git a/test/integration/metrics-helper/metric_helper_test.go b/test/integration/metrics-helper/metric_helper_test.go index b84c45d6a2..252d151dde 100644 --- a/test/integration/metrics-helper/metric_helper_test.go +++ b/test/integration/metrics-helper/metric_helper_test.go @@ -28,18 +28,6 @@ import ( var _ = Describe("test cni-metrics-helper publishes metrics", func() { - JustBeforeEach(func() { - By("creating test namespace") - f.K8sResourceManagers.NamespaceManager(). - CreateNamespace(utils.DefaultTestNamespace) - }) - - JustAfterEach(func() { - By("deleting test namespace") - f.K8sResourceManagers.NamespaceManager(). - DeleteAndWaitTillNamespaceDeleted(utils.DefaultTestNamespace) - }) - Context("when a metric is updated", func() { It("the updated metric is published to CW", func() { diff --git a/test/integration/metrics-helper/metrics_helper_suite_test.go b/test/integration/metrics-helper/metrics_helper_suite_test.go index e36d1218e1..8e8ea90f9e 100644 --- a/test/integration/metrics-helper/metrics_helper_suite_test.go +++ b/test/integration/metrics-helper/metrics_helper_suite_test.go @@ -14,13 +14,11 @@ package metrics_helper import ( - "encoding/json" "flag" "strings" "testing" "github.com/aws/amazon-vpc-cni-k8s/test/framework" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/aws/services" k8sUtil "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" @@ -54,7 +52,7 @@ const ( // Parse optional flags for setting the cni metrics helper image func init() { flag.StringVar(&imageRepository, "cni-metrics-helper-image-repo", "602401143452.dkr.ecr.us-west-2.amazonaws.com/cni-metrics-helper", "CNI Metrics Helper Image Repository") - flag.StringVar(&imageTag, "cni-metrics-helper-image-tag", "v1.7.10", "CNI Metrics Helper Image Tag") + flag.StringVar(&imageTag, "cni-metrics-helper-image-tag", "v1.11.2", "CNI Metrics Helper Image Tag") // Order in which we try fetch the keys and use it as CLUSTER_ID dimension clusterIDKeys = []string{ @@ -72,28 +70,11 @@ func TestCNIMetricsHelper(t *testing.T) { var _ = BeforeSuite(func() { f = framework.New(framework.GlobalOptions) - // Create a new policy with PutMetric Permission - policy := services.PolicyDocument{ - Version: "2012-10-17", - Statement: []services.StatementEntry{ - { - Effect: "Allow", - Action: []string{"cloudwatch:PutMetricData"}, - Resource: "*", - }, - }, - } - - b, err := json.Marshal(policy) - Expect(err).ToNot(HaveOccurred()) + By("creating test namespace") + f.K8sResourceManagers.NamespaceManager(). + CreateNamespace(utils.DefaultTestNamespace) - By("creating the CNIMetricsHelperPolicy policy") - createPolicyOutput, err := f.CloudServices.IAM(). - CreatePolicy("CNIMetricsHelperPolicy", string(b)) - Expect(err).ToNot(HaveOccurred()) - policyARN = *createPolicyOutput.Policy.Arn - - By("getting the node instance profile") + By("getting the node list") nodeList, err := f.K8sResourceManagers.NodeManager().GetAllNodes() Expect(err).ToNot(HaveOccurred()) Expect(len(nodeList.Items)).To(BeNumerically(">", 0)) @@ -133,9 +114,21 @@ var _ = BeforeSuite(func() { instanceProfileOutput, err := f.CloudServices.IAM().GetInstanceProfile(instanceProfileRoleName) Expect(err).ToNot(HaveOccurred()) - By("attaching policy to the node IAM role") ngRoleName = *instanceProfileOutput.InstanceProfile.Roles[0].RoleName - By("attaching the node instance role") + By("attaching CNIMetricsHelperPolicy to the node IAM role") + + // We should ideally use the PathPrefix argument to list the policy, but this is returning an empty list. So workaround by listing local policies & filter + // SO issue: https://stackoverflow.com/questions/66287626/aws-cli-list-policies-to-find-a-policy-with-a-specific-name + policyList, err := f.CloudServices.IAM().ListPolicies("Local") + Expect(err).ToNot((HaveOccurred())) + + for _, item := range policyList.Policies { + if strings.Contains(*item.PolicyName, "CNIMetricsHelperPolicy") { + policyARN = *item.Arn + break + } + } + err = f.CloudServices.IAM().AttachRolePolicy(policyARN, ngRoleName) Expect(err).ToNot(HaveOccurred()) @@ -156,11 +149,11 @@ var _ = AfterSuite(func() { err = f.CloudServices.IAM().DetachRolePolicy(policyARN, ngRoleName) Expect(err).ToNot(HaveOccurred()) - By("deleting the CNIMetricsHelperPolicy policy") - err = f.CloudServices.IAM().DeletePolicy(policyARN) - Expect(err).ToNot(HaveOccurred()) - By("uninstalling cni-metrics-helper using helm") err := f.InstallationManager.UnInstallCNIMetricsHelper() Expect(err).ToNot(HaveOccurred()) + + By("deleting test namespace") + f.K8sResourceManagers.NamespaceManager(). + DeleteAndWaitTillNamespaceDeleted(utils.DefaultTestNamespace) })