From 9bf7ef2e2e7f0d1e9fe142a60ab0b4afebc5a7f7 Mon Sep 17 00:00:00 2001 From: Liz Fong-Jones Date: Tue, 26 Apr 2022 15:50:33 -0700 Subject: [PATCH 001/232] add c7g support (#903) See https://github.com/aws/amazon-vpc-cni-k8s/pull/1940 --- files/eni-max-pods.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 203187e6f..0731af808 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -147,6 +147,15 @@ c6i.8xlarge 234 c6i.large 29 c6i.metal 737 c6i.xlarge 58 +c7g.12xlarge 234 +c7g.16xlarge 737 +c7g.2xlarge 58 +c7g.4xlarge 234 +c7g.8xlarge 234 +c7g.large 29 +c7g.medium 8 +c7g.metal 737 +c7g.xlarge 58 cc2.8xlarge 234 cr1.8xlarge 234 d2.2xlarge 58 From 17e89fd482c74d8d3d6ccd42e1d90052b93fbc9c Mon Sep 17 00:00:00 2001 From: Andrew Johnstone Date: Tue, 26 Apr 2022 23:51:49 +0100 Subject: [PATCH 002/232] Update bootstrap.sh (#893) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When replaying user-data in testing will bail user-data when strict... 
``` mv: cannot stat ‘/etc/eks/iptables-restore.service’: No such file or directory Exited with error on line 424 ``` --- files/bootstrap.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index dfeb42a92..bac6969be 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -467,9 +467,9 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then sudo mkdir -p /etc/containerd sudo mkdir -p /etc/cni/net.d sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml - sudo mv /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml - sudo mv /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service - sudo mv /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service + sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml + sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service + sudo cp -v /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/sandbox-image.service ln -sf /run/containerd/containerd.sock /run/dockershim.sock @@ -482,7 +482,7 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then elif [[ "$CONTAINER_RUNTIME" = "dockerd" ]]; then mkdir -p /etc/docker bash -c "/sbin/iptables-save > /etc/sysconfig/iptables" - mv /etc/eks/iptables-restore.service /etc/systemd/system/iptables-restore.service + cp -v /etc/eks/iptables-restore.service /etc/systemd/system/iptables-restore.service sudo chown root:root /etc/systemd/system/iptables-restore.service systemctl daemon-reload systemctl enable iptables-restore From 08b37aa0805fd3c9697a26503e11ff1c414cd5c1 Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Wed, 27 Apr 2022 18:38:42 -0400 Subject: [PATCH 003/232] Update CHANGELOG for v20220421 release (#906) --- CHANGELOG.md | 
33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 701e2b9a5..aa2d98fd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,38 @@ # Changelog +### AMI Release v20220421 +* amazon-eks-gpu-node-1.22-v20220421 +* amazon-eks-gpu-node-1.21-v20220421 +* amazon-eks-gpu-node-1.20-v20220421 +* amazon-eks-gpu-node-1.19-v20220421 +* amazon-eks-arm64-node-1.22-v20220421 +* amazon-eks-arm64-node-1.21-v20220421 +* amazon-eks-arm64-node-1.20-v20220421 +* amazon-eks-arm64-node-1.19-v20220421 +* amazon-eks-node-1.22-v20220421 +* amazon-eks-node-1.21-v20220421 +* amazon-eks-node-1.20-v20220421 +* amazon-eks-node-1.19-v20220421 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.188-104.359.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-2.amzn2.0.1 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0-1.amzn2 + +Notable changes: +* Includes patched Kernel for [CVE-2022-26490](https://alas.aws.amazon.com/cve/html/CVE-2022-26490.html), [CVE-2022-27666](https://alas.aws.amazon.com/cve/html/CVE-2022-27666.html) and [CVE-2022-28356](https://alas.aws.amazon.com/cve/html/CVE-2022-28356.html) +* New release with AMIs now available in ap-southeast-3 + ### AMI Release v20220420 * amazon-eks-gpu-node-1.22-v20220420 * amazon-eks-gpu-node-1.21-v20220420 From ab55838f9f9aefdac1788f006c75c6c1adf80ea2 Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Mon, 2 May 2022 17:42:35 -0700 Subject: [PATCH 004/232] Update CHANGELOG.md (#909) Co-authored-by: ljosyula --- CHANGELOG.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index aa2d98fd4..2e600a06d 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,38 @@ # Changelog +### AMI Release v20220429 +* amazon-eks-gpu-node-1.22-v20220429 +* amazon-eks-gpu-node-1.21-v20220429 +* amazon-eks-gpu-node-1.20-v20220429 +* amazon-eks-gpu-node-1.19-v20220429 +* amazon-eks-arm64-node-1.22-v20220429 +* amazon-eks-arm64-node-1.21-v20220429 +* amazon-eks-arm64-node-1.20-v20220429 +* amazon-eks-arm64-node-1.19-v20220429 +* amazon-eks-node-1.22-v20220429 +* amazon-eks-node-1.21-v20220429 +* amazon-eks-node-1.20-v20220429 +* amazon-eks-node-1.19-v20220429 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.5/2021-11-10/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.188-104.359.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-2.amzn2.0.1 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0-1.amzn2 + +Notable changes: +* Added c7g support +* [When replaying user-data in testing will bail user-data when strict due to moving files](https://github.com/awslabs/amazon-eks-ami/pull/893/files) + ### AMI Release v20220421 * amazon-eks-gpu-node-1.22-v20220421 * amazon-eks-gpu-node-1.21-v20220421 From a3ce201d5b1922486c42f9bca94f36ebbcac2b01 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Tue, 3 May 2022 16:56:52 -0700 Subject: [PATCH 005/232] Update the runc,docker and containerd versions in config json (#912) --- eks-worker-al2.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index ffcf644f3..37f7af523 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -13,9 +13,9 @@ "kubernetes_version": null, "kubernetes_build_date": null, "kernel_version": "", - "docker_version": "20.10.7-5.amzn2", - "containerd_version": "1.4.6-8.amzn2", - "runc_version": "1.0.0-2.amzn2", + "docker_version": "20.10.13-2.amzn2", + "containerd_version": 
"1.4.13-2.amzn2.0.1", + "runc_version": "1.0.3-2.amzn2", "cni_plugin_version": "v0.8.6", "pull_cni_from_github": "true", "source_ami_id": "", From edb1d413c80ab0bc12f97eb7a465d42c6c66b870 Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Mon, 9 May 2022 10:15:27 -0700 Subject: [PATCH 006/232] Update k8s version (#916) * Revert "DNS_CLUSTER_IP cannot be specified in IPv6 Cluster (#860)" This reverts commit 02823975992a9291609498b00c00b6434e0e4d19. * updating the deprecated version in eks template Co-authored-by: Ravi Sinha --- amazon-eks-nodegroup.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml index 9b8cfbd65..89366e2bf 100644 --- a/amazon-eks-nodegroup.yaml +++ b/amazon-eks-nodegroup.yaml @@ -74,7 +74,7 @@ Parameters: NodeImageIdSSMParam: Type: "AWS::SSM::Parameter::Value" - Default: /aws/service/eks/optimized-ami/1.17/amazon-linux-2/recommended/image_id + Default: /aws/service/eks/optimized-ami/1.22/amazon-linux-2/recommended/image_id Description: AWS Systems Manager Parameter Store parameter of the AMI ID for the worker node instances. Change this value to match the version of Kubernetes you are using. DisableIMDSv1: From 3cbbc65d72d34698ffd1f3be70e5f024c17dae54 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 9 May 2022 11:07:24 -0700 Subject: [PATCH 007/232] Update eni-max-pods.txt (#917) --- files/eni-max-pods.txt | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 0731af808..7dcbd973f 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,21 +11,26 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2022-02-15T18:47:49Z +# This file was generated at 2022-05-09T10:14:37-07:00 # # The regions queried were: +# - af-south-1 +# - ap-east-1 # - ap-northeast-1 # - ap-northeast-2 # - ap-northeast-3 # - ap-south-1 # - ap-southeast-1 # - ap-southeast-2 +# - ap-southeast-3 # - ca-central-1 # - eu-central-1 # - eu-north-1 +# - eu-south-1 # - eu-west-1 # - eu-west-2 # - eu-west-3 +# - me-south-1 # - sa-east-1 # - us-east-1 # - us-east-2 @@ -110,6 +115,7 @@ c6a.48xlarge 737 c6a.4xlarge 234 c6a.8xlarge 234 c6a.large 29 +c6a.metal 737 c6a.xlarge 58 c6g.12xlarge 234 c6g.16xlarge 737 @@ -233,6 +239,13 @@ i3en.6xlarge 234 i3en.large 29 i3en.metal 737 i3en.xlarge 58 +i4i.16xlarge 737 +i4i.2xlarge 58 +i4i.32xlarge 737 +i4i.4xlarge 234 +i4i.8xlarge 234 +i4i.large 29 +i4i.xlarge 58 im4gn.16xlarge 737 im4gn.2xlarge 58 im4gn.4xlarge 234 @@ -334,6 +347,7 @@ m6a.48xlarge 737 m6a.4xlarge 234 m6a.8xlarge 234 m6a.large 29 +m6a.metal 737 m6a.xlarge 58 m6g.12xlarge 234 m6g.16xlarge 737 @@ -531,6 +545,18 @@ x2gd.large 29 x2gd.medium 8 x2gd.metal 737 x2gd.xlarge 58 +x2idn.16xlarge 737 +x2idn.24xlarge 737 +x2idn.32xlarge 737 +x2idn.metal 737 +x2iedn.16xlarge 737 +x2iedn.24xlarge 737 +x2iedn.2xlarge 58 +x2iedn.32xlarge 737 +x2iedn.4xlarge 234 +x2iedn.8xlarge 234 +x2iedn.metal 737 +x2iedn.xlarge 58 x2iezn.12xlarge 737 x2iezn.2xlarge 58 x2iezn.4xlarge 234 From 5f8033d4e790e21fe7f377ead4dfb67f80e4c243 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 9 May 2022 11:20:21 -0700 Subject: [PATCH 008/232] Update install-worker.sh (#918) --- scripts/install-worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 24a8c5dbf..ccaa52b69 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -116,7 +116,7 @@ sudo yum install -y yum-utils device-mapper-persistent-data lvm2 INSTALL_DOCKER="${INSTALL_DOCKER:-true}" if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo amazon-linux-extras 
enable docker - sudo groupadd -fog 1950 docker + sudo groupadd -og 1950 docker sudo useradd --gid $(getent group docker | cut -d: -f3) docker # install runc and lock version From 8d6191f67bf4c89abb5d7605d6a369536fdb45cb Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 10 May 2022 09:50:26 -0700 Subject: [PATCH 009/232] Remove old kernels when upgrading. (#919) Some automated scanning tools may flag vulnerabilities in old kernels that are not actually in use. --- scripts/install-worker.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index ccaa52b69..b5713a726 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -66,7 +66,11 @@ sudo yum install -y \ socat \ unzip \ wget \ - yum-plugin-versionlock + yum-plugin-versionlock \ + yum-utils + +# Remove any old kernel versions. `--count=1` here means "only leave 1 kernel version installed" +sudo package-cleanup --oldkernels --count=1 -y # Remove the ec2-net-utils package, if it's installed. This package interferes with the route setup on the instance. if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils -y -q; fi @@ -111,7 +115,7 @@ sudo mv $TEMPLATE_DIR/iptables-restore.service /etc/eks/iptables-restore.service ### Docker ##################################################################### ################################################################################ -sudo yum install -y yum-utils device-mapper-persistent-data lvm2 +sudo yum install -y device-mapper-persistent-data lvm2 INSTALL_DOCKER="${INSTALL_DOCKER:-true}" if [[ "$INSTALL_DOCKER" == "true" ]]; then From acc7115338be99109b29f3f1179098eaf97e06c0 Mon Sep 17 00:00:00 2001 From: "Michael S. 
Fischer" Date: Tue, 17 May 2022 10:52:34 -0700 Subject: [PATCH 010/232] Maintain dockershim compatibility symlink after instance reboot (#921) --- files/bootstrap.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index bac6969be..f7619968c 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -466,13 +466,17 @@ fi if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then sudo mkdir -p /etc/containerd sudo mkdir -p /etc/cni/net.d + mkdir -p /etc/systemd/system/containerd.service.d + cat <<EOF > /etc/systemd/system/containerd.service.d/10-compat-symlink.conf +[Service] +ExecStartPre=/bin/ln -sf /run/containerd/containerd.sock /run/dockershim.sock +EOF sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service sudo cp -v /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/sandbox-image.service - ln -sf /run/containerd/containerd.sock /run/dockershim.sock systemctl daemon-reload systemctl enable containerd systemctl restart containerd From 210c20f24f240f38e575e0480a5e780d3c975b66 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Thu, 19 May 2022 13:24:05 -0700 Subject: [PATCH 011/232] Update Changelog for [RECALLED] v20220523 (#922) --- CHANGELOG.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e600a06d..813f4a68b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,22 @@ # Changelog +### [Recalled] AMI Release v20220513 +* amazon-eks-gpu-node-1.22-v20220513 +* amazon-eks-gpu-node-1.21-v20220513 +* amazon-eks-gpu-node-1.20-v20220513 +* amazon-eks-gpu-node-1.19-v20220513 +* amazon-eks-arm64-node-1.22-v20220513 +*
amazon-eks-arm64-node-1.21-v20220513 +* amazon-eks-arm64-node-1.20-v20220513 +* amazon-eks-arm64-node-1.19-v20220513 +* amazon-eks-node-1.22-v20220513 +* amazon-eks-node-1.21-v20220513 +* amazon-eks-node-1.20-v20220513 +* amazon-eks-node-1.19-v20220513 + +Notice: +* EKS-Optimized AMI SSM parameters contained an incorrect reference to the release version of the AMIs in this release. + ### AMI Release v20220429 * amazon-eks-gpu-node-1.22-v20220429 * amazon-eks-gpu-node-1.21-v20220429 From 9371c6f2a5a9131d66c0b2e8feccd82e3d27ae10 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 25 May 2022 15:21:19 -0700 Subject: [PATCH 012/232] Updates 1.21 binary in makefile (#926) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 544be20cc..cbe8d3833 100644 --- a/Makefile +++ b/Makefile @@ -52,7 +52,7 @@ k8s: validate .PHONY: 1.21 1.21: - $(MAKE) k8s kubernetes_version=1.21.5 kubernetes_build_date=2022-01-21 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.21.12 kubernetes_build_date=2022-05-20 pull_cni_from_github=true .PHONY: 1.22 1.22: From 8af5d617816447f996ee181c9e6e6aafecf75a03 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 25 May 2022 15:27:55 -0700 Subject: [PATCH 013/232] Update CHANGELOG with release 20220523 (#928) --- CHANGELOG.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 813f4a68b..429c98431 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,41 @@ # Changelog +### AMI Release 20220523 +* amazon-eks-gpu-node-1.22-20220523 +* amazon-eks-gpu-node-1.21-20220523 +* amazon-eks-gpu-node-1.20-20220523 +* amazon-eks-gpu-node-1.19-20220523 +* amazon-eks-arm64-node-1.22-20220523 +* amazon-eks-arm64-node-1.21-20220523 +* amazon-eks-arm64-node-1.20-20220523 +* amazon-eks-arm64-node-1.19-20220523 +* amazon-eks-node-1.22-20220523 +* amazon-eks-node-1.21-20220523 +* amazon-eks-node-1.20-20220523 +* amazon-eks-node-1.19-20220523 + 
+Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.190-107.353.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-2.amzn2.0.1 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0 + +Notable changes: +* Added i4i instance support +* Fixes regression in the docker group ID. AMI build will now fail if the docker group ID is not 1950. +* Removes unused kernels (such as 4.14) during AMI build. This prevents false-positives from automated scanning tools such as AWS Inspector. +* Maintain dockershim compatibility symlink after instance reboot +* Updates 1.21 kubelet version to 1.21.12 + ### [Recalled] AMI Release v20220513 * amazon-eks-gpu-node-1.22-v20220513 * amazon-eks-gpu-node-1.21-v20220513 From 47cbd2f379ff09f62b2a60b7164dcdad08950ac5 Mon Sep 17 00:00:00 2001 From: Suket Sharma Date: Wed, 25 May 2022 17:21:52 -0700 Subject: [PATCH 014/232] Add support for containerd-config-file to bootstrap script (#929) --- files/bootstrap.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index f7619968c..536b3c462 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -23,6 +23,7 @@ function print_help { echo "--enable-docker-bridge Restores the docker default bridge network. (default: false)" echo "--aws-api-retry-attempts Number of retry attempts for AWS API call (DescribeCluster) (default: 3)" echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI" + echo "--containerd-config-file File containing the containerd configuration to be used in place of AMI defaults." echo "--dns-cluster-ip Overrides the IP address to use for DNS queries within the cluster. 
Defaults to 10.100.0.10 or 172.20.0.10 based on the IP address of the primary interface" echo "--pause-container-account The AWS account (number) to pull the pause container from" echo "--pause-container-version The tag of the pause container" @@ -75,6 +76,11 @@ while [[ $# -gt 0 ]]; do shift shift ;; + --containerd-config-file) + CONTAINERD_CONFIG_FILE=$2 + shift + shift + ;; --pause-container-account) PAUSE_CONTAINER_ACCOUNT=$2 shift @@ -126,6 +132,7 @@ KUBELET_EXTRA_ARGS="${KUBELET_EXTRA_ARGS:-}" ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" API_RETRY_ATTEMPTS="${API_RETRY_ATTEMPTS:-3}" DOCKER_CONFIG_JSON="${DOCKER_CONFIG_JSON:-}" +CONTAINERD_CONFIG_FILE="${CONTAINERD_CONFIG_FILE:-}" PAUSE_CONTAINER_VERSION="${PAUSE_CONTAINER_VERSION:-3.1-eksbuild.1}" CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-dockerd}" IP_FAMILY="${IP_FAMILY:-}" @@ -471,6 +478,9 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then [Service] ExecStartPre=/bin/ln -sf /run/containerd/containerd.sock /run/dockershim.sock EOF + if [[ -n "$CONTAINERD_CONFIG_FILE" ]]; then + sudo cp -v $CONTAINERD_CONFIG_FILE /etc/eks/containerd/containerd-config.toml + fi sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service From f0597535f3cc7549d47ab8786e8c7cf33e87795c Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 27 May 2022 17:00:52 -0700 Subject: [PATCH 015/232] Updates CHANGELOG for release 20220526 (#932) --- CHANGELOG.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 429c98431..88ff47e10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,37 @@ # Changelog +### AMI Release 20220526 +* amazon-eks-gpu-node-1.22-20220526 +* amazon-eks-gpu-node-1.21-20220526 +* amazon-eks-gpu-node-1.20-20220526 +* 
amazon-eks-gpu-node-1.19-20220526 +* amazon-eks-arm64-node-1.22-20220526 +* amazon-eks-arm64-node-1.21-20220526 +* amazon-eks-arm64-node-1.20-20220526 +* amazon-eks-arm64-node-1.19-20220526 +* amazon-eks-node-1.22-20220526 +* amazon-eks-node-1.21-20220526 +* amazon-eks-node-1.20-20220526 +* amazon-eks-node-1.19-20220526 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.6/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.190-107.353.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-2.amzn2.0.1 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0 + +Notable changes: +Linux kernel upgraded to 5.4.190-107.353. + ### AMI Release 20220523 * amazon-eks-gpu-node-1.22-20220523 * amazon-eks-gpu-node-1.21-20220523 From e7b9516e9d8a7dd59ca79c6d1c0fe60f76f5b4ca Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Wed, 1 Jun 2022 16:52:42 -0700 Subject: [PATCH 016/232] Updating Makefile and install script for 1.23 version (#935) * Updating Makefile and install script for 1.23 version * Removing Makefile changes --- scripts/install-worker.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index b5713a726..d7e0b882b 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -159,7 +159,7 @@ else sudo mv $TEMPLATE_DIR/containerd-config.toml /etc/eks/containerd/containerd-config.toml fi -if [[ $KUBERNETES_VERSION == "1.22"* ]]; then +if [[ ! $KUBERNETES_VERSION =~ "1.19"* || ! $KUBERNETES_VERSION =~ "1.20"* || ! 
$KUBERNETES_VERSION =~ "1.21"* ]]; then # enable CredentialProviders features in kubelet-containerd service file IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet-containerd.service @@ -272,7 +272,7 @@ if [[ $KUBERNETES_VERSION == "1.20"* ]]; then echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $TEMPLATE_DIR/kubelet-config.json fi -if [[ $KUBERNETES_VERSION == "1.22"* ]]; then +if [[ ! $KUBERNETES_VERSION =~ "1.19"* || ! $KUBERNETES_VERSION =~ "1.20"* || ! $KUBERNETES_VERSION =~ "1.21"* ]]; then # enable CredentialProviders feature flags in kubelet service file IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet.service @@ -311,7 +311,7 @@ fi ################################################################################ ### ECR CREDENTIAL PROVIDER #################################################### ################################################################################ -if [[ $KUBERNETES_VERSION == "1.22"* ]]; then +if [[ ! $KUBERNETES_VERSION =~ "1.19"* || ! $KUBERNETES_VERSION =~ "1.20"* || ! $KUBERNETES_VERSION =~ "1.21"* ]]; then ECR_BINARY="ecr-credential-provider" if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then echo "AWS cli present - using it to copy ecr-credential-provider binaries from s3." 
From 66a5114f069e2052337b19af741268f24d68bbc0 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Thu, 2 Jun 2022 16:31:44 -0700 Subject: [PATCH 017/232] Updating condition for updating ECR credential flags for versions greater than 1.22 (#937) --- scripts/install-worker.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index d7e0b882b..4c653a59c 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -159,7 +159,7 @@ else sudo mv $TEMPLATE_DIR/containerd-config.toml /etc/eks/containerd/containerd-config.toml fi -if [[ ! $KUBERNETES_VERSION =~ "1.19"* || ! $KUBERNETES_VERSION =~ "1.20"* || ! $KUBERNETES_VERSION =~ "1.21"* ]]; then +if [[ ! $KUBERNETES_VERSION =~ "1.19"* && ! $KUBERNETES_VERSION =~ "1.20"* && ! $KUBERNETES_VERSION =~ "1.21"* ]]; then # enable CredentialProviders features in kubelet-containerd service file IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet-containerd.service @@ -272,7 +272,7 @@ if [[ $KUBERNETES_VERSION == "1.20"* ]]; then echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $TEMPLATE_DIR/kubelet-config.json fi -if [[ ! $KUBERNETES_VERSION =~ "1.19"* || ! $KUBERNETES_VERSION =~ "1.20"* || ! $KUBERNETES_VERSION =~ "1.21"* ]]; then +if [[ ! $KUBERNETES_VERSION =~ "1.19"* && ! $KUBERNETES_VERSION =~ "1.20"* && ! 
$KUBERNETES_VERSION =~ "1.21"* ]]; then # enable CredentialProviders feature flags in kubelet service file IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet.service @@ -311,7 +311,7 @@ fi ################################################################################ ### ECR CREDENTIAL PROVIDER #################################################### ################################################################################ -if [[ ! $KUBERNETES_VERSION =~ "1.19"* || ! $KUBERNETES_VERSION =~ "1.20"* || ! $KUBERNETES_VERSION =~ "1.21"* ]]; then +if [[ ! $KUBERNETES_VERSION =~ "1.19"* && ! $KUBERNETES_VERSION =~ "1.20"* && ! $KUBERNETES_VERSION =~ "1.21"* ]]; then ECR_BINARY="ecr-credential-provider" if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then echo "AWS cli present - using it to copy ecr-credential-provider binaries from s3." 
From 6f6828b908fcca526a02098b2d0cdbd96ff790f8 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 7 Jun 2022 11:23:25 -0700 Subject: [PATCH 018/232] Correct version suffixes for recent releases (#940) --- CHANGELOG.md | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 88ff47e10..ae212abc3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,18 +1,18 @@ # Changelog -### AMI Release 20220526 -* amazon-eks-gpu-node-1.22-20220526 -* amazon-eks-gpu-node-1.21-20220526 -* amazon-eks-gpu-node-1.20-20220526 -* amazon-eks-gpu-node-1.19-20220526 -* amazon-eks-arm64-node-1.22-20220526 -* amazon-eks-arm64-node-1.21-20220526 -* amazon-eks-arm64-node-1.20-20220526 -* amazon-eks-arm64-node-1.19-20220526 -* amazon-eks-node-1.22-20220526 -* amazon-eks-node-1.21-20220526 -* amazon-eks-node-1.20-20220526 -* amazon-eks-node-1.19-20220526 +### AMI Release v20220526 +* amazon-eks-gpu-node-1.22-v20220526 +* amazon-eks-gpu-node-1.21-v20220526 +* amazon-eks-gpu-node-1.20-v20220526 +* amazon-eks-gpu-node-1.19-v20220526 +* amazon-eks-arm64-node-1.22-v20220526 +* amazon-eks-arm64-node-1.21-v20220526 +* amazon-eks-arm64-node-1.20-v20220526 +* amazon-eks-arm64-node-1.19-v20220526 +* amazon-eks-node-1.22-v20220526 +* amazon-eks-node-1.21-v20220526 +* amazon-eks-node-1.20-v20220526 +* amazon-eks-node-1.19-v20220526 Binaries used to build these AMIs are published: * s3://amazon-eks/1.22.6/2022-03-09/ @@ -32,19 +32,19 @@ AMI details: Notable changes: Linux kernel upgraded to 5.4.190-107.353. 
-### AMI Release 20220523 -* amazon-eks-gpu-node-1.22-20220523 -* amazon-eks-gpu-node-1.21-20220523 -* amazon-eks-gpu-node-1.20-20220523 -* amazon-eks-gpu-node-1.19-20220523 -* amazon-eks-arm64-node-1.22-20220523 -* amazon-eks-arm64-node-1.21-20220523 -* amazon-eks-arm64-node-1.20-20220523 -* amazon-eks-arm64-node-1.19-20220523 -* amazon-eks-node-1.22-20220523 -* amazon-eks-node-1.21-20220523 -* amazon-eks-node-1.20-20220523 -* amazon-eks-node-1.19-20220523 +### AMI Release v20220523 +* amazon-eks-gpu-node-1.22-v20220523 +* amazon-eks-gpu-node-1.21-v20220523 +* amazon-eks-gpu-node-1.20-v20220523 +* amazon-eks-gpu-node-1.19-v20220523 +* amazon-eks-arm64-node-1.22-v20220523 +* amazon-eks-arm64-node-1.21-v20220523 +* amazon-eks-arm64-node-1.20-v20220523 +* amazon-eks-arm64-node-1.19-v20220523 +* amazon-eks-node-1.22-v20220523 +* amazon-eks-node-1.21-v20220523 +* amazon-eks-node-1.20-v20220523 +* amazon-eks-node-1.19-v20220523 Binaries used to build these AMIs are published: * s3://amazon-eks/1.22.6/2022-03-09/ From c97236d0ac59e6f56e612c568c53c8e439efe50e Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Fri, 10 Jun 2022 14:12:55 -0400 Subject: [PATCH 019/232] Update containerd for CVE-2022-31030 (#945) https://alas.aws.amazon.com/cve/html/CVE-2022-31030.html --- eks-worker-al2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 37f7af523..6c877a52a 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -14,7 +14,7 @@ "kubernetes_build_date": null, "kernel_version": "", "docker_version": "20.10.13-2.amzn2", - "containerd_version": "1.4.13-2.amzn2.0.1", + "containerd_version": "1.4.13-3.amzn2", "runc_version": "1.0.3-2.amzn2", "cni_plugin_version": "v0.8.6", "pull_cni_from_github": "true", From dcfe8cf97a6bc357826ee475772904ecc8308de3 Mon Sep 17 00:00:00 2001 From: Suket Sharma Date: Tue, 14 Jun 2022 09:39:58 -0700 Subject: [PATCH 020/232] Adding c6id, m6id, r6id to eni-max-pods.txt (#944) 
--- files/eni-max-pods.txt | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 7dcbd973f..c4b0fd9b6 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,26 +11,21 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. # -# This file was generated at 2022-05-09T10:14:37-07:00 +# This file was generated at 2022-06-10T09:23:03-07:00 # # The regions queried were: -# - af-south-1 -# - ap-east-1 # - ap-northeast-1 # - ap-northeast-2 # - ap-northeast-3 # - ap-south-1 # - ap-southeast-1 # - ap-southeast-2 -# - ap-southeast-3 # - ca-central-1 # - eu-central-1 # - eu-north-1 -# - eu-south-1 # - eu-west-1 # - eu-west-2 # - eu-west-3 -# - me-south-1 # - sa-east-1 # - us-east-1 # - us-east-2 @@ -153,6 +148,16 @@ c6i.8xlarge 234 c6i.large 29 c6i.metal 737 c6i.xlarge 58 +c6id.12xlarge 234 +c6id.16xlarge 737 +c6id.24xlarge 737 +c6id.2xlarge 58 +c6id.32xlarge 737 +c6id.4xlarge 234 +c6id.8xlarge 234 +c6id.large 29 +c6id.metal 737 +c6id.xlarge 58 c7g.12xlarge 234 c7g.16xlarge 737 c7g.2xlarge 58 @@ -245,6 +250,7 @@ i4i.32xlarge 737 i4i.4xlarge 234 i4i.8xlarge 234 i4i.large 29 +i4i.metal 737 i4i.xlarge 58 im4gn.16xlarge 737 im4gn.2xlarge 58 @@ -377,6 +383,16 @@ m6i.8xlarge 234 m6i.large 29 m6i.metal 737 m6i.xlarge 58 +m6id.12xlarge 234 +m6id.16xlarge 737 +m6id.24xlarge 737 +m6id.2xlarge 58 +m6id.32xlarge 737 +m6id.4xlarge 234 +m6id.8xlarge 234 +m6id.large 29 +m6id.metal 737 +m6id.xlarge 58 mac1.metal 234 p2.16xlarge 234 p2.8xlarge 234 @@ -486,6 +502,16 @@ r6i.8xlarge 234 r6i.large 29 r6i.metal 737 r6i.xlarge 58 +r6id.12xlarge 234 +r6id.16xlarge 737 +r6id.24xlarge 737 +r6id.2xlarge 58 +r6id.32xlarge 737 +r6id.4xlarge 234 +r6id.8xlarge 234 +r6id.large 29 +r6id.metal 737 +r6id.xlarge 58 t1.micro 4 t2.2xlarge 44 t2.large 35 From c8fa176afb949fa0aa5507c51eb0f001fc3b018c Mon Sep 17 00:00:00 2001 
From: Chunyang Wang <89669703+chunywan@users.noreply.github.com> Date: Tue, 14 Jun 2022 13:59:29 -0700 Subject: [PATCH 021/232] Support local cluster in Outposts (#939) This adds support, via new bootstrap flags (--enable-local-outpost), for local control planes on Outpost, specifically when the Outpost is disconnected from its parent region. --- files/bootstrap.sh | 74 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 70 insertions(+), 4 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 536b3c462..fef75348d 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -30,6 +30,8 @@ function print_help { echo "--container-runtime Specify a container runtime (default: dockerd)" echo "--ip-family Specify ip family of the cluster" echo "--service-ipv6-cidr ipv6 cidr range of the cluster" + echo "--enable-local-outpost Enable support for worker nodes to communicate with the local control plane when running on a disconnected Outpost. (true or false)" + echo "--cluster-id Specify the id of EKS cluster" } POSITIONAL=() @@ -111,6 +113,16 @@ while [[ $# -gt 0 ]]; do shift shift ;; + --enable-local-outpost) + ENABLE_LOCAL_OUTPOST=$2 + shift + shift + ;; + --cluster-id) + CLUSTER_ID=$2 + shift + shift + ;; *) # unknown option POSITIONAL+=("$1") # save it in an array for later shift # past argument @@ -137,6 +149,8 @@ PAUSE_CONTAINER_VERSION="${PAUSE_CONTAINER_VERSION:-3.1-eksbuild.1}" CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-dockerd}" IP_FAMILY="${IP_FAMILY:-}" SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" +ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" +CLUSTER_ID="${CLUSTER_ID:-}" function get_pause_container_account_for_region () { local region="$1" @@ -360,7 +374,7 @@ if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then --region=${AWS_DEFAULT_REGION} \ --name=${CLUSTER_NAME} \ --output=text \ - --query 'cluster.{certificateAuthorityData: certificateAuthority.data, endpoint: endpoint, serviceIpv4Cidr: 
kubernetesNetworkConfig.serviceIpv4Cidr, serviceIpv6Cidr: kubernetesNetworkConfig.serviceIpv6Cidr, clusterIpFamily: kubernetesNetworkConfig.ipFamily}' > $DESCRIBE_CLUSTER_RESULT || rc=$? + --query 'cluster.{certificateAuthorityData: certificateAuthority.data, endpoint: endpoint, serviceIpv4Cidr: kubernetesNetworkConfig.serviceIpv4Cidr, serviceIpv6Cidr: kubernetesNetworkConfig.serviceIpv6Cidr, clusterIpFamily: kubernetesNetworkConfig.ipFamily, outpostArn: outpostConfig.outpostArns[0], id: id}' > $DESCRIBE_CLUSTER_RESULT || rc=$? if [[ $rc -eq 0 ]]; then break fi @@ -373,12 +387,26 @@ if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then done B64_CLUSTER_CA=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $1}') APISERVER_ENDPOINT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $3}') - SERVICE_IPV4_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $4}') - SERVICE_IPV6_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $5}') + CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $4}') + OUTPOST_ARN=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $5}') + SERVICE_IPV4_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $6}') + SERVICE_IPV6_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $7}') if [[ -z "${IP_FAMILY}" ]]; then IP_FAMILY=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $2}') fi + + # Automatically detect local cluster in outpost + if [[ -z "${OUTPOST_ARN}" ]] || [[ "${OUTPOST_ARN}" == "None" ]]; then + IS_LOCAL_OUTPOST_DETECTED=false + else + IS_LOCAL_OUTPOST_DETECTED=true + fi + + # If the cluster id is returned from describe cluster, let us use it no matter whether cluster id is passed from option + if [[ ! 
-z "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" ]] && [[ "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" != "None" ]]; then + CLUSTER_ID=${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT} + fi fi if [[ -z "${IP_FAMILY}" ]] || [[ "${IP_FAMILY}" == "None" ]]; then @@ -389,9 +417,47 @@ fi echo $B64_CLUSTER_CA | base64 -d > $CA_CERTIFICATE_FILE_PATH -sed -i s,CLUSTER_NAME,$CLUSTER_NAME,g /var/lib/kubelet/kubeconfig sed -i s,MASTER_ENDPOINT,$APISERVER_ENDPOINT,g /var/lib/kubelet/kubeconfig sed -i s,AWS_REGION,$AWS_DEFAULT_REGION,g /var/lib/kubelet/kubeconfig + +if [[ -z "$ENABLE_LOCAL_OUTPOST" ]]; then + # Only when "--enable-local-outpost" option is not set explicity on calling bootstrap.sh, it will be assigned with + # - the result of auto-detectection through describe-cluster + # - or "false" when describe-cluster is bypassed. + # This also means if "--enable-local-outpost" option is set explicity, it will override auto-detection result + ENABLE_LOCAL_OUTPOST="${IS_LOCAL_OUTPOST_DETECTED:-false}" +fi + +### To support worker nodes to continue to communicate and connect to local cluster even when the Outpost +### is disconnected from the parent AWS Region, the following specific setup are required: +### - append entries to /etc/hosts with the mappings of control plane host IP address and API server +### domain name. So that the domain name can be resolved to IP addresses locally. +### - use aws-iam-authenticator as bootstrap auth for kubelet TLS bootstrapping which downloads client +### X.509 certificate and generate kubelet kubeconfig file which uses the cleint cert. So that the +### worker node can be authentiacated through X.509 certificate which works for both connected and +#### disconnected state. 
+if [[ "${ENABLE_LOCAL_OUTPOST}" == "true" ]]; then + ### append to /etc/hosts file with shuffled mappings of "IP address to API server domain name" + DOMAIN_NAME=$(echo "$APISERVER_ENDPOINT" | awk -F/ '{print $3}' | awk -F: '{print $1}') + getent hosts "$DOMAIN_NAME" | shuf >> /etc/hosts + + ### kubelet bootstrap kubeconfig uses aws-iam-authenticator with cluster id to authenticate to cluster + ### - if "aws eks describe-cluster" is bypassed, for local outpost, the value of CLUSTER_NAME parameter will be cluster id. + ### - otherwise, the cluster id will use the id returned by "aws eks describe-cluster". + if [[ -z "${CLUSTER_ID}" ]]; then + echo "Cluster ID is required when local outpost support is enabled" + exit 1 + else + sed -i s,CLUSTER_NAME,$CLUSTER_ID,g /var/lib/kubelet/kubeconfig + + ### use aws-iam-authenticator as bootstrap auth and download X.509 cert used in kubelet kubeconfig + mv /var/lib/kubelet/kubeconfig /var/lib/kubelet/bootstrap-kubeconfig + KUBELET_EXTRA_ARGS="--bootstrap-kubeconfig /var/lib/kubelet/bootstrap-kubeconfig $KUBELET_EXTRA_ARGS" + fi +else + sed -i s,CLUSTER_NAME,$CLUSTER_NAME,g /var/lib/kubelet/kubeconfig +fi + ### kubelet.service configuration if [[ "${IP_FAMILY}" == "ipv6" ]]; then From 0f7d8afce913991242b9c0c5ff76def35a3ae98d Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Tue, 14 Jun 2022 16:19:22 -0700 Subject: [PATCH 022/232] Updating Makefile to update 1.22 build config (#948) Updating the 1.22 binaries including their build date and build version --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cbe8d3833..199a7debc 100644 --- a/Makefile +++ b/Makefile @@ -56,4 +56,4 @@ k8s: validate .PHONY: 1.22 1.22: - $(MAKE) k8s kubernetes_version=1.22.6 kubernetes_build_date=2022-03-09 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.22.9 kubernetes_build_date=2022-06-03 pull_cni_from_github=true From 6bb3edf54235b4c744ec57dc617baa3e89d2e307 Mon Sep 17 00:00:00 2001 
From: Prasad Shende Date: Wed, 15 Jun 2022 16:58:23 -0700 Subject: [PATCH 023/232] Updating CHANGELOG.md for AMI release 06/10 (#950) --- CHANGELOG.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae212abc3..312a33566 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,39 @@ # Changelog +### AMI Release v20220610 +* amazon-eks-gpu-node-1.22-v20220610 +* amazon-eks-gpu-node-1.21-v20220610 +* amazon-eks-gpu-node-1.20-v20220610 +* amazon-eks-gpu-node-1.19-v20220610 +* amazon-eks-arm64-node-1.22-v20220610 +* amazon-eks-arm64-node-1.21-v20220610 +* amazon-eks-arm64-node-1.20-v20220610 +* amazon-eks-arm64-node-1.19-v20220610 +* amazon-eks-node-1.22-v20220610 +* amazon-eks-node-1.21-v20220610 +* amazon-eks-node-1.20-v20220610 +* amazon-eks-node-1.19-v20220610 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.9/2022-06-03/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.11/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.196-108.356.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0 + +Notable changes: +* Containerd version upgraded to 1.4.13-3.amzn2 for [CVE-2022-31030](https://alas.aws.amazon.com/cve/html/CVE-2022-31030.html). 
+* Kernel version upgraded to 5.4.196-108.356.amzn2 for [CVE-2022-0494](https://alas.aws.amazon.com/cve/html/CVE-2022-0494.html), [CVE-2022-0854](https://alas.aws.amazon.com/cve/html/CVE-2022-0854.html), [CVE-2022-1729](https://alas.aws.amazon.com/cve/html/CVE-2022-1729.html), [CVE-2022-1836](https://alas.aws.amazon.com/cve/html/CVE-2022-1836.html), [CVE-2022-28893](https://alas.aws.amazon.com/cve/html/CVE-2022-28893.html), [CVE-2022-29581](https://alas.aws.amazon.com/cve/html/CVE-2022-29581.html) +* Updating the kubelet version for 1.22 from 1.22.6 to 1.22.9 + ### AMI Release v20220526 * amazon-eks-gpu-node-1.22-v20220526 * amazon-eks-gpu-node-1.21-v20220526 From b905f2afc3b14210a8604d44df462ab83298dc1b Mon Sep 17 00:00:00 2001 From: Denis Salamanca Date: Fri, 17 Jun 2022 03:17:07 +1000 Subject: [PATCH 024/232] Increase /var/log/messages limit to 100M (#930) 10M resulted in insufficient logs for investigation in too many cases. Rather than removing the limit entirely, this raises it to include the entire /var/log/messages in most cases while making the resulting archive easy to pass between support and service teams. 
--- log-collector-script/linux/eks-log-collector.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 84af9bc17..f33381941 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -320,7 +320,7 @@ get_common_logs() { for entry in ${COMMON_LOGS[*]}; do if [[ -e "/var/log/${entry}" ]]; then if [[ "${entry}" == "messages" ]]; then - tail -c 10M /var/log/messages > "${COLLECT_DIR}"/var_log/messages + tail -c 100M /var/log/messages > "${COLLECT_DIR}"/var_log/messages continue fi if [[ "${entry}" == "containers" ]]; then From 69b1c73dce130b4a468564e82e15e62336d7fbc0 Mon Sep 17 00:00:00 2001 From: Pubudu Perera <892600+suharshan@users.noreply.github.com> Date: Fri, 17 Jun 2022 05:26:50 +0800 Subject: [PATCH 025/232] Support packer's ami_regions feature (#752) Allows `ami_regions` to be passed through the `make` line, which will instruct Packer to copy the resulting AMI(s) to additional regions. 
--- Makefile | 2 +- eks-worker-al2.json | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 199a7debc..ef0f45d98 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date kernel_version docker_version containerd_version runc_version cni_plugin_version source_ami_id source_ami_owners source_ami_filter_name arch instance_type security_group_id additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry +PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date kernel_version docker_version containerd_version runc_version cni_plugin_version source_ami_id source_ami_owners source_ami_filter_name arch instance_type security_group_id additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry ami_regions K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 6c877a52a..1ace29f65 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -35,7 +35,8 @@ "launch_block_device_mappings_volume_size": "4", "ami_users": "", "additional_yum_repos": "", - "sonobuoy_e2e_registry": "" + "sonobuoy_e2e_registry": "", + "ami_regions": "" }, "builders": [ { @@ -74,6 +75,7 @@ "delete_on_termination": true } ], + "ami_regions": "{{user `ami_regions`}}", "ssh_username": "{{user `ssh_username`}}", "ssh_interface": "{{user `ssh_interface`}}", "temporary_security_group_source_cidrs": "{{user `temporary_security_group_source_cidrs`}}", From 86b2ab96b341ce27bdc3503858f665eac963241e Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Thu, 23 Jun 2022 13:26:15 -0700 Subject: [PATCH 026/232] Update Makefile to reflect latest 1.20 binaries (#955) We recently updated the 1.20 binaries with latest build 
dates and those respective binaries have been uploaded to the s3 bucket. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ef0f45d98..c9be7eb6e 100644 --- a/Makefile +++ b/Makefile @@ -48,7 +48,7 @@ k8s: validate .PHONY: 1.20 1.20: - $(MAKE) k8s kubernetes_version=1.20.11 kubernetes_build_date=2021-11-10 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.20.15 kubernetes_build_date=2022-06-20 pull_cni_from_github=true .PHONY: 1.21 1.21: From 4cf8509fe94a71d9b495b4c65c077a3cf02b519a Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Thu, 23 Jun 2022 17:42:19 -0700 Subject: [PATCH 027/232] Updating changelog for AMI release 20220620 (#956) --- CHANGELOG.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 312a33566..9bd805846 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,41 @@ # Changelog +### AMI Release v20220620 +* amazon-eks-gpu-node-1.22-v20220620 +* amazon-eks-gpu-node-1.21-v20220620 +* amazon-eks-gpu-node-1.20-v20220620 +* amazon-eks-gpu-node-1.19-v20220620 +* amazon-eks-arm64-node-1.22-v20220620 +* amazon-eks-arm64-node-1.21-v20220620 +* amazon-eks-arm64-node-1.20-v20220620 +* amazon-eks-arm64-node-1.19-v20220620 +* amazon-eks-node-1.22-v20220620 +* amazon-eks-node-1.21-v20220620 +* amazon-eks-node-1.20-v20220620 +* amazon-eks-node-1.19-v20220620 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.9/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.15/2022-06-20/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.196-108.356.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0 + +Notable changes: +* Update kubelet binaries for 1.20 +* Support packer's ami_regions feature +* Increase /var/log/messages limit to 100M +* 
Support local cluster in Outposts +* Adding c6id, m6id, r6id to eni-max-pods.txt + ### AMI Release v20220610 * amazon-eks-gpu-node-1.22-v20220610 * amazon-eks-gpu-node-1.21-v20220610 From 5c74a515ddb7f59f8a94306e7de0a7f9e6f0a1fb Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 30 Jun 2022 15:26:21 -0700 Subject: [PATCH 028/232] Adds release v20220629 to the changelog (#958) --- CHANGELOG.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bd805846..461edc503 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,36 @@ # Changelog +### AMI Release v20220629 +* amazon-eks-gpu-node-1.22-v20220629 +* amazon-eks-gpu-node-1.21-v20220629 +* amazon-eks-gpu-node-1.20-v20220629 +* amazon-eks-gpu-node-1.19-v20220629 +* amazon-eks-arm64-node-1.22-v20220629 +* amazon-eks-arm64-node-1.21-v20220629 +* amazon-eks-arm64-node-1.20-v20220629 +* amazon-eks-arm64-node-1.19-v20220629 +* amazon-eks-node-1.22-v20220629 +* amazon-eks-node-1.21-v20220629 +* amazon-eks-node-1.20-v20220629 +* amazon-eks-node-1.19-v20220629 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.9/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.15/2021-11-10/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.196-108.356.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1188.0 + +Noted software versions are identical to release v20220620 in the commercial partition. 
+ ### AMI Release v20220620 * amazon-eks-gpu-node-1.22-v20220620 * amazon-eks-gpu-node-1.21-v20220620 From 28bd4f8aa3891b637789059f7aca8466be04531a Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 30 Jun 2022 15:34:11 -0700 Subject: [PATCH 029/232] Fixes 1.20 reference in changelog for v20220629 (#959) --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 461edc503..51c2fe3a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ Binaries used to build these AMIs are published: * s3://amazon-eks/1.22.9/2022-03-09/ * s3://amazon-eks/1.21.12/2022-05-20/ -* s3://amazon-eks/1.20.15/2021-11-10/ +* s3://amazon-eks/1.20.15/2022-06-20/ * s3://amazon-eks/1.19.15/2021-11-10/ AMI details: From 4e4cc4ec46213132b0ec8a6035b5f640c1e1bab1 Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Wed, 13 Jul 2022 14:22:30 -0700 Subject: [PATCH 030/232] updating pause-container-version to 3.5 from 3.1 (#965) Co-authored-by: Ravi Sinha --- files/bootstrap.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index fef75348d..cff81924c 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -145,7 +145,7 @@ ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" API_RETRY_ATTEMPTS="${API_RETRY_ATTEMPTS:-3}" DOCKER_CONFIG_JSON="${DOCKER_CONFIG_JSON:-}" CONTAINERD_CONFIG_FILE="${CONTAINERD_CONFIG_FILE:-}" -PAUSE_CONTAINER_VERSION="${PAUSE_CONTAINER_VERSION:-3.1-eksbuild.1}" +PAUSE_CONTAINER_VERSION="${PAUSE_CONTAINER_VERSION:-3.5}" CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-dockerd}" IP_FAMILY="${IP_FAMILY:-}" SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" From 584f9a56c76fc9e7e8632f6ea45e29d45f2eab63 Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Tue, 19 Jul 2022 15:50:24 -0700 Subject: [PATCH 031/232] adding log-collector-script in EKS AL2 AMI (#967) Co-authored-by: Ravi Sinha --- eks-worker-al2.json | 7 ++++++- scripts/install-worker.sh | 6 ++++++ 2 files changed, 12 
insertions(+), 1 deletion(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 1ace29f65..f44f44521 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -125,7 +125,7 @@ "pause_before": "90s", "remote_folder": "{{ user `remote_folder`}}", "inline": [ - "mkdir -p /tmp/worker/" + "mkdir -p /tmp/worker/log-collector-script/" ] }, { @@ -133,6 +133,11 @@ "source": "{{template_dir}}/files/", "destination": "/tmp/worker/" }, + { + "type": "file", + "source": "{{template_dir}}/log-collector-script/linux/", + "destination": "/tmp/worker/log-collector-script/" + }, { "type": "shell", "remote_folder": "{{ user `remote_folder`}}", diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 4c653a59c..639bbe1bb 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -367,6 +367,12 @@ echo fs.inotify.max_user_instances=8192 | sudo tee -a /etc/sysctl.conf echo vm.max_map_count=524288 | sudo tee -a /etc/sysctl.conf +################################################################################ +### adding log-collector-script ############################################### +################################################################################ +sudo mkdir -p /etc/eks/log-collector-script/ +sudo cp $TEMPLATE_DIR/log-collector-script/eks-log-collector.sh /etc/eks/log-collector-script/ + ################################################################################ ### Cleanup #################################################################### ################################################################################ From 03cc9d7a3eaec8ad145dd73cd7ee21970b4e9d54 Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Thu, 21 Jul 2022 08:41:48 -0700 Subject: [PATCH 032/232] adding log-collector-script directory as dependency dir (#968) Co-authored-by: Ravi Sinha --- ArchiveBuildConfig.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ArchiveBuildConfig.yaml b/ArchiveBuildConfig.yaml index e6663a0fa..a93376845 
100644 --- a/ArchiveBuildConfig.yaml +++ b/ArchiveBuildConfig.yaml @@ -9,6 +9,7 @@ dependencies: dirs: - src: files/ - src: scripts/ + - src: log-collector-script/ files: - src: Makefile - src: eks-worker-al2.json From 5dafe2088499bb3c33b3e56625e9852f2cda2956 Mon Sep 17 00:00:00 2001 From: Steve Hipwell Date: Mon, 25 Jul 2022 19:21:16 +0100 Subject: [PATCH 033/232] Use systemd as the containerd cgroup driver (#717) Authored-by: Steve Hipwell --- files/bootstrap.sh | 17 +++++++++-------- files/containerd-config.toml | 3 +++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index cff81924c..1cc5ca6c3 100644 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -402,7 +402,7 @@ if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then else IS_LOCAL_OUTPOST_DETECTED=true fi - + # If the cluster id is returned from describe cluster, let us use it no matter whether cluster id is passed from option if [[ ! -z "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" ]] && [[ "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" != "None" ]]; then CLUSTER_ID=${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT} @@ -421,20 +421,20 @@ sed -i s,MASTER_ENDPOINT,$APISERVER_ENDPOINT,g /var/lib/kubelet/kubeconfig sed -i s,AWS_REGION,$AWS_DEFAULT_REGION,g /var/lib/kubelet/kubeconfig if [[ -z "$ENABLE_LOCAL_OUTPOST" ]]; then - # Only when "--enable-local-outpost" option is not set explicity on calling bootstrap.sh, it will be assigned with + # Only when "--enable-local-outpost" option is not set explicity on calling bootstrap.sh, it will be assigned with # - the result of auto-detectection through describe-cluster # - or "false" when describe-cluster is bypassed. 
# This also means if "--enable-local-outpost" option is set explicity, it will override auto-detection result - ENABLE_LOCAL_OUTPOST="${IS_LOCAL_OUTPOST_DETECTED:-false}" + ENABLE_LOCAL_OUTPOST="${IS_LOCAL_OUTPOST_DETECTED:-false}" fi -### To support worker nodes to continue to communicate and connect to local cluster even when the Outpost +### To support worker nodes to continue to communicate and connect to local cluster even when the Outpost ### is disconnected from the parent AWS Region, the following specific setup are required: -### - append entries to /etc/hosts with the mappings of control plane host IP address and API server +### - append entries to /etc/hosts with the mappings of control plane host IP address and API server ### domain name. So that the domain name can be resolved to IP addresses locally. -### - use aws-iam-authenticator as bootstrap auth for kubelet TLS bootstrapping which downloads client -### X.509 certificate and generate kubelet kubeconfig file which uses the cleint cert. So that the -### worker node can be authentiacated through X.509 certificate which works for both connected and +### - use aws-iam-authenticator as bootstrap auth for kubelet TLS bootstrapping which downloads client +### X.509 certificate and generate kubelet kubeconfig file which uses the cleint cert. So that the +### worker node can be authentiacated through X.509 certificate which works for both connected and #### disconnected state. 
if [[ "${ENABLE_LOCAL_OUTPOST}" == "true" ]]; then ### append to /etc/hosts file with shuffled mappings of "IP address to API server domain name" @@ -547,6 +547,7 @@ EOF if [[ -n "$CONTAINERD_CONFIG_FILE" ]]; then sudo cp -v $CONTAINERD_CONFIG_FILE /etc/eks/containerd/containerd-config.toml fi + echo "$(jq '.cgroupDriver="systemd"' $KUBELET_CONFIG)" > $KUBELET_CONFIG sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service diff --git a/files/containerd-config.toml b/files/containerd-config.toml index 31404027f..8a668ce84 100644 --- a/files/containerd-config.toml +++ b/files/containerd-config.toml @@ -14,6 +14,9 @@ sandbox_image = "SANDBOX_IMAGE" [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] runtime_type = "io.containerd.runc.v2" +[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] +SystemdCgroup = true + [plugins."io.containerd.grpc.v1.cri".cni] bin_dir = "/opt/cni/bin" conf_dir = "/etc/cni/net.d" From 4bc398e73ae17729114bea725ba669eb499e6543 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 26 Jul 2022 12:22:43 -0700 Subject: [PATCH 034/232] Adds instructions for PR testing (#970) --- CONTRIBUTING.md | 66 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7e0b7332b..c22acf627 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -39,6 +39,72 @@ To send us a pull request, please: GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). +### Testing Changes + +When submitting PRs, we want to verify that there are no regressions in the AMI with the new changes. 
EKS runs various tests before publishing new Amazon EKS optimized Amazon Linux AMIs, which will ensure the highest level of confidence that there are no regressions in officially published AMIs. To maintain the health of this repo, we need to do some basic validation prior to merging PRs. Eventually, we hope to automate this process. Until then, here are the basic steps that we should take before merging PRs. + +**Test #1: Verify that building AMIs still works** + +If your change is relevant to a specific Kubernetes version, build all AMIs that apply. Otherwise, just choose the latest available Kubernetes version. + +``` +# Configure AWS credentials +make 1.22 +``` + +**Test #2: Create a nodegroup with new AMI and confirm it joins a cluster** + +Once the AMI is built, we need to verify that it can join a cluster. You can use `eksctl`, or your method of choice, to create a cluster and add nodes to it using the AMI you built. Below is an example config file. + +`cluster.yaml` + +``` +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig + +metadata: + name: basic-cluster + region: us-west-2 + version: '1.22' + +nodeGroups: + - name: ng + instanceType: m5.large + ami: [INSERT_AMI_ID] + overrideBootstrapCommand: | + #!/bin/bash + /etc/eks/bootstrap.sh basic-cluster +``` + +Then run: + +``` +eksctl create cluster -f cluster.yaml +``` + +`eksctl` will verify that the nodes join the cluster before completing. + +**Test #3: Verify that the nodes are Kubernetes conformant** + +You can use [sonobuoy](https://sonobuoy.io/) to run conformance tests on the cluster you've created in *Test #2*. You should only include nodes with the custom AMI built in *Test #1*. You must install `sonobuoy` locally before running. + +``` +sonobuoy run --wait +``` + +By default, `sonobuoy` will run `e2e` and `systemd-logs`. This step may take multiple hours to run. 
+ +**Test #4: [Optional] Test your specific PR changes** + +If your PR has changes that require additional, custom validation, provide the appropriate steps to verify that the changes don't cause regressions and behave as expected. Document the steps taken in the PR. + +**Clean Up** + +Delete the cluster: + +``` +eksctl delete cluster -f cluster.yaml +``` ## Finding contributions to work on Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels ((enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-samples/amazon-eks-ami/labels/help%20wanted) issues is a great place to start. From e4dd6ad98200d1f457f7f8043ef520cd9e4eac5e Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 26 Jul 2022 12:50:23 -0700 Subject: [PATCH 035/232] Update PR template to reference testing steps (#971) --- .github/PULL_REQUEST_TEMPLATE.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e779d6499..09b7a68ad 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,8 +1,14 @@ -*Issue #, if available:* +**Issue #, if available:** -*Description of changes:* +**Description of changes:** By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. + +**Testing Done** + + + +*[See this guide for recommended testing for PRs.](https://github.com/awslabs/amazon-eks-ami/blob/master/CONTRIBUTING.md#testing-changes) Some tests may not apply. 
Completing tests and providing additional validation steps are not required, but it is recommended and may reduce review time and time to merge.* From ca85d94b0bc8dc03bcaa569cd983d06e20c72b8d Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Fri, 29 Jul 2022 23:54:41 -0700 Subject: [PATCH 036/232] Updating changelog for AMI release v20220725 (#973) Co-authored-by: Ravi Sinha --- CHANGELOG.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51c2fe3a4..df4343055 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,40 @@ # Changelog +### AMI Release v20220725 +* amazon-eks-gpu-node-1.22-v20220725 +* amazon-eks-gpu-node-1.21-v20220725 +* amazon-eks-gpu-node-1.20-v20220725 +* amazon-eks-gpu-node-1.19-v20220725 +* amazon-eks-arm64-node-1.22-v20220725 +* amazon-eks-arm64-node-1.21-v20220725 +* amazon-eks-arm64-node-1.20-v20220725 +* amazon-eks-arm64-node-1.19-v20220725 +* amazon-eks-node-1.22-v20220725 +* amazon-eks-node-1.21-v20220725 +* amazon-eks-node-1.20-v20220725 +* amazon-eks-node-1.19-v20220725 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.22.9/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.15/2022-06-20/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.204-113.362.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1575.0 + +Notable changes: +* Updating pause-container version from 3.1 to 3.5 +* Adding log-collector-script to the AMI +* Kernel version upgraded to 5.4.204-113.362.amzn2 for [CVE-2022-0494](https://alas.aws.amazon.com/cve/html/CVE-2022-0494.html) [CVE-2022-0812](https://alas.aws.amazon.com/cve/html/CVE-2022-0812.html) [CVE-2022-1012](https://alas.aws.amazon.com/cve/html/CVE-2022-1012.html) [CVE-2022-1184](https://alas.aws.amazon.com/cve/html/CVE-2022-1184.html) 
[CVE-2022-1966](https://alas.aws.amazon.com/cve/html/CVE-2022-1966.html) [CVE-2022-32250](https://alas.aws.amazon.com/cve/html/CVE-2022-32250.html) [CVE-2022-32296](https://alas.aws.amazon.com/cve/html/CVE-2022-32296.html) [CVE-2022-32981](https://alas.aws.amazon.com/cve/html/CVE-2022-32981.html) + + ### AMI Release v20220629 * amazon-eks-gpu-node-1.22-v20220629 * amazon-eks-gpu-node-1.21-v20220629 From 7dc3326f2a529e34e1aab765d507df1c8db58c8e Mon Sep 17 00:00:00 2001 From: icarthick <93390344+icarthick@users.noreply.github.com> Date: Tue, 2 Aug 2022 14:38:26 -0500 Subject: [PATCH 037/232] =?UTF-8?q?Update=20Readme=20file=20to=20have=20cl?= =?UTF-8?q?ear=20instructions=20about=20how=20to=20build=20usin=E2=80=A6?= =?UTF-8?q?=20(#794)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update Readme file to have clear instructions about how to build using different versions of Kubernetes binaries * Change readme text to exclude older k8s versions --- README.md | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 37cbc9c3f..f3bc37212 100644 --- a/README.md +++ b/README.md @@ -21,13 +21,71 @@ when building this AMI. ## Building the AMI -A Makefile is provided to build the AMI, but it is just a small wrapper around +A Makefile is provided to build the Amazon EKS Worker AMI, but it is just a small wrapper around invoking Packer directly. You can initiate the build process by running the following command in the root of this repository: ```bash make ``` +The Makefile chooses a particular kubelet binary to use per kubernetes version which you can [view here](Makefile). 
+To build an Amazon EKS Worker AMI for a particular Kubernetes version run the following command +```bash +make 1.21 ## Build a Amazon EKS Worker AMI for k8s 1.21 +``` +### Building against other versions of Kubernetes binaries +To build an Amazon EKS Worker AMI with other versions of Kubernetes that are not listed above run the following AWS Command +Line Interface (AWS CLI) commands to obtain values for KUBERNETES_VERSION, KUBERNETES_BUILD_DATE, PLATFORM, ARCH from S3 +```bash +#List of all avalable Kuberenets Versions: +aws s3 ls s3://amazon-eks +KUBERNETES_VERSION=1.17.9 # Chose a version and set the variable + +#List of all builds for the specified Kubernetes Version: +aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/ +KUBERNETES_BUILD_DATE=2020-08-04 # Chose a date and set the variable + +#List of all platforms available for the selected Kubernetes Version and build date +aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/2020-08-04/bin/ +PLATFORM=linux # Chose a platform and set the variable + +#List of all architectures for the selected Kubernetes Version, build date and platform +aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/2020-08-04/bin/linux/ +ARCH=amd64 #Chose an architecture and set the variable +``` +Run the following command to build an Amazon EKS Worker AMI based on the chosen parameters in the previous step +```bash +make k8s \ + kubernetes_version=$KUBERNETES_VERSION \ + kubernetes_build_date=$KUBERNETES_BUILD_DATE \ + arch=$ARCH +``` + +### Providing your own Kubernetes Binaries + +By default, binaries are downloaded from the Amazon EKS public Amazon Simple Storage Service (Amazon S3) +bucket amazon-eks in us-west-2. You can instead choose to provide your own version of Kubernetes binaries to be used. To use your own binaries + +1. Copy the binaries to your own S3 bucket using the AWS CLI. 
Here is an example that uses Kubelet binary +```bash + aws s3 cp kubelet s3://my-custom-bucket/kubernetes_version/kubernetes_build_date/bin/linux/arch/kubelet +``` +**Note**: Replace my-custom-bucket, amazon-eks, kubernetes_version, kubernetes_build_date, and arch with your values. + +**Important**: You must provide all the binaries listed in the default amazon-eks bucket for a specific kubernetes_version, kubernetes_build_date, and arch combination. These binaries must be accessible through AWS Identity and Access Management (IAM) credentials configured in the Install and configure HashiCorp Packer section. + +2. Run the following command to start the build process to use your own Kubernetes binaries +```bash +make k8s \ + binary_bucket_name=my-custom-bucket \ + binary_bucket_region=eu-west-1 \ + kubernetes_version=1.14.9 \ + kubernetes_build_date=2020-01-22 +``` +**Note**: Confirm that the binary_bucket_name, binary_bucket_region, kubernetes_version, and kubernetes_build_date parameters match the path to your binaries in Amazon S3. 
+ + + The Makefile runs Packer with the `eks-worker-al2.json` build specification template and the [amazon-ebs](https://www.packer.io/docs/builders/amazon-ebs.html) From 3d44534f1bf4a4f45e4d9d266f7c1e36562ad707 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 5 Aug 2022 14:12:20 -0700 Subject: [PATCH 038/232] Generate version-info.json during build (#974) --- eks-worker-al2.json | 12 ++++++++++++ scripts/generate-version-info.sh | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 scripts/generate-version-info.sh diff --git a/eks-worker-al2.json b/eks-worker-al2.json index f44f44521..521c1b7de 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -174,6 +174,18 @@ "environment_vars": [ "KERNEL_VERSION={{user `kernel_version`}}" ] + }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "script": "{{template_dir}}/scripts/generate-version-info.sh", + "execute_command": "chmod +x {{ .Path }}; {{ .Path }} /tmp/version-info.json" + }, + { + "type": "file", + "direction": "download", + "source": "/tmp/version-info.json", + "destination": "{{ user `ami_name` }}-version-info.json" } ], "post-processors": [ diff --git a/scripts/generate-version-info.sh b/scripts/generate-version-info.sh new file mode 100644 index 000000000..fa7baa196 --- /dev/null +++ b/scripts/generate-version-info.sh @@ -0,0 +1,20 @@ +#!/bin/sh + +# generates a JSON file containing version information for the software in this AMI + +set -o errexit +set -o pipefail + +if [ "$#" -ne 1 ] +then + echo "usage: $0 OUTPUT_FILE" + exit 1 +fi + +OUTPUT_FILE="$1" + +# packages +rpm --query --all --queryformat '\{"%{NAME}": "%{VERSION}-%{RELEASE}"\}\n' | jq --slurp --sort-keys 'add | {packages:(.)}' > "$OUTPUT_FILE" + +# binaries +echo $(jq ".binaries.kubelet = \"$(kubelet --version | awk '{print $2}')\"" $OUTPUT_FILE) > $OUTPUT_FILE From c847bba4fea9773d40446155d8e670192500f133 Mon Sep 17 00:00:00 2001 From: guessi Date: Tue, 9 Aug 2022 01:05:35 +0800 Subject: 
[PATCH 039/232] Extend support for containerd log collecting (#964) --- .../linux/eks-log-collector.sh | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index f33381941..0e64f2278 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -559,11 +559,25 @@ get_containerd_info() { warning "The Containerd daemon is not running." fi - ok + ok + + try "Collect Containerd running information" + if ! command -v ctr >/dev/null 2>&1; then + warning "ctr not installed" + else + timeout 75 ctr version > "${COLLECT_DIR}"/containerd/containerd-version.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr namespaces list > "${COLLECT_DIR}"/containerd/containerd-namespaces.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io images list > "${COLLECT_DIR}"/containerd/containerd-images.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io containers list > "${COLLECT_DIR}"/containerd/containerd-containers.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io tasks list > "${COLLECT_DIR}"/containerd/containerd-tasks.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io plugins list > "${COLLECT_DIR}"/containerd/containerd-plugins.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + fi + + ok } get_sandboxImage_info() { - try "Collect sandbox-image daemon information" + try "Collect sandbox-image daemon information" timeout 75 journalctl -u sandbox-image > "${COLLECT_DIR}"/sandbox-image/sandbox-image-log.txt 2>&1 || echo -e "\tTimed out, ignoring \"sandbox-image info output \" " ok } From 
56bcf1946d814b0d56bfd4c09a5688e95c8c0268 Mon Sep 17 00:00:00 2001 From: siddharth Date: Mon, 8 Aug 2022 23:52:06 +0530 Subject: [PATCH 040/232] eks-log-collector.sh: add timeout to df command. (#949) df hangs in some scenarios like https://github.com/kubernetes-sigs/aws-efs-csi-driver/issues/616 --- log-collector-script/linux/eks-log-collector.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 0e64f2278..5469babb0 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -210,7 +210,7 @@ is_diskfull() { # 1.5GB in KB threshold=1500000 - result=$(df / | grep --invert-match "Filesystem" | awk '{ print $4 }') + result=$(timeout 75 df / | grep --invert-match "Filesystem" | awk '{ print $4 }') # If "result" is less than or equal to "threshold", fail. if [[ "${result}" -le "${threshold}" ]]; then @@ -278,7 +278,7 @@ get_mounts_info() { try "collect mount points and volume information" mount > "${COLLECT_DIR}"/storage/mounts.txt echo >> "${COLLECT_DIR}"/storage/mounts.txt - df --human-readable >> "${COLLECT_DIR}"/storage/mounts.txt + timeout 75 df --human-readable >> "${COLLECT_DIR}"/storage/mounts.txt lsblk > "${COLLECT_DIR}"/storage/lsblk.txt lvs > "${COLLECT_DIR}"/storage/lvs.txt pvs > "${COLLECT_DIR}"/storage/pvs.txt From 5cb2ba8d69fcc8c2e692120c830854a3a0891ef7 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Mon, 8 Aug 2022 13:25:30 -0500 Subject: [PATCH 041/232] record inode usage (#978) This will allow diagnosing issues where we run out of inodes instead of disk space more easily. 
--- log-collector-script/linux/eks-log-collector.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 5469babb0..1241f4529 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -279,6 +279,7 @@ get_mounts_info() { mount > "${COLLECT_DIR}"/storage/mounts.txt echo >> "${COLLECT_DIR}"/storage/mounts.txt timeout 75 df --human-readable >> "${COLLECT_DIR}"/storage/mounts.txt + timeout 75 df --inodes >> "${COLLECT_DIR}"/storage/inodes.txt lsblk > "${COLLECT_DIR}"/storage/lsblk.txt lvs > "${COLLECT_DIR}"/storage/lvs.txt pvs > "${COLLECT_DIR}"/storage/pvs.txt From 1cf9757299499daf8d7a62dcad8a18feb5cc2d38 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Mon, 8 Aug 2022 13:25:48 -0500 Subject: [PATCH 042/232] copy AWS EBS/EFS driver logs (#979) --- log-collector-script/linux/eks-log-collector.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 1241f4529..fd0290cf3 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -329,6 +329,8 @@ get_common_logs() { cp --force --dereference --recursive /var/log/containers/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2>/dev/null cp --force --dereference --recursive /var/log/containers/coredns-* "${COLLECT_DIR}"/var_log/ 2>/dev/null cp --force --dereference --recursive /var/log/containers/kube-proxy* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/containers/ebs-csi* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/containers/efs-csi* "${COLLECT_DIR}"/var_log/ 2>/dev/null continue fi if [[ "${entry}" == "pods" ]]; then @@ -336,6 +338,8 @@ get_common_logs() { cp --force --dereference --recursive 
/var/log/pods/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2>/dev/null cp --force --dereference --recursive /var/log/pods/kube-system_coredns* "${COLLECT_DIR}"/var_log/ 2>/dev/null cp --force --dereference --recursive /var/log/pods/kube-system_kube-proxy* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_ebs-csi-* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_efs-csi-* "${COLLECT_DIR}"/var_log/ 2>/dev/null continue fi cp --force --recursive --dereference /var/log/"${entry}" "${COLLECT_DIR}"/var_log/ 2>/dev/null From ab6845498f5c70f230fa997fe5809d531d7056c8 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Mon, 8 Aug 2022 13:26:05 -0500 Subject: [PATCH 043/232] eks-log-collector.sh: record a ps listing including threads (#980) * record a ps listing including threads * print headers after every page of ps output --- log-collector-script/linux/eks-log-collector.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index fd0290cf3..429e2029f 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -548,7 +548,8 @@ get_system_services() { esac timeout 75 top -b -n 1 > "${COLLECT_DIR}"/system/top.txt 2>&1 - timeout 75 ps fauxwww > "${COLLECT_DIR}"/system/ps.txt 2>&1 + timeout 75 ps fauxwww --headers > "${COLLECT_DIR}"/system/ps.txt 2>&1 + timeout 75 ps -eTF --headers > "${COLLECT_DIR}"/system/ps-threads.txt 2>&1 timeout 75 netstat -plant > "${COLLECT_DIR}"/system/netstat.txt 2>&1 ok From b6cdc39e2dbf652f3675a941f34a8c913fef4683 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Mon, 8 Aug 2022 13:26:36 -0500 Subject: [PATCH 044/232] eks-log-collector.sh: collect information regarding throttled processes (#975) * collect information regarding throttled processes Collects process 
throttling information from /sys/fs/cgroup. In the event of detected CPU throttling, the log will show the amount of throttling, the pod and container UIDs if applicable and the processes being throttled: $ cat system/cpu_throttling.txt /sys/fs/cgroup/cpu,cpuacct/kubepods/burstable/poda207af95-18de-463f-a3f6-181f78900fc7/071f5ea890c12c15f98a8fdc3c60cd0fd31df4788194effd9add9713442adb23/cpu.stat nr_periods 104065 nr_throttled 103964 throttled_time 18601742785642 12248 ? SLs 0:00 /bin/stress-ng --cpu 0 --metrics-brief -v 12325 ? R 10:39 /bin/stress-ng --cpu 0 --metrics-brief -v 12326 ? R 11:01 /bin/stress-ng --cpu 0 --metrics-brief -v 12327 ? R 11:04 /bin/stress-ng --cpu 0 --metrics-brief -v 12328 ? R 10:36 /bin/stress-ng --cpu 0 --metrics-brief -v * fix indentation --- .../linux/eks-log-collector.sh | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 429e2029f..7018d1dfa 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -259,6 +259,7 @@ collect() { get_cni_config get_docker_logs get_sandboxImage_info + get_cpu_throttled_processes } pack() { @@ -589,7 +590,7 @@ get_sandboxImage_info() { } get_docker_info() { - try "collect Docker daemon information" + try "Collect Docker daemon information" if [[ "$(pgrep -o dockerd)" -ne 0 ]]; then timeout 75 docker info > "${COLLECT_DIR}"/docker/docker-info.txt 2>&1 || echo -e "\tTimed out, ignoring \"docker info output \" " @@ -604,6 +605,36 @@ get_docker_info() { ok } + +get_cpu_throttled_processes() { + try "Collect CPU Throttled Process Information" + readonly THROTTLE_LOG="${COLLECT_DIR}"/system/cpu_throttling.txt + command find /sys/fs/cgroup -iname "cpu.stat" -print0 | while IFS= read -r -d '' cs + do + # look for a non-zero nr_throttled value + if grep -q "nr_throttled [1-9]" "${cs}"; then + 
pids=${cs/cpu.stat/cgroup.procs} + lines=$(wc -l < "${pids}") + # ignore if no PIDs are listed + if [ "${lines}" -eq "0" ] ; then + continue + fi + + echo "$cs" >> "${THROTTLE_LOG}" + cat "${cs}" >> "${THROTTLE_LOG}" + while IFS= read -r pid + do + command ps ax | grep "^${pid}" >> "${THROTTLE_LOG}" + done < "${pids}" + echo "" >> "${THROTTLE_LOG}" + fi + done + if [ ! -e "${THROTTLE_LOG}" ]; then + echo "No CPU Throttling Found" >> "${THROTTLE_LOG}" + fi + ok +} + # ----------------------------------------------------------------------------- # Entrypoint parse_options "$@" From debf1a9b0f94f942bd1eeda4ab4cdf6e0cc8edca Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 8 Aug 2022 13:58:13 -0700 Subject: [PATCH 045/232] Add timestamps to packer console output (#982) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c9be7eb6e..6e8324856 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ validate: .PHONY: k8s k8s: validate @echo "$(T_GREEN)Building AMI for version $(T_YELLOW)$(kubernetes_version)$(T_GREEN) on $(T_YELLOW)$(arch)$(T_RESET)" - $(PACKER_BINARY) build $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json + $(PACKER_BINARY) build -timestamp-ui $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html From dc22b732904141b389934d32ff2d1922c4929e96 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Mon, 8 Aug 2022 21:31:49 -0500 Subject: [PATCH 046/232] eks-log-collector.sh: identify IO throttled processes (#981) * identify IO throttled processes * bump log collector version * update filename and version --- log-collector-script/linux/eks-log-collector.sh | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git 
a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 7018d1dfa..561889915 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -20,7 +20,7 @@ export LANG="C" export LC_ALL="C" # Global options -readonly PROGRAM_VERSION="0.6.2" +readonly PROGRAM_VERSION="0.7.0" readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" @@ -260,6 +260,7 @@ collect() { get_docker_logs get_sandboxImage_info get_cpu_throttled_processes + get_io_throttled_processes } pack() { @@ -552,6 +553,8 @@ get_system_services() { timeout 75 ps fauxwww --headers > "${COLLECT_DIR}"/system/ps.txt 2>&1 timeout 75 ps -eTF --headers > "${COLLECT_DIR}"/system/ps-threads.txt 2>&1 timeout 75 netstat -plant > "${COLLECT_DIR}"/system/netstat.txt 2>&1 + timeout 75 cat /proc/stat > "${COLLECT_DIR}"/system/procstat.txt 2>&1 + timeout 75 cat /proc/[0-9]*/stat > "${COLLECT_DIR}"/system/allprocstat.txt 2>&1 ok } @@ -605,7 +608,6 @@ get_docker_info() { ok } - get_cpu_throttled_processes() { try "Collect CPU Throttled Process Information" readonly THROTTLE_LOG="${COLLECT_DIR}"/system/cpu_throttling.txt @@ -635,6 +637,16 @@ get_cpu_throttled_processes() { ok } +get_io_throttled_processes() { + try "Collect IO Throttled Process Information" + readonly IO_THROTTLE_LOG="${COLLECT_DIR}"/system/io_throttling.txt + command echo -e "PID Name Block IO Delay (centisconds)" > ${IO_THROTTLE_LOG} + # column 42 is Aggregated block I/O delays, measured in centiseconds so we capture the non-zero block + # I/O delays. 
+ command cut -d" " -f 1,2,42 /proc/[0-9]*/stat | sort -n -k+3 -r | grep -v 0$ >> ${IO_THROTTLE_LOG} + ok +} + # ----------------------------------------------------------------------------- # Entrypoint parse_options "$@" From 918b1df72a1d8dfd8926b878a0dcd33c05a9d587 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Tue, 9 Aug 2022 12:54:53 -0700 Subject: [PATCH 047/232] Adding make command to support 1.23 AMI creation (#983) --- Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile b/Makefile index 6e8324856..dd229ba75 100644 --- a/Makefile +++ b/Makefile @@ -57,3 +57,7 @@ k8s: validate .PHONY: 1.22 1.22: $(MAKE) k8s kubernetes_version=1.22.9 kubernetes_build_date=2022-06-03 pull_cni_from_github=true + +.PHONY: 1.23 +1.23: + $(MAKE) k8s kubernetes_version=1.23.7 kubernetes_build_date=2022-06-29 pull_cni_from_github=true From ce469680ded2a9b55facfc8e30b9a9ce4c660cdf Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Tue, 9 Aug 2022 13:15:01 -0700 Subject: [PATCH 048/232] Update CHANGELOG.md for AMI release 20220802 (#984) --- CHANGELOG.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index df4343055..4820885ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,41 @@ # Changelog +### AMI Release v20220802 +* amazon-eks-gpu-node-1.23-v20220802 +* amazon-eks-gpu-node-1.22-v20220802 +* amazon-eks-gpu-node-1.21-v20220802 +* amazon-eks-gpu-node-1.20-v20220802 +* amazon-eks-gpu-node-1.19-v20220802 +* amazon-eks-arm64-node-1.23-v20220802 +* amazon-eks-arm64-node-1.22-v20220802 +* amazon-eks-arm64-node-1.21-v20220802 +* amazon-eks-arm64-node-1.20-v20220802 +* amazon-eks-arm64-node-1.19-v20220802 +* amazon-eks-node-1.23-v20220802 +* amazon-eks-node-1.22-v20220802 +* amazon-eks-node-1.21-v20220802 +* amazon-eks-node-1.20-v20220802 +* amazon-eks-node-1.19-v20220802 + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.7/2022-06-29/ +* 
s3://amazon-eks/1.22.9/2022-03-09/ +* s3://amazon-eks/1.21.12/2022-05-20/ +* s3://amazon-eks/1.20.15/2022-06-20/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.204-113.362.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1575.0-1.amzn2 + +Notable changes: +* Release 1.23 AMIs publicly + ### AMI Release v20220725 * amazon-eks-gpu-node-1.22-v20220725 * amazon-eks-gpu-node-1.21-v20220725 From 090678eafabd0c9f03aa18d8dad8bf46361b0196 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 12 Aug 2022 16:37:14 -0700 Subject: [PATCH 049/232] Update kubelet 1.20-1.23 build dates to `2022-07-27` (#987) --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index dd229ba75..8e9c3b8ca 100644 --- a/Makefile +++ b/Makefile @@ -48,16 +48,16 @@ k8s: validate .PHONY: 1.20 1.20: - $(MAKE) k8s kubernetes_version=1.20.15 kubernetes_build_date=2022-06-20 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.20.15 kubernetes_build_date=2022-07-27 pull_cni_from_github=true .PHONY: 1.21 1.21: - $(MAKE) k8s kubernetes_version=1.21.12 kubernetes_build_date=2022-05-20 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.21.12 kubernetes_build_date=2022-07-27 pull_cni_from_github=true .PHONY: 1.22 1.22: - $(MAKE) k8s kubernetes_version=1.22.9 kubernetes_build_date=2022-06-03 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.22.9 kubernetes_build_date=2022-07-27 pull_cni_from_github=true .PHONY: 1.23 1.23: - $(MAKE) k8s kubernetes_version=1.23.7 kubernetes_build_date=2022-06-29 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.23.7 kubernetes_build_date=2022-07-27 pull_cni_from_github=true From 07da85734df21518c9d8472c7820ce194a02cdec Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 15 Aug 2022 08:58:10 -0700 Subject: [PATCH 050/232] Fix kubelet versions for build 
date 2022-07-27 (#988) This was missed when build dates were updated to 2022-07-27. --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 8e9c3b8ca..b77731722 100644 --- a/Makefile +++ b/Makefile @@ -52,12 +52,12 @@ k8s: validate .PHONY: 1.21 1.21: - $(MAKE) k8s kubernetes_version=1.21.12 kubernetes_build_date=2022-07-27 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2022-07-27 pull_cni_from_github=true .PHONY: 1.22 1.22: - $(MAKE) k8s kubernetes_version=1.22.9 kubernetes_build_date=2022-07-27 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.22.12 kubernetes_build_date=2022-07-27 pull_cni_from_github=true .PHONY: 1.23 1.23: - $(MAKE) k8s kubernetes_version=1.23.7 kubernetes_build_date=2022-07-27 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.23.9 kubernetes_build_date=2022-07-27 pull_cni_from_github=true From 2bd132dfb3ab604c4156a9c82d2a0948b5e2b8d5 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 16 Aug 2022 15:08:56 -0700 Subject: [PATCH 051/232] Update CHANGELOG.md (#994) --- CHANGELOG.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4820885ef..c5b72d87f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,43 @@ # Changelog +### AMI Release v20220811 +* amazon-eks-gpu-node-1.23-v20220811 +* amazon-eks-gpu-node-1.22-v20220811 +* amazon-eks-gpu-node-1.21-v20220811 +* amazon-eks-gpu-node-1.20-v20220811 +* amazon-eks-gpu-node-1.19-v20220811 +* amazon-eks-arm64-node-1.23-v20220811 +* amazon-eks-arm64-node-1.22-v20220811 +* amazon-eks-arm64-node-1.21-v20220811 +* amazon-eks-arm64-node-1.20-v20220811 +* amazon-eks-arm64-node-1.19-v20220811 +* amazon-eks-node-1.23-v20220811 +* amazon-eks-node-1.22-v20220811 +* amazon-eks-node-1.21-v20220811 +* amazon-eks-node-1.20-v20220811 +* amazon-eks-node-1.19-v20220811 + +Binaries used to build these AMIs are published: +* 
s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.204-113.362.amzn2 +* dockerd: 20.10.13-2.amzn2 +* containerd: 1.4.13-3.amzn2 +* runc: 1.0.3-2.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1575.0-1.amzn2 + +Notable changes: +- Kubelet binaries updated, including a backport of [#109676](https://github.com/kubernetes/kubernetes/pull/109676). +- When using `containerd` as the container runtime, `systemd` will now be used as the cgroup driver. For more information, see [the Kubernetes documentation](https://kubernetes.io/docs/tasks/administer-cluster/kubeadm/configure-cgroup-driver/). +- Updated `aws-neuron-dkms` to `2.3.26` to address [a security issue](https://awsdocs-neuron.readthedocs-hosted.com/en/latest/release-notes/neuron-driver.html#ndriver-2-3-26-0). This is a recommended upgrade for all users of the GPU AMI. 
+ ### AMI Release v20220802 * amazon-eks-gpu-node-1.23-v20220802 * amazon-eks-gpu-node-1.22-v20220802 From ad1ae43865d7d03a9ba83268cc34f5a2fb5bf098 Mon Sep 17 00:00:00 2001 From: Jim DeWaard Date: Tue, 16 Aug 2022 17:27:54 -0500 Subject: [PATCH 052/232] Update container-runtime package versions (#992) --- eks-worker-al2.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 521c1b7de..b53a6566f 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -13,9 +13,9 @@ "kubernetes_version": null, "kubernetes_build_date": null, "kernel_version": "", - "docker_version": "20.10.13-2.amzn2", - "containerd_version": "1.4.13-3.amzn2", - "runc_version": "1.0.3-2.amzn2", + "docker_version": "20.10.17-1.amzn2", + "containerd_version": "1.6.6-1.amzn2", + "runc_version": "1.1.3-1.amzn2", "cni_plugin_version": "v0.8.6", "pull_cni_from_github": "true", "source_ami_id": "", From b0012e8031a5c0f8839a7e0f33fc6e5651da35ab Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 17 Aug 2022 08:30:27 -0700 Subject: [PATCH 053/232] Removes unmaintained CFN template and modernizes README (#995) --- README.md | 41 +----- amazon-eks-nodegroup.yaml | 301 -------------------------------------- 2 files changed, 6 insertions(+), 336 deletions(-) delete mode 100644 amazon-eks-nodegroup.yaml diff --git a/README.md b/README.md index f3bc37212..b503b29b8 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,7 @@ For more information, see [Authentication](https://www.packer.io/docs/builders/a in the Packer documentation. **Note** -The default instance type to build this AMI is an `m4.large` and does not -qualify for the AWS free tier. You are charged for any instances created +The default instance type to build this AMI does not qualify for the AWS free tier. You are charged for any instances created when building this AMI. 
## Building the AMI @@ -31,7 +30,7 @@ make The Makefile chooses a particular kubelet binary to use per kubernetes version which you can [view here](Makefile). To build an Amazon EKS Worker AMI for a particular Kubernetes version run the following command ```bash -make 1.21 ## Build a Amazon EKS Worker AMI for k8s 1.21 +make 1.23 ## Build a Amazon EKS Worker AMI for k8s 1.23 ``` ### Building against other versions of Kubernetes binaries To build an Amazon EKS Worker AMI with other versions of Kubernetes that are not listed above run the following AWS Command @@ -39,18 +38,18 @@ Line Interface (AWS CLI) commands to obtain values for KUBERNETES_VERSION, KUBER ```bash #List of all avalable Kuberenets Versions: aws s3 ls s3://amazon-eks -KUBERNETES_VERSION=1.17.9 # Chose a version and set the variable +KUBERNETES_VERSION=1.23.9 # Chose a version and set the variable #List of all builds for the specified Kubernetes Version: aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/ -KUBERNETES_BUILD_DATE=2020-08-04 # Chose a date and set the variable +KUBERNETES_BUILD_DATE=2022-07-27 # Chose a date and set the variable #List of all platforms available for the selected Kubernetes Version and build date -aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/2020-08-04/bin/ +aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/ PLATFORM=linux # Chose a platform and set the variable #List of all architectures for the selected Kubernetes Version, build date and platform -aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/2020-08-04/bin/linux/ +aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/ ARCH=amd64 #Chose an architecture and set the variable ``` Run the following command to build an Amazon EKS Worker AMI based on the chosen parameters in the previous step @@ -104,34 +103,6 @@ want to launch a node group with your new AMI, see [Launching Amazon EKS Worker Nodes](https://docs.aws.amazon.com/eks/latest/userguide/launch-workers.html) in the Amazon 
EKS User Guide. -The [`amazon-eks-nodegroup.yaml`](amazon-eks-nodegroup.yaml) AWS CloudFormation -template in this repository is provided to launch a node group with the new AMI -ID that is returned when Packer finishes building. Note that there is important -Amazon EC2 user data in this CloudFormation template that bootstraps the worker -nodes when they are launched so that they can register with your Amazon EKS -cluster. Your nodes cannot register properly without this user data. - -### Compatibility with CloudFormation Template - -The CloudFormation template for EKS Nodes is published in the S3 bucket -`amazon-eks` under the path `cloudformation`. You can see a list of previous -versions by running `aws s3 ls s3://amazon-eks/cloudformation/`. - -| CloudFormation Version | EKS AMI versions | [amazon-vpc-cni-k8s](https://github.com/aws/amazon-vpc-cni-k8s/releases) | -| ---------------------- | ------------------------------------------ | -------------------- | -| 2019-09-27 | amazon-eks-node-(1.14,1.13,1.12,1.11)-v20190927 | v1.5.4 -| 2019-09-17 | amazon-eks-node-(1.14,1.13,1.12,1.11)-v20190906 | v1.5.3 -| 2019-02-11 | amazon-eks-node-(1.12,1.11,1.10)-v20190327 | v1.3.2 (for p3dn.24xlarge instances) | -| 2019-02-11 | amazon-eks-node-(1.11,1.10)-v20190220 | v1.3.2 (for p3dn.24xlarge instances) | -| 2019-02-11 | amazon-eks-node-(1.11,1.10)-v20190211 | v1.3.2 (for p3dn.24xlarge instances) | -| 2018-12-10 | amazon-eks-node-(1.11,1.10)-v20181210 | v1.2.1 | -| 2018-11-07 | amazon-eks-node-v25+ | v1.2.1 (for t3 and r5 instances) | -| 2018-08-30 | amazon-eks-node-v23+ | v1.1.0 | -| 2018-08-21 | amazon-eks-node-v23+ | v1.1.0 | - -For older versions of the EKS AMI (v20-v22), you can find the CloudFormation -templates in the same bucket under the path `s3://amazon-eks/1.10.3/2018-06-05/`. 
- ## AL2 / Linux Kernel Information By default, the `amazon-eks-ami` uses a [source_ami_filter](https://github.com/awslabs/amazon-eks-ami/blob/e3f1b910f83ad1f27e68312e50474ea6059f052d/eks-worker-al2.json#L46) that selects the latest [hvm](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/virtualization_types.html) AL2 AMI for the given architecture as the base AMI. For more information on what kernel versions are running on published Amazon EKS optimized Linux AMIs, see [the public documentation](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html). diff --git a/amazon-eks-nodegroup.yaml b/amazon-eks-nodegroup.yaml deleted file mode 100644 index 89366e2bf..000000000 --- a/amazon-eks-nodegroup.yaml +++ /dev/null @@ -1,301 +0,0 @@ -AWSTemplateFormatVersion: "2010-09-09" - -Description: Amazon EKS - Node Group - -Metadata: - "AWS::CloudFormation::Interface": - ParameterGroups: - - Label: - default: EKS Cluster - Parameters: - - ClusterName - - ClusterControlPlaneSecurityGroup - - Label: - default: Worker Node Configuration - Parameters: - - NodeGroupName - - NodeAutoScalingGroupMinSize - - NodeAutoScalingGroupDesiredCapacity - - NodeAutoScalingGroupMaxSize - - NodeInstanceType - - NodeImageIdSSMParam - - NodeImageId - - NodeVolumeSize - - KeyName - - BootstrapArguments - - DisableIMDSv1 - - Label: - default: Worker Network Configuration - Parameters: - - VpcId - - Subnets - -Parameters: - BootstrapArguments: - Type: String - Default: "" - Description: "Arguments to pass to the bootstrap script. See files/bootstrap.sh in https://github.com/awslabs/amazon-eks-ami" - - ClusterControlPlaneSecurityGroup: - Type: "AWS::EC2::SecurityGroup::Id" - Description: The security group of the cluster control plane. - - ClusterName: - Type: String - Description: The cluster name provided when the cluster was created. If it is incorrect, nodes will not be able to join the cluster. 
- - KeyName: - Type: "AWS::EC2::KeyPair::KeyName" - Description: The EC2 Key Pair to allow SSH access to the instances - - NodeAutoScalingGroupDesiredCapacity: - Type: Number - Default: 3 - Description: Desired capacity of Node Group ASG. - - NodeAutoScalingGroupMaxSize: - Type: Number - Default: 4 - Description: Maximum size of Node Group ASG. Set to at least 1 greater than NodeAutoScalingGroupDesiredCapacity. - - NodeAutoScalingGroupMinSize: - Type: Number - Default: 1 - Description: Minimum size of Node Group ASG. - - NodeGroupName: - Type: String - Description: Unique identifier for the Node Group. - - NodeImageId: - Type: String - Default: "" - Description: (Optional) Specify your own custom image ID. This value overrides any AWS Systems Manager Parameter Store value specified above. - - NodeImageIdSSMParam: - Type: "AWS::SSM::Parameter::Value" - Default: /aws/service/eks/optimized-ami/1.22/amazon-linux-2/recommended/image_id - Description: AWS Systems Manager Parameter Store parameter of the AMI ID for the worker node instances. Change this value to match the version of Kubernetes you are using. - - DisableIMDSv1: - Type: String - Default: "false" - AllowedValues: - - "false" - - "true" - - NodeInstanceType: - Type: String - Default: t3.medium - Description: EC2 instance type for the node instances - - NodeVolumeSize: - Type: Number - Default: 20 - Description: Node volume size - - Subnets: - Type: "List" - Description: The subnets where workers can be created. 
- - VpcId: - Type: "AWS::EC2::VPC::Id" - Description: The VPC of the worker instances - -Mappings: - PartitionMap: - aws: - EC2ServicePrincipal: "ec2.amazonaws.com" - aws-us-gov: - EC2ServicePrincipal: "ec2.amazonaws.com" - aws-cn: - EC2ServicePrincipal: "ec2.amazonaws.com.cn" - aws-iso: - EC2ServicePrincipal: "ec2.c2s.ic.gov" - aws-iso-b: - EC2ServicePrincipal: "ec2.sc2s.sgov.gov" - -Conditions: - HasNodeImageId: !Not - - "Fn::Equals": - - !Ref NodeImageId - - "" - - IMDSv1Disabled: - "Fn::Equals": - - !Ref DisableIMDSv1 - - "true" - -Resources: - NodeInstanceRole: - Type: "AWS::IAM::Role" - Properties: - AssumeRolePolicyDocument: - Version: "2012-10-17" - Statement: - - Effect: Allow - Principal: - Service: - - !FindInMap [PartitionMap, !Ref "AWS::Partition", EC2ServicePrincipal] - Action: - - "sts:AssumeRole" - ManagedPolicyArns: - - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonEKSWorkerNodePolicy" - - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonEKS_CNI_Policy" - - !Sub "arn:${AWS::Partition}:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" - Path: / - - NodeInstanceProfile: - Type: "AWS::IAM::InstanceProfile" - Properties: - Path: / - Roles: - - !Ref NodeInstanceRole - - NodeSecurityGroup: - Type: "AWS::EC2::SecurityGroup" - Properties: - GroupDescription: Security group for all nodes in the cluster - Tags: - - Key: !Sub kubernetes.io/cluster/${ClusterName} - Value: owned - VpcId: !Ref VpcId - - NodeSecurityGroupIngress: - Type: "AWS::EC2::SecurityGroupIngress" - DependsOn: NodeSecurityGroup - Properties: - Description: Allow node to communicate with each other - FromPort: 0 - GroupId: !Ref NodeSecurityGroup - IpProtocol: "-1" - SourceSecurityGroupId: !Ref NodeSecurityGroup - ToPort: 65535 - - ClusterControlPlaneSecurityGroupIngress: - Type: "AWS::EC2::SecurityGroupIngress" - DependsOn: NodeSecurityGroup - Properties: - Description: Allow pods to communicate with the cluster API Server - FromPort: 443 - GroupId: !Ref 
ClusterControlPlaneSecurityGroup - IpProtocol: tcp - SourceSecurityGroupId: !Ref NodeSecurityGroup - ToPort: 443 - - ControlPlaneEgressToNodeSecurityGroup: - Type: "AWS::EC2::SecurityGroupEgress" - DependsOn: NodeSecurityGroup - Properties: - Description: Allow the cluster control plane to communicate with worker Kubelet and pods - DestinationSecurityGroupId: !Ref NodeSecurityGroup - FromPort: 1025 - GroupId: !Ref ClusterControlPlaneSecurityGroup - IpProtocol: tcp - ToPort: 65535 - - ControlPlaneEgressToNodeSecurityGroupOn443: - Type: "AWS::EC2::SecurityGroupEgress" - DependsOn: NodeSecurityGroup - Properties: - Description: Allow the cluster control plane to communicate with pods running extension API servers on port 443 - DestinationSecurityGroupId: !Ref NodeSecurityGroup - FromPort: 443 - GroupId: !Ref ClusterControlPlaneSecurityGroup - IpProtocol: tcp - ToPort: 443 - - NodeSecurityGroupFromControlPlaneIngress: - Type: "AWS::EC2::SecurityGroupIngress" - DependsOn: NodeSecurityGroup - Properties: - Description: Allow worker Kubelets and pods to receive communication from the cluster control plane - FromPort: 1025 - GroupId: !Ref NodeSecurityGroup - IpProtocol: tcp - SourceSecurityGroupId: !Ref ClusterControlPlaneSecurityGroup - ToPort: 65535 - - NodeSecurityGroupFromControlPlaneOn443Ingress: - Type: "AWS::EC2::SecurityGroupIngress" - DependsOn: NodeSecurityGroup - Properties: - Description: Allow pods running extension API servers on port 443 to receive communication from cluster control plane - FromPort: 443 - GroupId: !Ref NodeSecurityGroup - IpProtocol: tcp - SourceSecurityGroupId: !Ref ClusterControlPlaneSecurityGroup - ToPort: 443 - - NodeLaunchTemplate: - Type: "AWS::EC2::LaunchTemplate" - Properties: - LaunchTemplateData: - BlockDeviceMappings: - - DeviceName: /dev/xvda - Ebs: - DeleteOnTermination: true - VolumeSize: !Ref NodeVolumeSize - VolumeType: gp2 - IamInstanceProfile: - Arn: !GetAtt NodeInstanceProfile.Arn - ImageId: !If - - HasNodeImageId - - 
!Ref NodeImageId - - !Ref NodeImageIdSSMParam - InstanceType: !Ref NodeInstanceType - KeyName: !Ref KeyName - SecurityGroupIds: - - !Ref NodeSecurityGroup - UserData: !Base64 - "Fn::Sub": | - #!/bin/bash - set -o xtrace - /etc/eks/bootstrap.sh ${ClusterName} ${BootstrapArguments} - /opt/aws/bin/cfn-signal --exit-code $? \ - --stack ${AWS::StackName} \ - --resource NodeGroup \ - --region ${AWS::Region} - MetadataOptions: - HttpPutResponseHopLimit : 2 - HttpEndpoint: enabled - HttpTokens: !If - - IMDSv1Disabled - - required - - optional - - NodeGroup: - Type: "AWS::AutoScaling::AutoScalingGroup" - Properties: - DesiredCapacity: !Ref NodeAutoScalingGroupDesiredCapacity - LaunchTemplate: - LaunchTemplateId: !Ref NodeLaunchTemplate - Version: !GetAtt NodeLaunchTemplate.LatestVersionNumber - MaxSize: !Ref NodeAutoScalingGroupMaxSize - MinSize: !Ref NodeAutoScalingGroupMinSize - Tags: - - Key: Name - PropagateAtLaunch: true - Value: !Sub ${ClusterName}-${NodeGroupName}-Node - - Key: !Sub kubernetes.io/cluster/${ClusterName} - PropagateAtLaunch: true - Value: owned - VPCZoneIdentifier: !Ref Subnets - UpdatePolicy: - AutoScalingRollingUpdate: - MaxBatchSize: 1 - MinInstancesInService: !Ref NodeAutoScalingGroupDesiredCapacity - PauseTime: PT5M - -Outputs: - NodeInstanceRole: - Description: The node instance role - Value: !GetAtt NodeInstanceRole.Arn - - NodeSecurityGroup: - Description: The security group for the node group - Value: !Ref NodeSecurityGroup - - NodeAutoScalingGroup: - Description: The autoscaling group - Value: !Ref NodeGroup From b4bbfe51a4115f66be7a5afe9c3bc47597446133 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 17 Aug 2022 10:50:19 -0700 Subject: [PATCH 054/232] Removes reference to CFN template in ArchiveBuildConfig.yaml (#998) --- ArchiveBuildConfig.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ArchiveBuildConfig.yaml b/ArchiveBuildConfig.yaml index a93376845..2c5bf850c 100644 --- a/ArchiveBuildConfig.yaml +++ 
b/ArchiveBuildConfig.yaml @@ -13,7 +13,6 @@ dependencies: files: - src: Makefile - src: eks-worker-al2.json - - src: amazon-eks-nodegroup.yaml archive: name: amazon-eks-ami.tar.gz - type: tgz \ No newline at end of file + type: tgz From eaed70ddc58993ccce68d4627759f614ab72d7f8 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 18 Aug 2022 15:15:48 -0700 Subject: [PATCH 055/232] Adds section to README on configuring kubelet config (#997) --- README.md | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b503b29b8..a0ef8250b 100644 --- a/README.md +++ b/README.md @@ -83,9 +83,6 @@ make k8s \ ``` **Note**: Confirm that the binary_bucket_name, binary_bucket_region, kubernetes_version, and kubernetes_build_date parameters match the path to your binaries in Amazon S3. - - - The Makefile runs Packer with the `eks-worker-al2.json` build specification template and the [amazon-ebs](https://www.packer.io/docs/builders/amazon-ebs.html) builder. An instance is launched and the Packer [Shell @@ -109,6 +106,58 @@ By default, the `amazon-eks-ami` uses a [source_ami_filter](https://github.com/a When building an AMI, you can set the `kernel_version` to `4.14` or `5.4` to customize the kernel version. The [upgrade_kernel.sh script](https://github.com/awslabs/amazon-eks-ami/blob/master/scripts/upgrade_kernel.sh#L26) contains the logic for updating and upgrading the kernel. For Kubernetes versions 1.18 and below, it uses the `4.14` kernel if not set, and it will install the latest patches. For Kubernetes version 1.19 and above, it uses the `5.4` kernel if not set. +## Customizing Kubelet Config + +In some cases, customers may want to customize the [kubelet configuration](https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/#kubelet-config-k8s-io-v1beta1-KubeletConfiguration) on their nodes, and there are two mechanisms to do that with the EKS Optimized AMI. 
+ +**Set the "--kubelet-extra-args" flag when invoking bootstrap.sh** + +`bootstrap.sh`, the script that bootstraps nodes when using the EKS Optimized AMI, supports a flag called `--kubelet-extra-args` that allows you to pass in additional `kubelet` configuration. If you invoke the bootstrap script yourself (self-managed nodegroups or EKS managed nodegroups with custom AMIs), you can use that to customize your configuration. For example, you can use something like the following in your userdata: + +``` +/etc/eks/bootstrap.sh my-cluster --kubelet-extra-args '--registry-qps=20 --registry-burst=40' +``` + +In this case, it will set `registryPullQPS` to 20 and `registryBurst` to 40 in `kubelet`. Some of the flags, like the ones above, are marked as deprecated and you're encouraged to set them in the `kubelet` config file (described below), but they continue to work as of 1.23. + +**Update the kubelet config file** + +You can update the `kubelet` config file directly with new configuration. On EKS Optimized AMIs, the file is stored at `/etc/kubernetes/kubelet/kubelet-config.json`. It must be valid JSON. You can use a utility like `jq` (or your tool of choice) to edit the config in your user data: + +``` +echo "$(jq ".registryPullQPS=20 | .registryBurst=40" /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json +``` + +There are a couple of important caveats here: + +1. If you update the `kubelet` config file after `kubelet` has already started (i.e. `bootstrap.sh` already ran), you'll need to restart `kubelet` to pick up the latest configuration. +2. [bootstrap.sh](https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh) does modify a few fields, like `kubeReserved` and `evictionHard`, so you'd need to modify the config after the bootstrap script is run and restart `kubelet` to overwrite those properties. + +**View active kubelet config** + +When `kubelet` starts up, it logs all possible flags, including unset flags. 
The unset flags get logged with default values. *These logs do not necessarily reflect the actual active configuration.* This has caused confusion in the past when customers have configured the `kubelet` config file with one value and notice the default value is logged. Here is an example of the referenced log: + +``` +Aug 16 21:53:49 ip-192-168-92-220.us-east-2.compute.internal kubelet[3935]: I0816 21:53:49.202824 3935 flags.go:59] FLAG: --registry-burst="10" +Aug 16 21:53:49 ip-192-168-92-220.us-east-2.compute.internal kubelet[3935]: I0816 21:53:49.202829 3935 flags.go:59] FLAG: --registry-qps="5" +``` + +To view the actual `kubelet` config on your node, you can use the Kubernetes API to confirm that your configuration has applied. + +``` +$ kubectl proxy +$ curl -sSL "http://localhost:8001/api/v1/nodes/ip-192-168-92-220.us-east-2.compute.internal/proxy/configz" | jq + +{ + "kubeletconfig": { + ... + "registryPullQPS": 20, + "registryBurst": 40, + ... + } +} +``` + ## Security For security issues or concerns, please do not open an issue or pull request on GitHub. Please report any suspected or confirmed security issues to AWS Security https://aws.amazon.com/security/vulnerability-reporting/ From 4d79489c567079883f1f69ac68ba58a4134d5ba3 Mon Sep 17 00:00:00 2001 From: Trent Petersen Date: Tue, 23 Aug 2022 14:14:26 -0500 Subject: [PATCH 056/232] Add 1.23 to `all` target (#1005) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b77731722..32571074b 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.19 1.20 1.21 1.22 +all: 1.19 1.20 1.21 1.22 1.23 .PHONY: validate validate: From 2acffd1fe57b434ca83cf18ccc4bf07902db44d6 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 23 Aug 2022 12:15:33 -0700 Subject: [PATCH 057/232] Update example for AWSCLI v2. 
(#1003) --- log-collector-script/linux/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/log-collector-script/linux/README.md b/log-collector-script/linux/README.md index ee08bfd72..f634e271c 100644 --- a/log-collector-script/linux/README.md +++ b/log-collector-script/linux/README.md @@ -89,9 +89,10 @@ Trying to archive gathered information... #### *To invoke SSM agent to run EKS log collector script and push bundle to S3 from Worker Node(s):* -1. Create the SSM document named "EKSLogCollector" using the following command:
+1. Create the SSM document named "EKSLogCollector" using the following commands:
``` -aws ssm create-document --name "EKSLogCollector" --document-type "Command" --content https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-ssm-content.json +curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-ssm-content.json +aws ssm create-document --name "EKSLogCollector" --document-type "Command" --content file://eks-ssm-content.json ``` 2. To execute the bash script in the SSM document and to collect the logs from worker, run the following command:
``` From 5585aa5eb9b4800326a052720068bf8a61a7cad3 Mon Sep 17 00:00:00 2001 From: Jayanth Varavani <1111446+jayanthvn@users.noreply.github.com> Date: Tue, 23 Aug 2022 12:21:27 -0700 Subject: [PATCH 058/232] Support mac2, p4de, r6a instance types (#1000) --- files/eni-max-pods.txt | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index c4b0fd9b6..8981c9c0b 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. # -# This file was generated at 2022-06-10T09:23:03-07:00 +# This file was generated at 2022-08-15T23:21:13Z # # The regions queried were: # - ap-northeast-1 @@ -394,6 +394,7 @@ m6id.large 29 m6id.metal 737 m6id.xlarge 58 mac1.metal 234 +mac2.metal 234 p2.16xlarge 234 p2.8xlarge 234 p2.xlarge 58 @@ -402,6 +403,7 @@ p3.2xlarge 58 p3.8xlarge 234 p3dn.24xlarge 737 p4d.24xlarge 737 +p4de.24xlarge 737 r3.2xlarge 58 r3.4xlarge 234 r3.8xlarge 234 @@ -474,6 +476,17 @@ r5n.8xlarge 234 r5n.large 29 r5n.metal 737 r5n.xlarge 58 +r6a.12xlarge 234 +r6a.16xlarge 737 +r6a.24xlarge 737 +r6a.2xlarge 58 +r6a.32xlarge 737 +r6a.48xlarge 737 +r6a.4xlarge 234 +r6a.8xlarge 234 +r6a.large 29 +r6a.metal 737 +r6a.xlarge 58 r6g.12xlarge 234 r6g.16xlarge 737 r6g.2xlarge 58 From 64cd62179ac2df8eb5e3aea3bda5e2c0ed70349b Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Tue, 23 Aug 2022 14:21:51 -0500 Subject: [PATCH 059/232] Log collector enhancements (#993) * collect xfs fragmentation information * add tests for network connectivity - ping well known hostnames - capture the results of curling the API server a few times - save the node's resolv.conf --- .../linux/eks-log-collector.sh | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh 
index 561889915..1430b8545 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -20,7 +20,7 @@ export LANG="C" export LC_ALL="C" # Global options -readonly PROGRAM_VERSION="0.7.0" +readonly PROGRAM_VERSION="0.7.1" readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" @@ -286,7 +286,7 @@ get_mounts_info() { lvs > "${COLLECT_DIR}"/storage/lvs.txt pvs > "${COLLECT_DIR}"/storage/pvs.txt vgs > "${COLLECT_DIR}"/storage/vgs.txt - + mount -t xfs | awk '{print $1}' | xargs -I{} -- sh -c "xfs_info {}; xfs_db -r -c 'freesp -s' {}" > "${COLLECT_DIR}"/storage/xfs.txt ok } @@ -491,6 +491,20 @@ get_networking_info() { # configure-multicard-interfaces timeout 75 journalctl -u configure-multicard-interfaces > "${COLLECT_DIR}"/networking/configure-multicard-interfaces.txt || echo -e "\tTimed out, ignoring \"configure-multicard-interfaces unit output \" " + # test some network connectivity + timeout 75 ping -A -c 10 amazon.com > "${COLLECT_DIR}"/networking/ping_amazon.com.txt + timeout 75 ping -A -c 10 public.ecr.aws > "${COLLECT_DIR}"/networking/ping_public.ecr.aws.txt + + if [[ -e "${COLLECT_DIR}"/kubelet/kubeconfig.yaml ]]; then + API_SERVER=$(grep server: "${COLLECT_DIR}"/kubelet/kubeconfig.yaml | sed 's/.*server: //') + CA_CRT=$(grep certificate-authority: "${COLLECT_DIR}"/kubelet/kubeconfig.yaml | sed 's/.*certificate-authority: //') + for i in $(seq 5); do + echo -e "curling ${API_SERVER} ($i of 5) $(date --utc +%FT%T.%3N%Z)\n\n" >> ${COLLECT_DIR}"/networking/curl_api_server.txt" + timeout 75 curl -v --cacert "${CA_CRT}" "${API_SERVER}"/livez?verbose >> ${COLLECT_DIR}"/networking/curl_api_server.txt" 2>&1 + done + fi + + cp /etc/resolv.conf "${COLLECT_DIR}"/networking/resolv.conf ok } From d9c8938f6666d3037d126ee906c2f133f2be0fb8 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 23 
Aug 2022 12:22:54 -0700 Subject: [PATCH 060/232] Include source AMI details in tags and manifest (#1001) --- eks-worker-al2.json | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index b53a6566f..624572849 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -91,9 +91,11 @@ "tags": { "Name": "{{user `ami_name`}}", "created": "{{timestamp}}", + "build_region": "{{ .BuildRegion }}", + "source_ami_id": "{{ .SourceAMI }}", + "source_ami_name": "{{ .SourceAMIName }}", "docker_version": "{{ user `docker_version`}}", "containerd_version": "{{ user `containerd_version`}}", - "source_ami_id": "{{ user `source_ami_id`}}", "kubernetes": "{{ user `kubernetes_version`}}/{{ user `kubernetes_build_date` }}/bin/linux/{{ user `arch` }}", "cni_plugin_version": "{{ user `cni_plugin_version`}}" }, @@ -192,12 +194,20 @@ { "type": "manifest", "output": "manifest.json", - "strip_path": true + "strip_path": true, + "custom_data": { + "source_ami_name": "{{ build `SourceAMIName` }}", + "source_ami_id": "{{ build `SourceAMI` }}" + } }, { "type": "manifest", "output": "{{user `ami_name`}}-manifest.json", - "strip_path": true + "strip_path": true, + "custom_data": { + "source_ami_name": "{{ build `SourceAMIName` }}", + "source_ami_id": "{{ build `SourceAMI` }}" + } } ] } From a043436e05b29e395616df7a6d2f871f339d5985 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Fri, 26 Aug 2022 11:05:40 -0700 Subject: [PATCH 061/232] Updating CHANGELOG.md with latest AMI details (#1008) --- CHANGELOG.md | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c5b72d87f..3fcdc1766 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,50 @@ # Changelog +### AMI Release v20220824 +* amazon-eks-gpu-node-1.23-v20220824 +* amazon-eks-gpu-node-1.22-v20220824 +* amazon-eks-gpu-node-1.21-v20220824 +* amazon-eks-gpu-node-1.20-v20220824 +* 
amazon-eks-gpu-node-1.19-v20220824 +* amazon-eks-arm64-node-1.23-v20220824 +* amazon-eks-arm64-node-1.22-v20220824 +* amazon-eks-arm64-node-1.21-v20220824 +* amazon-eks-arm64-node-1.20-v20220824 +* amazon-eks-arm64-node-1.19-v20220824 +* amazon-eks-node-1.23-v20220824 +* amazon-eks-node-1.22-v20220824 +* amazon-eks-node-1.21-v20220824 +* amazon-eks-node-1.20-v20220824 +* amazon-eks-node-1.19-v20220824 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.23.9-20220824` +* `1.22.12-20220824` +* `1.21.14-20220824` +* `1.20.15-20220824` +* `1.19.15-20220824` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ +* s3://amazon-eks/1.19.15/2021-11-10/ + +AMI details: +* kernel: 5.4.209-116.363.amzn2 +* dockerd: 20.10.17-1.amzn2 +* containerd: 1.6.6-1.amzn2 +* runc: 1.1.3-1.amzn2-1.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1575.0-1.amzn2 + +Notable changes: +* We are updating the versions of docker, containerd and runc as part of this AMI release. +* Kernel version is also updated to include the [latest CVE patches](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2022-034.html) +* This is the last release for 1.19 as we are at [end of support for 1.19](https://docs.aws.amazon.com/eks/latest/userguide/kubernetes-versions.html#kubernetes-release-calendar) + ### AMI Release v20220811 * amazon-eks-gpu-node-1.23-v20220811 * amazon-eks-gpu-node-1.22-v20220811 From 2e5a10d70e7756893143429a0107b3e016edb4bc Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Thu, 8 Sep 2022 14:42:55 -0500 Subject: [PATCH 062/232] Fix IPv6 CLUSTER_DNS, add test harness (#931) * Fixes ipv6 dns cluster IP, see #860 and #900 for more information. * Adds a unit testing approach, see `test/README.md` for more information. 
--- Makefile | 20 +++--- files/bootstrap.sh | 47 ++++++------- files/max-pods-calculator.sh | 11 ++-- test/Dockerfile | 15 +++++ test/README.md | 40 +++++++++++ .../ip-family-service-ipv6-cidr-mismatch.sh | 17 +++++ test/cases/ipv4-cluster-dns-ip.sh | 24 +++++++ test/cases/ipv6-cluster-dns-ip.sh | 24 +++++++ ...-dns-cluster-ip-given-service-ipv6-cidr.sh | 24 +++++++ .../ipv6-ip-family-and-service-ipv6-cidr.sh | 16 +++++ test/cases/max-pods-cni-1-11-2-delegation.sh | 22 +++++++ test/cases/max-pods-cni-1-11-2.sh | 21 ++++++ test/cases/max-pods-cni-1-7-5.sh | 21 ++++++ test/entrypoint.sh | 10 +++ test/mocks/aws | 16 +++++ .../describe-instance-types/m4-xlarge.json | 6 ++ .../describe-instance-types/m5-8xlarge.json | 6 ++ test/mocks/iptables-save | 4 ++ test/mocks/sudo | 4 ++ test/mocks/systemctl | 4 ++ test/test-harness.sh | 66 +++++++++++++++++++ 21 files changed, 383 insertions(+), 35 deletions(-) mode change 100644 => 100755 files/bootstrap.sh mode change 100644 => 100755 files/max-pods-calculator.sh create mode 100644 test/Dockerfile create mode 100644 test/README.md create mode 100755 test/cases/ip-family-service-ipv6-cidr-mismatch.sh create mode 100755 test/cases/ipv4-cluster-dns-ip.sh create mode 100755 test/cases/ipv6-cluster-dns-ip.sh create mode 100755 test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh create mode 100755 test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh create mode 100755 test/cases/max-pods-cni-1-11-2-delegation.sh create mode 100755 test/cases/max-pods-cni-1-11-2.sh create mode 100755 test/cases/max-pods-cni-1-7-5.sh create mode 100755 test/entrypoint.sh create mode 100755 test/mocks/aws create mode 100644 test/mocks/describe-instance-types/m4-xlarge.json create mode 100644 test/mocks/describe-instance-types/m5-8xlarge.json create mode 100755 test/mocks/iptables-save create mode 100755 test/mocks/sudo create mode 100755 test/mocks/systemctl create mode 100755 test/test-harness.sh diff --git a/Makefile b/Makefile index 
32571074b..b04f1fbf9 100644 --- a/Makefile +++ b/Makefile @@ -29,35 +29,39 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.19 1.20 1.21 1.22 1.23 +all: 1.19 1.20 1.21 1.22 1.23 ## Build all versions of EKS Optimized AL2 AMI .PHONY: validate -validate: +validate: ## Validate packer config $(PACKER_BINARY) validate $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json .PHONY: k8s -k8s: validate +k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI @echo "$(T_GREEN)Building AMI for version $(T_YELLOW)$(kubernetes_version)$(T_GREEN) on $(T_YELLOW)$(arch)$(T_RESET)" $(PACKER_BINARY) build -timestamp-ui $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html .PHONY: 1.19 -1.19: +1.19: ## Build EKS Optimized AL2 AMI - K8s 1.19 $(MAKE) k8s kubernetes_version=1.19.15 kubernetes_build_date=2021-11-10 pull_cni_from_github=true .PHONY: 1.20 -1.20: +1.20: ## Build EKS Optimized AL2 AMI - K8s 1.20 $(MAKE) k8s kubernetes_version=1.20.15 kubernetes_build_date=2022-07-27 pull_cni_from_github=true .PHONY: 1.21 -1.21: +1.21: ## Build EKS Optimized AL2 AMI - K8s 1.21 $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2022-07-27 pull_cni_from_github=true .PHONY: 1.22 -1.22: +1.22: ## Build EKS Optimized AL2 AMI - K8s 1.22 $(MAKE) k8s kubernetes_version=1.22.12 kubernetes_build_date=2022-07-27 pull_cni_from_github=true .PHONY: 1.23 -1.23: +1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 $(MAKE) k8s kubernetes_version=1.23.9 kubernetes_build_date=2022-07-27 pull_cni_from_github=true + +.PHONY: help +help: ## Display help + @awk 'BEGIN {FS = ":.*##"; printf "Usage:\n make \033[36m\033[0m\n"} /^[\.a-zA-Z_0-9\-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", 
substr($$0, 5) } ' $(MAKEFILE_LIST) diff --git a/files/bootstrap.sh b/files/bootstrap.sh old mode 100644 new mode 100755 index 1cc5ca6c3..039e23246 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -151,6 +151,7 @@ IP_FAMILY="${IP_FAMILY:-}" SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" CLUSTER_ID="${CLUSTER_ID:-}" +IMDS_ENDPOINT="${IMDS_ENDPOINT:-169.254.169.254:80}" function get_pause_container_account_for_region () { local region="$1" @@ -186,7 +187,7 @@ function _get_token() { local token_result= local http_result= - token_result=$(curl -s -w "\n%{http_code}" -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" "http://169.254.169.254/latest/api/token") + token_result=$(curl -s -w "\n%{http_code}" -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" "http://${IMDS_ENDPOINT}/latest/api/token") http_result=$(echo "$token_result" | tail -n 1) if [[ "$http_result" != "200" ]] then @@ -218,11 +219,11 @@ function _get_meta_data() { local path=$1 local metadata_result= - metadata_result=$(curl -s -w "\n%{http_code}" -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/$path) + metadata_result=$(curl -s -w "\n%{http_code}" -H "X-aws-ec2-metadata-token: $TOKEN" http://${IMDS_ENDPOINT}/$path) http_result=$(echo "$metadata_result" | tail -n 1) if [[ "$http_result" != "200" ]] then - echo -e "Failed to get metadata:\n$metadata_result\nhttp://169.254.169.254/$path\n$TOKEN" + echo -e "Failed to get metadata:\n$metadata_result\nhttp://${IMDS_ENDPOINT}/$path\n$TOKEN" return 1 else local lines=$(echo "$metadata_result" | wc -l) @@ -322,11 +323,6 @@ if [[ ! -z "${IP_FAMILY}" ]]; then echo "Invalid IpFamily. Only ipv4 or ipv6 are allowed" exit 1 fi - - if [[ "${IP_FAMILY}" == "ipv6" ]] && [[ ! -z "${B64_CLUSTER_CA}" ]] && [[ ! -z "${APISERVER_ENDPOINT}" ]] && [[ -z "${SERVICE_IPV6_CIDR}" ]]; then - echo "Service Ipv6 Cidr must be provided when ip-family is specified as IPV6" - exit 1 - fi fi if [[ ! 
-z "${SERVICE_IPV6_CIDR}" ]]; then @@ -339,7 +335,7 @@ fi TOKEN=$(get_token) AWS_DEFAULT_REGION=$(get_meta_data 'latest/dynamic/instance-identity/document' | jq .region -r) -AWS_SERVICES_DOMAIN=$(get_meta_data '2018-09-24/meta-data/services/domain') +AWS_SERVICES_DOMAIN=$(get_meta_data 'latest/meta-data/services/domain') MACHINE=$(uname -m) if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then @@ -410,7 +406,7 @@ if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then fi if [[ -z "${IP_FAMILY}" ]] || [[ "${IP_FAMILY}" == "None" ]]; then - ### this can happen when the ifFamily field is not found in describeCluster response + ### this can happen when the ipFamily field is not found in describeCluster response ### or B64_CLUSTER_CA and APISERVER_ENDPOINT are defined but IPFamily isn't IP_FAMILY="ipv4" fi @@ -460,21 +456,28 @@ fi ### kubelet.service configuration -if [[ "${IP_FAMILY}" == "ipv6" ]]; then - DNS_CLUSTER_IP=$(awk -F/ '{print $1}' <<< $SERVICE_IPV6_CIDR)a -fi - MAC=$(get_meta_data 'latest/meta-data/network/interfaces/macs/' | head -n 1 | sed 's/\/$//') + if [[ -z "${DNS_CLUSTER_IP}" ]]; then - if [[ ! -z "${SERVICE_IPV4_CIDR}" ]] && [[ "${SERVICE_IPV4_CIDR}" != "None" ]] ; then - #Sets the DNS Cluster IP address that would be chosen from the serviceIpv4Cidr. (x.y.z.10) - DNS_CLUSTER_IP=${SERVICE_IPV4_CIDR%.*}.10 - else - TEN_RANGE=$(get_meta_data "latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks" | grep -c '^10\..*' || true ) - DNS_CLUSTER_IP=10.100.0.10 - if [[ "$TEN_RANGE" != "0" ]]; then - DNS_CLUSTER_IP=172.20.0.10 + if [[ "${IP_FAMILY}" == "ipv6" ]]; then + if [[ -z "${SERVICE_IPV6_CIDR}" ]]; then + echo "One of --service-ipv6-cidr or --dns-cluster-ip must be provided when ip-family is specified as ipv6" + exit 1 + fi + DNS_CLUSTER_IP=$(awk -F/ '{print $1}' <<< $SERVICE_IPV6_CIDR)a + fi + + if [[ "${IP_FAMILY}" == "ipv4" ]]; then + if [[ ! 
-z "${SERVICE_IPV4_CIDR}" ]] && [[ "${SERVICE_IPV4_CIDR}" != "None" ]]; then + #Sets the DNS Cluster IP address that would be chosen from the serviceIpv4Cidr. (x.y.z.10) + DNS_CLUSTER_IP=${SERVICE_IPV4_CIDR%.*}.10 + else + TEN_RANGE=$(get_meta_data "latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks" | grep -c '^10\..*' || true ) + DNS_CLUSTER_IP=10.100.0.10 + if [[ "$TEN_RANGE" != "0" ]]; then + DNS_CLUSTER_IP=172.20.0.10 + fi fi fi else diff --git a/files/max-pods-calculator.sh b/files/max-pods-calculator.sh old mode 100644 new mode 100755 index 643a9a22b..bf92d571e --- a/files/max-pods-calculator.sh +++ b/files/max-pods-calculator.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -o pipefail set -o nounset @@ -76,14 +76,15 @@ CNI_MAX_ENI="${CNI_MAX_ENI:-}" INSTANCE_TYPE="${INSTANCE_TYPE:-}" INSTANCE_TYPE_FROM_IMDS="${INSTANCE_TYPE_FROM_IMDS:-false}" SHOW_MAX_ALLOWED="${SHOW_MAX_ALLOWED:-false}" +IMDS_ENDPOINT="${IMDS_ENDPOINT:-169.254.169.254:80}" PREFIX_DELEGATION_SUPPORTED=false IPS_PER_PREFIX=16 if [ "$INSTANCE_TYPE_FROM_IMDS" = true ]; then - TOKEN=$(curl -m 10 -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" -s "http://169.254.169.254/latest/api/token") - export AWS_DEFAULT_REGION=$(curl -s --retry 5 -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/dynamic/instance-identity/document | jq .region -r) - INSTANCE_TYPE=$(curl -m 10 -H "X-aws-ec2-metadata-token: $TOKEN" -s http://169.254.169.254/latest/meta-data/instance-type) + TOKEN=$(curl -m 10 -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" -s "http://${IMDS_ENDPOINT}/latest/api/token") + export AWS_DEFAULT_REGION=$(curl -s --retry 5 -H "X-aws-ec2-metadata-token: $TOKEN" http://${IMDS_ENDPOINT}/latest/dynamic/instance-identity/document | jq .region -r) + INSTANCE_TYPE=$(curl -m 10 -H "X-aws-ec2-metadata-token: $TOKEN" -s http://${IMDS_ENDPOINT}/latest/meta-data/instance-type) elif [ -z "$INSTANCE_TYPE" ]; # There's no reasonable default for an instanceType 
so force one to be provided to the script. then echo "You must specify an instance type to calculate max pods value." @@ -119,7 +120,7 @@ if [[ "$CNI_MAJOR_VERSION" -gt 1 ]] || ([[ "$CNI_MAJOR_VERSION" = 1 ]] && [[ "$C PREFIX_DELEGATION_SUPPORTED=true fi -DESCRIBE_INSTANCES_RESULT=$(aws ec2 describe-instance-types --instance-type $INSTANCE_TYPE --query 'InstanceTypes[0].{Hypervisor: Hypervisor, EniCount: NetworkInfo.MaximumNetworkInterfaces, PodsPerEniCount: NetworkInfo.Ipv4AddressesPerInterface, CpuCount: VCpuInfo.DefaultVCpus'} --output json) +DESCRIBE_INSTANCES_RESULT=$(aws ec2 describe-instance-types --instance-type "${INSTANCE_TYPE}" --query 'InstanceTypes[0].{Hypervisor: Hypervisor, EniCount: NetworkInfo.MaximumNetworkInterfaces, PodsPerEniCount: NetworkInfo.Ipv4AddressesPerInterface, CpuCount: VCpuInfo.DefaultVCpus}' --output json) HYPERVISOR_TYPE=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.Hypervisor' ) IS_NITRO=false diff --git a/test/Dockerfile b/test/Dockerfile new file mode 100644 index 000000000..f4a4487b1 --- /dev/null +++ b/test/Dockerfile @@ -0,0 +1,15 @@ +FROM public.ecr.aws/aws-ec2/amazon-ec2-metadata-mock:v1.11.2 as aemm +FROM public.ecr.aws/amazonlinux/amazonlinux:2 + +ENV IMDS_ENDPOINT=127.0.0.1:1338 +COPY --from=aemm /ec2-metadata-mock /sbin/ec2-metadata-mock + +COPY files/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json +COPY files/kubelet-kubeconfig /var/lib/kubelet/kubeconfig +COPY test/entrypoint.sh /entrypoint.sh +COPY files /etc/eks +COPY test/mocks/ /sbin/ + +RUN yum install -y jq + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/test/README.md b/test/README.md new file mode 100644 index 000000000..e688ca945 --- /dev/null +++ b/test/README.md @@ -0,0 +1,40 @@ +## Tests + +This directory contains a Dockerfile that is able to be used locally to test the `/etc/eks/boostrap.sh` script without having to use a real AL2 EC2 instance for a quick dev-loop. 
It is still necessary to test the bootstrap script on a real instance since the Docker image is not a fully accurate representation. + +## AL2 EKS Optimized AMI Docker Image + +The image is built using the official AL2 image `public.ecr.aws/amazonlinux/amazonlinux:2`. It has several mocks installed including the [ec2-metadata-mock](https://github.com/aws/amazon-ec2-metadata-mock). Mocks are installed into `/sbin`, so adding addditional ones as necessary should be as simple as dropping a bash script in the `mocks` dir named as the command you would like to mock out. + +## Usage + +```bash + +## The docker context needs to be at the root of the repo +docker build -t eks-optimized-ami -f Dockerfile ../ + +docker run -it eks-optimized-ami /etc/eks/bootstrap.sh --b64-cluster-ca dGVzdA== --apiserver-endpoint http://my-api-endpoint test +``` + +The `test-harness.sh` script wraps a build and runs test script in the `cases` dir. Tests scripts within the `cases` dir are invoked by the `test-harness.sh` script and have access to the `run` function. The `run` function accepts a temporary directory as an argument in order to mount as a volume in the container so that test scripts can check files within the `/etc/kubernetes/` directory after a bootstrap run. The remaining arguments to the `run` function are a path to a script within the AL2 EKS Optimized AMI Docker Container. + +Here's an example `run` call: + +``` +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv4 \ + --dns-cluster-ip 192.168.0.1 \ + test-cluster-name +``` + +## ECR Public + +You may need to logout of ECR public or reauthenticate if your credentials are expired: + +```bash +docker logout public.ecr.aws +``` + +ECR public allow anonymous access, but you cannot have expired credentials loaded. 
\ No newline at end of file diff --git a/test/cases/ip-family-service-ipv6-cidr-mismatch.sh b/test/cases/ip-family-service-ipv6-cidr-mismatch.sh new file mode 100755 index 000000000..efe887290 --- /dev/null +++ b/test/cases/ip-family-service-ipv6-cidr-mismatch.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "-> Should fail validation - ip-family mismatch" +exit_code=0 +TEMP_DIR=$(mktemp -d) +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv4 \ + --service-ipv6-cidr 192.168.0.1/24 \ + test || exit_code=$? + +if [[ ${exit_code} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi diff --git a/test/cases/ipv4-cluster-dns-ip.sh b/test/cases/ipv4-cluster-dns-ip.sh new file mode 100755 index 000000000..7dc7e36b5 --- /dev/null +++ b/test/cases/ipv4-cluster-dns-ip.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "--> Should return IPv4 DNS Cluster IP when given dns-cluster-ip" +exit_code=0 +TEMP_DIR=$(mktemp -d) +expected_cluster_dns="192.168.0.1" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv4 \ + --dns-cluster-ip "${expected_cluster_dns}" \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +actual_cluster_dns=$(jq -r '.clusterDNS[0]' < ${TEMP_DIR}/kubelet-config.json) +if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then + echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" + exit 1 +fi \ No newline at end of file diff --git a/test/cases/ipv6-cluster-dns-ip.sh b/test/cases/ipv6-cluster-dns-ip.sh new file mode 100755 index 000000000..26f67fa8d --- /dev/null +++ b/test/cases/ipv6-cluster-dns-ip.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "-> Should return ipv6 DNS Cluster IP when given dns-cluster-ip" +exit_code=0 +TEMP_DIR=$(mktemp -d) +expected_cluster_dns="fe80::2a" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv6 \ + --dns-cluster-ip "${expected_cluster_dns}" \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +actual_cluster_dns=$(jq -r '.clusterDNS[0]' < ${TEMP_DIR}/kubelet-config.json) +if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then + echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" + exit 1 +fi \ No newline at end of file diff --git a/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh b/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh new file mode 100755 index 000000000..6c08cd7f6 --- /dev/null +++ b/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "-> Should return IPv6 DNS cluster IP when given service-ipv6-cidr" +exit_code=0 +TEMP_DIR=$(mktemp -d) +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv6 \ + --service-ipv6-cidr fe80::1 \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_cluster_dns="fe80::1a" +actual_cluster_dns=$(jq -r '.clusterDNS[0]' < ${TEMP_DIR}/kubelet-config.json) +if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then + echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" + exit 1 +fi \ No newline at end of file diff --git a/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh b/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh new file mode 100755 index 000000000..ac6991960 --- /dev/null +++ b/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "-> Should fail w/ \"service-ipv6-cidr must be provided when ip-family is specified as ipv6\"" +exit_code=0 +TEMP_DIR=$(mktemp -d) +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv6 \ + test || exit_code=$? + +if [[ ${exit_code} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi \ No newline at end of file diff --git a/test/cases/max-pods-cni-1-11-2-delegation.sh b/test/cases/max-pods-cni-1-11-2-delegation.sh new file mode 100755 index 000000000..e6a2b8ba6 --- /dev/null +++ b/test/cases/max-pods-cni-1-11-2-delegation.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "-> Should calc max-pods successfully for VPC CNI 1.11.2" +exit_code=0 +TEMP_DIR=$(mktemp -d) +out=$(run ${TEMP_DIR} /etc/eks/max-pods-calculator.sh \ + --instance-type m5.8xlarge \ + --cni-version 1.11.2 \ + --cni-prefix-delegation-enabled || exit_code=$?) 
+echo $out + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi +expected_max_pods="250" +actual_max_pods=$(grep -o '[0-9]\+' <<< ${out}) +if [[ ${actual_max_pods} -ne ${expected_max_pods} ]]; then + echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.11.2 to be '${expected_max_pods}', but got '${actual_max_pods}'" + exit 1 +fi diff --git a/test/cases/max-pods-cni-1-11-2.sh b/test/cases/max-pods-cni-1-11-2.sh new file mode 100755 index 000000000..cbf9179a5 --- /dev/null +++ b/test/cases/max-pods-cni-1-11-2.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "-> Should calc max-pods successfully for m5.8xlarge VPC CNI 1.11.2" +exit_code=0 +TEMP_DIR=$(mktemp -d) +out=$(run ${TEMP_DIR} /etc/eks/max-pods-calculator.sh \ + --instance-type m5.8xlarge \ + --cni-version 1.11.2 || exit_code=$?) +echo $out + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi +expected_max_pods="234" +actual_max_pods=$(grep -o '[0-9]\+' <<< ${out}) +if [[ ${actual_max_pods} -ne ${expected_max_pods} ]]; then + echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.11.2 to be '${expected_max_pods}', but got '${actual_max_pods}'" + exit 1 +fi diff --git a/test/cases/max-pods-cni-1-7-5.sh b/test/cases/max-pods-cni-1-7-5.sh new file mode 100755 index 000000000..295f43c28 --- /dev/null +++ b/test/cases/max-pods-cni-1-7-5.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "-> Should calc max-pods successfully for VPC CNI 1.7.5" +exit_code=0 +TEMP_DIR=$(mktemp -d) +out=$(run ${TEMP_DIR} /etc/eks/max-pods-calculator.sh \ + --instance-type-from-imds \ + --cni-version 1.7.5 || exit_code=$?) 
+echo $out + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi +expected_max_pods="58" +actual_max_pods=$(grep -o '[0-9]\+' <<< ${out}) +if [[ ${actual_max_pods} -ne ${expected_max_pods} ]]; then + echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.7.5 to be '${expected_max_pods}', but got '${actual_max_pods}'" + exit 1 +fi \ No newline at end of file diff --git a/test/entrypoint.sh b/test/entrypoint.sh new file mode 100755 index 000000000..b067e39f6 --- /dev/null +++ b/test/entrypoint.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +set -euo pipefail +SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" + +## Start IMDS mock +/sbin/ec2-metadata-mock --imdsv2 &> /var/log/ec2-metadata-mock.log & +sleep 1 + +## execute any other params +$@ \ No newline at end of file diff --git a/test/mocks/aws b/test/mocks/aws new file mode 100755 index 000000000..5d9d57079 --- /dev/null +++ b/test/mocks/aws @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -euo pipefail +SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" + +if [[ $1 == "ec2" ]]; then + + if [[ $2 == "describe-instance-types" ]]; then + instance_type=$(echo "${@}" | grep -o '[a-z]\+[0-9]\+[a-z]*\.[0-9a-z]\+' | tr '.' 
'-') + if [[ -f "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" ]]; then + cat "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" + exit 0 + fi + echo "instance type not found" + exit 1 + fi +fi \ No newline at end of file diff --git a/test/mocks/describe-instance-types/m4-xlarge.json b/test/mocks/describe-instance-types/m4-xlarge.json new file mode 100644 index 000000000..0239966bc --- /dev/null +++ b/test/mocks/describe-instance-types/m4-xlarge.json @@ -0,0 +1,6 @@ +{ + "Hypervisor": "xen", + "EniCount": 4, + "PodsPerEniCount": 15, + "CpuCount": 4 +} diff --git a/test/mocks/describe-instance-types/m5-8xlarge.json b/test/mocks/describe-instance-types/m5-8xlarge.json new file mode 100644 index 000000000..840091225 --- /dev/null +++ b/test/mocks/describe-instance-types/m5-8xlarge.json @@ -0,0 +1,6 @@ +{ + "Hypervisor": "nitro", + "EniCount": 8, + "PodsPerEniCount": 30, + "CpuCount": 32 +} diff --git a/test/mocks/iptables-save b/test/mocks/iptables-save new file mode 100755 index 000000000..b2bd12826 --- /dev/null +++ b/test/mocks/iptables-save @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "mocking iptables-save with params $@" \ No newline at end of file diff --git a/test/mocks/sudo b/test/mocks/sudo new file mode 100755 index 000000000..f91c5ff33 --- /dev/null +++ b/test/mocks/sudo @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "mocking sudo with params $@" \ No newline at end of file diff --git a/test/mocks/systemctl b/test/mocks/systemctl new file mode 100755 index 000000000..47846145d --- /dev/null +++ b/test/mocks/systemctl @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "mocking systemctl with $@" \ No newline at end of file diff --git a/test/test-harness.sh b/test/test-harness.sh new file mode 100755 index 000000000..fdfd00778 --- /dev/null +++ b/test/test-harness.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +export SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" +set -euo pipefail + 
+TEST_CASE_SCRIPT="" + +USAGE=$(cat << 'EOM' + Usage: test-harness.sh [-c ] + Executes the test harness for the EKS Optimized AL2 AMI. + By default the test harness executes all scripts in the cases directory. + Example: test-harness.sh + Optional: + -c A path to a specific test case script +EOM +) + +while getopts "c:h" opt; do + case ${opt} in + c ) # Case Script Path + TEST_CASE_SCRIPT="$OPTARG" + ;; + h ) # help + echo "$USAGE" 1>&2 + exit + ;; + \? ) + echo "$USAGE" 1>&2 + exit + ;; + esac +done + +docker build -t eks-optimized-ami -f "${SCRIPTPATH}/Dockerfile" "${SCRIPTPATH}/../" +overall_status=0 + +function run(){ + local temp_dir=$1 + shift + cp -f ${SCRIPTPATH}/../files/kubelet-config.json ${temp_dir}/kubelet-config.json + docker run -v ${SCRIPTPATH}/../files/:/etc/eks/ \ + -v ${temp_dir}/kubelet-config.json:/etc/kubernetes/kubelet/kubelet-config.json \ + -it --rm eks-optimized-ami $@ +} +export -f run + +if [[ ! -z ${TEST_CASE_SCRIPT} ]]; then + test_cases=${TEST_CASE_SCRIPT} +else + test_cases=($(find ${SCRIPTPATH}/cases -name "*.sh" -type f)) +fi + +for case in "${test_cases[@]}"; do + status=0 + echo "=================================================================================================================" + echo "-> Executing Test Case: $(basename ${case})" + ${case} || status=1 + if [[ ${status} -eq 0 ]]; then + echo "✅ ✅ $(basename ${case}) Tests Passed! ✅ ✅ " + else + echo "❌ ❌ $(basename ${case}) Tests Failed! 
❌ ❌" + overall_status=1 + fi + echo "=================================================================================================================" +done + +exit $overall_status From e21d8649f652e1a3d013aea9470fbf3649cec260 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 8 Sep 2022 14:55:13 -0700 Subject: [PATCH 063/232] Add GitHub Action for unit tests (#1015) --- .github/workflows/ci.yaml | 17 +++++++++++++++++ Makefile | 4 ++++ test/test-harness.sh | 5 ++++- 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/ci.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 000000000..45b749ed9 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,17 @@ +name: CI +on: + workflow_dispatch: + push: + branches: + - 'master' + pull_request: + types: + - opened + - reopened + - synchronize +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - run: make test diff --git a/Makefile b/Makefile index b04f1fbf9..2dc5bd69d 100644 --- a/Makefile +++ b/Makefile @@ -31,6 +31,10 @@ T_RESET := \e[0m .PHONY: all all: 1.19 1.20 1.21 1.22 1.23 ## Build all versions of EKS Optimized AL2 AMI +.PHONY: test +test: ## run the test-harness + test/test-harness.sh + .PHONY: validate validate: ## Validate packer config $(PACKER_BINARY) validate $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json diff --git a/test/test-harness.sh b/test/test-harness.sh index fdfd00778..76296888c 100755 --- a/test/test-harness.sh +++ b/test/test-harness.sh @@ -39,7 +39,10 @@ function run(){ cp -f ${SCRIPTPATH}/../files/kubelet-config.json ${temp_dir}/kubelet-config.json docker run -v ${SCRIPTPATH}/../files/:/etc/eks/ \ -v ${temp_dir}/kubelet-config.json:/etc/kubernetes/kubelet/kubelet-config.json \ - -it --rm eks-optimized-ami $@ + --attach STDOUT \ + --attach STDERR \ + --rm \ + eks-optimized-ami $@ } export -f run From 
b5f852943866f1fe2747138dd3a09af5e77fc7cc Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 8 Sep 2022 15:04:48 -0700 Subject: [PATCH 064/232] Add note about unit testing (#1016) --- CONTRIBUTING.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c22acf627..30e2f96d9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -43,7 +43,15 @@ GitHub provides additional document on [forking a repository](https://help.githu When submitting PRs, we want to verify that there are no regressions in the AMI with the new changes. EKS runs various tests before publishing new Amazon EKS optimized Amazon Linux AMIs, which will ensure the highest level of confidence that there are no regressions in officially published AMIs. To maintain the health of this repo, we need to do some basic validation prior to merging PRs. Eventually, we hope to automate this process. Until then, here are the basic steps that we should take before merging PRs. -**Test #1: Verify that building AMIs still works** +**Test #1: Verify that the unit tests pass** + +Please add a test case for your changes, if possible. See the [unit test README](test/README.md) for more information. These tests will be run automatically for every pull request. + +``` +make test +``` + +**Test #2: Verify that building AMIs still works** If your change is relevant to a specific Kubernetes version, build all AMIs that apply. Otherwise, just choose the latest available Kubernetes version. @@ -52,7 +60,7 @@ If your change is relevant to a specific Kubernetes version, build all AMIs that make 1.22 ``` -**Test #2: Create a nodegroup with new AMI and confirm it joins a cluster** +**Test #3: Create a nodegroup with new AMI and confirm it joins a cluster** Once the AMI is built, we need to verify that it can join a cluster. You can use `eksctl`, or your method of choice, to create a cluster and add nodes to it using the AMI you built. 
Below is an example config file. @@ -84,7 +92,7 @@ eksctl create cluster -f cluster.yaml `eksctl` will verify that the nodes join the cluster before completing. -**Test #3: Verify that the nodes are Kubernetes conformant** +**Test #4: Verify that the nodes are Kubernetes conformant** You can use [sonobuoy](https://sonobuoy.io/) to run conformance tests on the cluster you've create in *Test #2*. You should only include nodes with the custom AMI built in *Test #1*. You must install `sonobuoy` locally before running. @@ -94,7 +102,7 @@ sonobuoy run --wait By default, `sonobuoy` will run `e2e` and `systemd-logs`. This step may take multiple hours to run. -**Test #4: [Optional] Test your specific PR changes** +**Test #5: [Optional] Test your specific PR changes** If your PR has changes that require additional, custom validation, provide the appropriate steps to verify that the changes don't cause regressions and behave as expected. Document the steps taken in the CR. From 73bca495141bfd03646a17046083ced8786edd01 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 8 Sep 2022 18:03:40 -0700 Subject: [PATCH 065/232] Add note on minimum Packer version (#1017) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a0ef8250b..988c67657 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ EKS-optimized AMI. ## Setup -You must have [Packer](https://www.packer.io/) installed on your local system. +You must have [Packer](https://www.packer.io/) version 1.8.0 or later installed on your local system. For more information, see [Installing Packer](https://www.packer.io/docs/install/index.html) in the Packer documentation. You must also have AWS account credentials configured so that Packer can make calls to AWS API operations on your behalf. 
From 6439014a163b7db6af4c96b3fe7792e904aa64e7 Mon Sep 17 00:00:00 2001 From: Brandon H <697896+brandonhon@users.noreply.github.com> Date: Thu, 8 Sep 2022 20:53:25 -0500 Subject: [PATCH 066/232] Use sudo for generate-version-info.sh (#1011) Co-authored-by: Brandon Honeycutt --- scripts/generate-version-info.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate-version-info.sh b/scripts/generate-version-info.sh index fa7baa196..889a758ef 100644 --- a/scripts/generate-version-info.sh +++ b/scripts/generate-version-info.sh @@ -14,7 +14,7 @@ fi OUTPUT_FILE="$1" # packages -rpm --query --all --queryformat '\{"%{NAME}": "%{VERSION}-%{RELEASE}"\}\n' | jq --slurp --sort-keys 'add | {packages:(.)}' > "$OUTPUT_FILE" +sudo rpm --query --all --queryformat '\{"%{NAME}": "%{VERSION}-%{RELEASE}"\}\n' | jq --slurp --sort-keys 'add | {packages:(.)}' > "$OUTPUT_FILE" # binaries echo $(jq ".binaries.kubelet = \"$(kubelet --version | awk '{print $2}')\"" $OUTPUT_FILE) > $OUTPUT_FILE From 486ef112af655134f3d7ff5607970882263138f6 Mon Sep 17 00:00:00 2001 From: Zaid Ahmed Farooq <38226823+zaf6862@users.noreply.github.com> Date: Mon, 12 Sep 2022 12:22:52 -0700 Subject: [PATCH 067/232] Remove 1.19 (#1018) --- Makefile | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 2dc5bd69d..849fc3239 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.19 1.20 1.21 1.22 1.23 ## Build all versions of EKS Optimized AL2 AMI +all: 1.20 1.21 1.22 1.23 ## Build all versions of EKS Optimized AL2 AMI .PHONY: test test: ## run the test-harness @@ -40,16 +40,12 @@ validate: ## Validate packer config $(PACKER_BINARY) validate $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json .PHONY: k8s -k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI +k8s: validate ## Build default K8s version of EKS 
Optimized AL2 AMI @echo "$(T_GREEN)Building AMI for version $(T_YELLOW)$(kubernetes_version)$(T_GREEN) on $(T_YELLOW)$(arch)$(T_RESET)" $(PACKER_BINARY) build -timestamp-ui $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html -.PHONY: 1.19 -1.19: ## Build EKS Optimized AL2 AMI - K8s 1.19 - $(MAKE) k8s kubernetes_version=1.19.15 kubernetes_build_date=2021-11-10 pull_cni_from_github=true - .PHONY: 1.20 1.20: ## Build EKS Optimized AL2 AMI - K8s 1.20 $(MAKE) k8s kubernetes_version=1.20.15 kubernetes_build_date=2022-07-27 pull_cni_from_github=true From 8e3c3cc5541a021f31616445cad5edea4375a28e Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 15 Sep 2022 13:14:41 -0700 Subject: [PATCH 068/232] Removes deprecated --network-plugin flag (#1021) --- files/kubelet-containerd.service | 2 +- files/kubelet.service | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index 35a6bf188..b01a5bf15 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -11,7 +11,7 @@ ExecStart=/usr/bin/kubelet --cloud-provider aws \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime remote \ --container-runtime-endpoint unix:///run/containerd/containerd.sock \ - --network-plugin cni $KUBELET_ARGS $KUBELET_EXTRA_ARGS + $KUBELET_ARGS $KUBELET_EXTRA_ARGS Restart=on-failure RestartForceExitStatus=SIGPIPE diff --git a/files/kubelet.service b/files/kubelet.service index 387470da1..e221f6a0f 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -10,7 +10,7 @@ ExecStart=/usr/bin/kubelet --cloud-provider aws \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime docker \ - --network-plugin cni $KUBELET_ARGS $KUBELET_EXTRA_ARGS + 
$KUBELET_ARGS $KUBELET_EXTRA_ARGS Restart=always RestartSec=5 From 3ab8f32859f175dc2c25e05530b0f171e52db703 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 16 Sep 2022 11:24:38 -0700 Subject: [PATCH 069/232] Adds --network-plugin back to docker kubelet.service (#1026) --- files/kubelet.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/kubelet.service b/files/kubelet.service index e221f6a0f..387470da1 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -10,7 +10,7 @@ ExecStart=/usr/bin/kubelet --cloud-provider aws \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime docker \ - $KUBELET_ARGS $KUBELET_EXTRA_ARGS + --network-plugin cni $KUBELET_ARGS $KUBELET_EXTRA_ARGS Restart=always RestartSec=5 From 802add46bcb573628d9b357b63e01d9fe7ad5eed Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Fri, 16 Sep 2022 18:30:34 -0700 Subject: [PATCH 070/232] Updating CHANGELOG.md with latest AMI details (#1029) Co-authored-by: ljosyula --- CHANGELOG.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3fcdc1766..025386aaf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,45 @@ # Changelog +### AMI Release v20220914 +* amazon-eks-gpu-node-1.23-v20220914 +* amazon-eks-gpu-node-1.22-v20220914 +* amazon-eks-gpu-node-1.21-v20220914 +* amazon-eks-gpu-node-1.20-v20220914 +* amazon-eks-arm64-node-1.23-v20220914 +* amazon-eks-arm64-node-1.22-v20220914 +* amazon-eks-arm64-node-1.21-v20220914 +* amazon-eks-arm64-node-1.20-v20220914 +* amazon-eks-node-1.23-v20220914 +* amazon-eks-node-1.22-v20220914 +* amazon-eks-node-1.21-v20220914 +* amazon-eks-node-1.20-v20220914 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.23.9-20220914` +* `1.22.12-20220914` +* `1.21.14-20220914` +* 
`1.20.15-20220914` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ + +AMI details: +* kernel: 5.4.209-116.367.amzn2 +* dockerd: 20.10.17-1.amzn2 +* containerd: 1.6.6-1.amzn2 +* runc: 1.1.3-1.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +- The AWS CLI has been updated to (`1.25.72`)[https://github.com/aws/aws-cli/blob/1.25.72/CHANGELOG.rst#L8] to support local EKS clusters on Outposts. +- This release fixes an issue with DNS cluster IP and IPv6. More info in #931. +- Kernel version updated to `5.4.209-116.367.amzn2` as a part of latest CVE patch (ALASKERNEL-5.4-2022-035)[https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2022-035.html] + ### AMI Release v20220824 * amazon-eks-gpu-node-1.23-v20220824 * amazon-eks-gpu-node-1.22-v20220824 From 07aeeb2cd62c8de9ff2a41dedb8c99d34fb9ba1d Mon Sep 17 00:00:00 2001 From: Zaid Ahmed Farooq <38226823+zaf6862@users.noreply.github.com> Date: Tue, 20 Sep 2022 15:11:31 -0700 Subject: [PATCH 071/232] Add pause container account for me-central-1 (#1032) Co-authored-by: Zaid Farooq --- files/bootstrap.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 039e23246..cfbc730e8 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -178,6 +178,8 @@ function get_pause_container_account_for_region () { echo "${PAUSE_CONTAINER_ACCOUNT:-590381155156}";; ap-southeast-3) echo "${PAUSE_CONTAINER_ACCOUNT:-296578399912}";; + me-central-1) + echo "${PAUSE_CONTAINER_ACCOUNT:-759879836304}";; *) echo "${PAUSE_CONTAINER_ACCOUNT:-602401143452}";; esac From 75c7d2970a73b00276519e96f249e89059167d34 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Wed, 21 Sep 2022 11:27:30 -0500 Subject: [PATCH 072/232] Capture the disk usage from pod local storage (#1019) This captures 
the storage from the top transient layer of the overlay filesystem where files that the pod creates while running are stored. This allows identifying situations where pods are writing log files or other data to disk to both identify potential causes for full disks as well as I/O throttling. --- log-collector-script/linux/eks-log-collector.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 1430b8545..a0a3cafab 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -20,7 +20,7 @@ export LANG="C" export LC_ALL="C" # Global options -readonly PROGRAM_VERSION="0.7.1" +readonly PROGRAM_VERSION="0.7.2" readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" @@ -287,6 +287,7 @@ get_mounts_info() { pvs > "${COLLECT_DIR}"/storage/pvs.txt vgs > "${COLLECT_DIR}"/storage/vgs.txt mount -t xfs | awk '{print $1}' | xargs -I{} -- sh -c "xfs_info {}; xfs_db -r -c 'freesp -s' {}" > "${COLLECT_DIR}"/storage/xfs.txt + mount | grep ^overlay | sed 's/.*upperdir=//' | sed 's/,.*//' | xargs -n 1 timeout 75 du -sh | grep -v ^0 > "${COLLECT_DIR}"/storage/pod_local_storage.txt ok } From 9c4b0c54440193a15b2458bb971a924542ad9606 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 21 Sep 2022 10:55:35 -0700 Subject: [PATCH 073/232] Resolves #1024: makes containerd the default and only runtime for k8s version 1.24+ (#1027) --- .gitignore | 2 + files/bootstrap.sh | 42 +++++++++- scripts/upgrade_kernel.sh | 17 +---- test/cases/container-runtime-defaults.sh | 97 ++++++++++++++++++++++++ test/mocks/kubelet | 9 +++ test/test-harness.sh | 3 + 6 files changed, 152 insertions(+), 18 deletions(-) create mode 100755 test/cases/container-runtime-defaults.sh create mode 100755 test/mocks/kubelet diff 
--git a/.gitignore b/.gitignore index 42b8dcbf0..2d9cb419a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *manifest.json *.swp .idea +*version-info.json +.DS_Store diff --git a/files/bootstrap.sh b/files/bootstrap.sh index cfbc730e8..2ead184d1 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -135,18 +135,46 @@ set -- "${POSITIONAL[@]}" # restore positional parameters CLUSTER_NAME="$1" set -u +KUBELET_VERSION=$(kubelet --version | grep -Eo '[0-9]\.[0-9]+\.[0-9]+') +echo "Using kubelet version $KUBELET_VERSION" + +function is_greater_than_or_equal_to_version() { + local actual_version="$1" + local compared_version="$2" + + [ $actual_version = "`echo -e \"$actual_version\n$compared_version\" | sort -V | tail -n1`" ] +} + +# As of Kubernetes version 1.24, we will start defaulting the container runtime to containerd +# and no longer support docker as a container runtime. +IS_124_OR_GREATER=false +DEFAULT_CONTAINER_RUNTIME=dockerd +if is_greater_than_or_equal_to_version $KUBELET_VERSION "1.24.0"; then + IS_124_OR_GREATER=true + DEFAULT_CONTAINER_RUNTIME=containerd +fi + +# Set container runtime related variables +DOCKER_CONFIG_JSON="${DOCKER_CONFIG_JSON:-}" +ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" +CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-$DEFAULT_CONTAINER_RUNTIME}" + +echo "Using $CONTAINER_RUNTIME as the container runtime" + +if $IS_124_OR_GREATER && [ $CONTAINER_RUNTIME != "containerd" ]; then + echo "ERROR: containerd is the only supported container runtime as of Kubernetes version 1.24" + exit 1 +fi + USE_MAX_PODS="${USE_MAX_PODS:-true}" B64_CLUSTER_CA="${B64_CLUSTER_CA:-}" APISERVER_ENDPOINT="${APISERVER_ENDPOINT:-}" SERVICE_IPV4_CIDR="${SERVICE_IPV4_CIDR:-}" DNS_CLUSTER_IP="${DNS_CLUSTER_IP:-}" KUBELET_EXTRA_ARGS="${KUBELET_EXTRA_ARGS:-}" -ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" API_RETRY_ATTEMPTS="${API_RETRY_ATTEMPTS:-3}" -DOCKER_CONFIG_JSON="${DOCKER_CONFIG_JSON:-}" 
CONTAINERD_CONFIG_FILE="${CONTAINERD_CONFIG_FILE:-}" PAUSE_CONTAINER_VERSION="${PAUSE_CONTAINER_VERSION:-3.5}" -CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-dockerd}" IP_FAMILY="${IP_FAMILY:-}" SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" @@ -542,6 +570,14 @@ EOF fi if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then + if $ENABLE_DOCKER_BRIDGE; then + echo "WARNING: Flag --enable-docker-bridge was set but will be ignored as it's not relevant to containerd" + fi + + if [ ! -z "$DOCKER_CONFIG_JSON" ]; then + echo "WARNING: Flag --docker-config-json was set but will be ignored as it's not relevant to containerd" + fi + sudo mkdir -p /etc/containerd sudo mkdir -p /etc/cni/net.d mkdir -p /etc/systemd/system/containerd.service.d diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 7a686604b..a53c4f6f9 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -5,22 +5,9 @@ set -o nounset set -o errexit if [[ -z "$KERNEL_VERSION" ]]; then - # Save for resetting - OLDIFS=$IFS - # Makes 5.4 kernel the default on 1.19 and higher - IFS='.' - # Convert kubernetes version in an array to compare versions - read -ra ADDR <<< "$KUBERNETES_VERSION" - # Reset - IFS=$OLDIFS + KERNEL_VERSION=5.4 - if (( ADDR[0] == 1 && ADDR[1] < 19 )); then - KERNEL_VERSION=4.14 - else - KERNEL_VERSION=5.4 - fi - - echo "kernel_version is unset. Setting to $KERNEL_VERSION based on kubernetes_version $KUBERNETES_VERSION" + echo "kernel_version is unset. 
Setting to $KERNEL_VERSION" fi if [[ $KERNEL_VERSION == "4.14" ]]; then diff --git a/test/cases/container-runtime-defaults.sh b/test/cases/container-runtime-defaults.sh new file mode 100755 index 000000000..c8efff4ed --- /dev/null +++ b/test/cases/container-runtime-defaults.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +set -euo pipefail + +exit_code=0 +TEMP_DIR=$(mktemp -d) + +echo "--> Should allow dockerd as container runtime when below k8s version 1.24" +KUBELET_VERSION="Kubernetes v1.20.15-eks-ba74326" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime dockerd \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +echo "--> Should allow containerd as container runtime when below k8s version 1.24" +KUBELET_VERSION="Kubernetes v1.20.15-eks-ba74326" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime containerd \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +echo "--> Should have default container runtime when below k8s version 1.24" +KUBELET_VERSION="Kubernetes v1.20.15-eks-ba74326" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +echo "--> Should not allow dockerd as container runtime when at or above k8s version 1.24" +export KUBELET_VERSION="Kubernetes v1.24.15-eks-ba74326" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime dockerd \ + test || exit_code=$? 
+ +echo "EXIT CODE $exit_code" +if [[ ${exit_code} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi +exit_code=0 + +echo "--> Should allow containerd as container runtime when at or above k8s version 1.24" +KUBELET_VERSION="Kubernetes v1.24.15-eks-ba74326" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime containerd \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +echo "--> Should have default container runtime when at or above k8s version 1.24" +KUBELET_VERSION="Kubernetes v1.24.15-eks-ba74326" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +echo "--> Should ignore docker-specific flags when at or above k8s version 1.24" +KUBELET_VERSION="Kubernetes v1.24.15-eks-ba74326" +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --enable-docker-bridge true \ + --docker-config-json "{\"some\":\"json\"}" \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi diff --git a/test/mocks/kubelet b/test/mocks/kubelet new file mode 100755 index 000000000..6dbf8abcd --- /dev/null +++ b/test/mocks/kubelet @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -euo pipefail + +# The only use of kubelet directly is to get the Kubernetes version, +# so we'll set a default here to avoid test failures, and you can +# override by setting the KUBELET_VERSION environment variable. 
+some_kubelet_version="Kubernetes v1.20.15-eks-ba74326" +KUBELET_VERSION="${KUBELET_VERSION:-$some_kubelet_version}" +echo "$KUBELET_VERSION" diff --git a/test/test-harness.sh b/test/test-harness.sh index 76296888c..c13ca763e 100755 --- a/test/test-harness.sh +++ b/test/test-harness.sh @@ -36,12 +36,15 @@ overall_status=0 function run(){ local temp_dir=$1 shift + # This variable is used to override the default value in the kubelet mock + KUBELET_VERSION="${KUBELET_VERSION:-}" cp -f ${SCRIPTPATH}/../files/kubelet-config.json ${temp_dir}/kubelet-config.json docker run -v ${SCRIPTPATH}/../files/:/etc/eks/ \ -v ${temp_dir}/kubelet-config.json:/etc/kubernetes/kubelet/kubelet-config.json \ --attach STDOUT \ --attach STDERR \ --rm \ + -e KUBELET_VERSION="$KUBELET_VERSION" \ eks-optimized-ami $@ } export -f run From eb908eb80fe9e5b4cc4266c445638dedea264a8c Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 22 Sep 2022 12:12:55 -0700 Subject: [PATCH 074/232] Add trn1 instance types (#1033) --- files/eni-max-pods.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 8981c9c0b..a8de14260 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2022-08-15T23:21:13Z +# This file was generated at 2022-09-21T13:34:09-07:00 # # The regions queried were: # - ap-northeast-1 @@ -554,6 +554,8 @@ t4g.micro 4 t4g.nano 4 t4g.small 11 t4g.xlarge 58 +trn1.2xlarge 58 +trn1.32xlarge 247 u-12tb1.112xlarge 737 u-12tb1.metal 147 u-18tb1.metal 737 From 2485779f6918a2154d53875157914547fe57bad5 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Fri, 23 Sep 2022 11:23:58 -0500 Subject: [PATCH 075/232] increase the kube-api-server QPS from 5/10 to 10/20 (#1030) This applies for EKS v1.22+ where API Priority & Fairness is available and there is a specific queue for kubelet health. --- files/bootstrap.sh | 12 ++++++--- test/cases/api-qps-k8s-1.21-below.sh | 32 ++++++++++++++++++++++++ test/cases/api-qps-k8s-1.22-above.sh | 31 +++++++++++++++++++++++ test/cases/container-runtime-defaults.sh | 14 +++++------ test/mocks/kubelet | 8 +++--- 5 files changed, 84 insertions(+), 13 deletions(-) create mode 100755 test/cases/api-qps-k8s-1.21-below.sh create mode 100755 test/cases/api-qps-k8s-1.22-above.sh diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 2ead184d1..3cfc9bc72 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -518,13 +518,19 @@ KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json echo "$(jq ".clusterDNS=[\"$DNS_CLUSTER_IP\"]" $KUBELET_CONFIG)" > $KUBELET_CONFIG if [[ "${IP_FAMILY}" == "ipv4" ]]; then - INTERNAL_IP=$(get_meta_data 'latest/meta-data/local-ipv4') + INTERNAL_IP=$(get_meta_data 'latest/meta-data/local-ipv4') else - INTERNAL_IP_URI=latest/meta-data/network/interfaces/macs/$MAC/ipv6s - INTERNAL_IP=$(get_meta_data $INTERNAL_IP_URI) + INTERNAL_IP_URI=latest/meta-data/network/interfaces/macs/$MAC/ipv6s + INTERNAL_IP=$(get_meta_data $INTERNAL_IP_URI) fi INSTANCE_TYPE=$(get_meta_data 'latest/meta-data/instance-type') +if is_greater_than_or_equal_to_version $KUBELET_VERSION "1.22.0"; then + # for K8s versions that suport API Priority & Fairness, increase our API server 
QPS + echo $(jq ".kubeAPIQPS=( .kubeAPIQPS // 10)|.kubeAPIBurst=( .kubeAPIBurst // 20)" $KUBELET_CONFIG) > $KUBELET_CONFIG +fi + + # Sets kubeReserved and evictionHard in /etc/kubernetes/kubelet/kubelet-config.json for worker nodes. The following two function # calls calculate the CPU and memory resources to reserve for kubeReserved based on the instance type of the worker node. # Note that allocatable memory and CPU resources on worker nodes is calculated by the Kubernetes scheduler diff --git a/test/cases/api-qps-k8s-1.21-below.sh b/test/cases/api-qps-k8s-1.21-below.sh new file mode 100755 index 000000000..5c7c40216 --- /dev/null +++ b/test/cases/api-qps-k8s-1.21-below.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "--> Should use default API server QPS for K8s 1.21-" +exit_code=0 +TEMP_DIR=$(mktemp -d) +KUBELET_VERSION=v1.21.0-eks-ba74326 +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +# values should not be set +expected_api_qps="null" +expected_api_burst="null" + +actual_api_qps=$(jq -r '.kubeAPIQPS' < ${TEMP_DIR}/kubelet-config.json) +actual_api_burst=$(jq -r '.kubeAPIBurst' < ${TEMP_DIR}/kubelet-config.json) +if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then + echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" + exit 1 +fi + +if [[ ${actual_api_burst} != ${expected_api_burst} ]]; then + echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" + exit 1 +fi diff --git a/test/cases/api-qps-k8s-1.22-above.sh b/test/cases/api-qps-k8s-1.22-above.sh new file mode 100755 index 000000000..54639260e --- /dev/null +++ b/test/cases/api-qps-k8s-1.22-above.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "--> Should increase API server QPS for K8s 1.22+" +exit_code=0 +TEMP_DIR=$(mktemp -d) +KUBELET_VERSION=v1.22.0-eks-ba74326 +run ${TEMP_DIR} /etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_api_qps="10" +expected_api_burst="20" + +actual_api_qps=$(jq -r '.kubeAPIQPS' < ${TEMP_DIR}/kubelet-config.json) +actual_api_burst=$(jq -r '.kubeAPIBurst' < ${TEMP_DIR}/kubelet-config.json) +if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then + echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" + exit 1 +fi + +if [[ ${actual_api_burst} != ${expected_api_burst} ]]; then + echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" + exit 1 +fi diff --git a/test/cases/container-runtime-defaults.sh b/test/cases/container-runtime-defaults.sh index c8efff4ed..74f4c5f12 100755 --- a/test/cases/container-runtime-defaults.sh +++ b/test/cases/container-runtime-defaults.sh @@ -5,7 +5,7 @@ exit_code=0 TEMP_DIR=$(mktemp -d) echo "--> Should allow dockerd as container runtime when below k8s version 1.24" -KUBELET_VERSION="Kubernetes v1.20.15-eks-ba74326" +KUBELET_VERSION="v1.20.15-eks-ba74326" run ${TEMP_DIR} /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ @@ -18,7 +18,7 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should allow containerd as container runtime when below k8s version 1.24" -KUBELET_VERSION="Kubernetes v1.20.15-eks-ba74326" +KUBELET_VERSION="v1.20.15-eks-ba74326" run ${TEMP_DIR} /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ @@ -31,7 +31,7 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should have default container runtime when below k8s version 1.24" -KUBELET_VERSION="Kubernetes v1.20.15-eks-ba74326" +KUBELET_VERSION="v1.20.15-eks-ba74326" run ${TEMP_DIR} /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ @@ -43,7 +43,7 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should not allow 
dockerd as container runtime when at or above k8s version 1.24" -export KUBELET_VERSION="Kubernetes v1.24.15-eks-ba74326" +export KUBELET_VERSION="v1.24.15-eks-ba74326" run ${TEMP_DIR} /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ @@ -58,7 +58,7 @@ fi exit_code=0 echo "--> Should allow containerd as container runtime when at or above k8s version 1.24" -KUBELET_VERSION="Kubernetes v1.24.15-eks-ba74326" +KUBELET_VERSION="v1.24.15-eks-ba74326" run ${TEMP_DIR} /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ @@ -71,7 +71,7 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should have default container runtime when at or above k8s version 1.24" -KUBELET_VERSION="Kubernetes v1.24.15-eks-ba74326" +KUBELET_VERSION="v1.24.15-eks-ba74326" run ${TEMP_DIR} /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ @@ -83,7 +83,7 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should ignore docker-specific flags when at or above k8s version 1.24" -KUBELET_VERSION="Kubernetes v1.24.15-eks-ba74326" +KUBELET_VERSION="v1.24.15-eks-ba74326" run ${TEMP_DIR} /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ diff --git a/test/mocks/kubelet b/test/mocks/kubelet index 6dbf8abcd..c689a18dc 100755 --- a/test/mocks/kubelet +++ b/test/mocks/kubelet @@ -4,6 +4,8 @@ set -euo pipefail # The only use of kubelet directly is to get the Kubernetes version, # so we'll set a default here to avoid test failures, and you can # override by setting the KUBELET_VERSION environment variable. 
-some_kubelet_version="Kubernetes v1.20.15-eks-ba74326" -KUBELET_VERSION="${KUBELET_VERSION:-$some_kubelet_version}" -echo "$KUBELET_VERSION" +if [ $# == 1 ] && [ $1 == "--version" ]; then + echo "Kubernetes ${KUBELET_VERSION:-v1.23.9-eks-ba74326}" +else + echo "mocking kubelet with params $@" +fi From c3c83267df3605a45bc93bfcf6c17fc6a343335a Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 29 Sep 2022 11:55:40 -0700 Subject: [PATCH 076/232] Move cleanup to separate provisioner, remove cleanup_image var (#1036) --- eks-worker-al2.json | 7 +++++-- scripts/cleanup.sh | 28 ++++++++++++++++++++++++++++ scripts/install-worker.sh | 33 --------------------------------- 3 files changed, 33 insertions(+), 35 deletions(-) create mode 100644 scripts/cleanup.sh diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 624572849..80a81653e 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -24,7 +24,6 @@ "arch": null, "instance_type": null, "ami_description": "EKS Kubernetes Worker AMI with AmazonLinux2 image", - "cleanup_image": "true", "ssh_interface": "", "ssh_username": "ec2-user", "temporary_security_group_source_cidrs": "", @@ -157,10 +156,14 @@ "AWS_ACCESS_KEY_ID={{user `aws_access_key_id`}}", "AWS_SECRET_ACCESS_KEY={{user `aws_secret_access_key`}}", "AWS_SESSION_TOKEN={{user `aws_session_token`}}", - "CLEANUP_IMAGE={{user `cleanup_image`}}", "SONOBUOY_E2E_REGISTRY={{user `sonobuoy_e2e_registry`}}" ] }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "script": "{{template_dir}}/scripts/cleanup.sh" + }, { "type": "shell", "remote_folder": "{{ user `remote_folder`}}", diff --git a/scripts/cleanup.sh b/scripts/cleanup.sh new file mode 100644 index 000000000..39babfde2 --- /dev/null +++ b/scripts/cleanup.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +# Clean up yum caches to reduce the image size +sudo yum clean all +sudo rm -rf /var/cache/yum + +# Clean up build artifacts +sudo rm -rf /tmp/worker + +# Clean up files to reduce confusion 
during debug +sudo rm -rf \ + /etc/hostname \ + /etc/machine-id \ + /etc/resolv.conf \ + /etc/ssh/ssh_host* \ + /home/ec2-user/.ssh/authorized_keys \ + /root/.ssh/authorized_keys \ + /var/lib/cloud/data \ + /var/lib/cloud/instance \ + /var/lib/cloud/instances \ + /var/lib/cloud/sem \ + /var/lib/dhclient/* \ + /var/lib/dhcp/dhclient.* \ + /var/lib/yum/history \ + /var/log/cloud-init-output.log \ + /var/log/cloud-init.log \ + /var/log/secure \ + /var/log/wtmp \ No newline at end of file diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 639bbe1bb..43825e045 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -373,37 +373,4 @@ echo vm.max_map_count=524288 | sudo tee -a /etc/sysctl.conf sudo mkdir -p /etc/eks/log-collector-script/ sudo cp $TEMPLATE_DIR/log-collector-script/eks-log-collector.sh /etc/eks/log-collector-script/ -################################################################################ -### Cleanup #################################################################### -################################################################################ - -CLEANUP_IMAGE="${CLEANUP_IMAGE:-true}" -if [[ "$CLEANUP_IMAGE" == "true" ]]; then - # Clean up yum caches to reduce the image size - sudo yum clean all - sudo rm -rf \ - $TEMPLATE_DIR \ - /var/cache/yum - - # Clean up files to reduce confusion during debug - sudo rm -rf \ - /etc/hostname \ - /etc/machine-id \ - /etc/resolv.conf \ - /etc/ssh/ssh_host* \ - /home/ec2-user/.ssh/authorized_keys \ - /root/.ssh/authorized_keys \ - /var/lib/cloud/data \ - /var/lib/cloud/instance \ - /var/lib/cloud/instances \ - /var/lib/cloud/sem \ - /var/lib/dhclient/* \ - /var/lib/dhcp/dhclient.* \ - /var/lib/yum/history \ - /var/log/cloud-init-output.log \ - /var/log/cloud-init.log \ - /var/log/secure \ - /var/log/wtmp -fi - sudo touch /etc/machine-id From 5651264bfffe5a7e0733aaf56fe12a506cabb314 Mon Sep 17 00:00:00 2001 From: xr1776 <108886506+xr1776@users.noreply.github.com> Date: 
Thu, 29 Sep 2022 19:24:10 -0400 Subject: [PATCH 077/232] Update CHANGELOG.md for v20220926 (#1039) --- CHANGELOG.md | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 025386aaf..f0deebb16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,44 @@ # Changelog +### AMI Release v20220926 +Available ap-northeast-2 09/29/2022 and other regions 10/3/2022. +* amazon-eks-gpu-node-1.23-v20220926 +* amazon-eks-gpu-node-1.22-v20220926 +* amazon-eks-gpu-node-1.21-v20220926 +* amazon-eks-gpu-node-1.20-v20220926 +* amazon-eks-arm64-node-1.23-v20220926 +* amazon-eks-arm64-node-1.22-v20220926 +* amazon-eks-arm64-node-1.21-v20220926 +* amazon-eks-arm64-node-1.20-v20220926 +* amazon-eks-node-1.23-v20220926 +* amazon-eks-node-1.22-v20220926 +* amazon-eks-node-1.21-v20220926 +* amazon-eks-node-1.20-v20220926 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.23.9-20220926` +* `1.22.12-20220926` +* `1.21.14-20220926` +* `1.20.15-20220926` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ + +AMI details: +* kernel: 5.4.209-116.367.amzn2 +* dockerd: 20.10.17-1.amzn2 +* containerd: 1.6.6-1.amzn2 +* runc: 1.1.3-1.amzn2 +* cuda: 470.57.02-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable Changes: +* Phase 1 of support for Trn1 instances + ### AMI Release v20220914 * amazon-eks-gpu-node-1.23-v20220914 * amazon-eks-gpu-node-1.22-v20220914 From e064e254d2388ab048aff586de43f8b0d3b7fc03 Mon Sep 17 00:00:00 2001 From: xr1776 <108886506+xr1776@users.noreply.github.com> Date: Thu, 29 Sep 2022 19:46:24 -0400 Subject: [PATCH 078/232] Update CHANGELOG.md (#1040) * Update CHANGELOG.md * Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) 
diff --git a/CHANGELOG.md b/CHANGELOG.md index f0deebb16..a3560492e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,6 @@ # Changelog ### AMI Release v20220926 -Available ap-northeast-2 09/29/2022 and other regions 10/3/2022. * amazon-eks-gpu-node-1.23-v20220926 * amazon-eks-gpu-node-1.22-v20220926 * amazon-eks-gpu-node-1.21-v20220926 From bb9a9bbe9b7fcf32ce5e38822b10b665303cffdb Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 7 Oct 2022 13:15:23 -0700 Subject: [PATCH 079/232] Increase polling timeout to 45 minutes (#1042) --- eks-worker-al2.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 80a81653e..33d00f105 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -74,6 +74,10 @@ "delete_on_termination": true } ], + "aws_polling": { + "delay_seconds": 30, + "max_attempts": 90 + }, "ami_regions": "{{user `ami_regions`}}", "ssh_username": "{{user `ssh_username`}}", "ssh_interface": "{{user `ssh_interface`}}", From 9bec3eb6db8dc1ed645264f20f020f039eb1e8ce Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 7 Oct 2022 13:15:57 -0700 Subject: [PATCH 080/232] Disable colorized output (#1041) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 849fc3239..d6275f1c1 100644 --- a/Makefile +++ b/Makefile @@ -42,7 +42,7 @@ validate: ## Validate packer config .PHONY: k8s k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI @echo "$(T_GREEN)Building AMI for version $(T_YELLOW)$(kubernetes_version)$(T_GREEN) on $(T_YELLOW)$(arch)$(T_RESET)" - $(PACKER_BINARY) build -timestamp-ui $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json + $(PACKER_BINARY) build -timestamp-ui -color=false $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json # Build dates and versions taken from 
https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html From e5eb32500584a43ba1fab0a4091de9c1853a6f7a Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Mon, 10 Oct 2022 12:01:20 -0500 Subject: [PATCH 081/232] always add empty machine-id file (#1043) --- scripts/cleanup.sh | 4 +++- scripts/install-worker.sh | 2 -- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/cleanup.sh b/scripts/cleanup.sh index 39babfde2..a661178b3 100644 --- a/scripts/cleanup.sh +++ b/scripts/cleanup.sh @@ -25,4 +25,6 @@ sudo rm -rf \ /var/log/cloud-init-output.log \ /var/log/cloud-init.log \ /var/log/secure \ - /var/log/wtmp \ No newline at end of file + /var/log/wtmp + +sudo touch /etc/machine-id diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 43825e045..e54e01501 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -372,5 +372,3 @@ echo vm.max_map_count=524288 | sudo tee -a /etc/sysctl.conf ################################################################################ sudo mkdir -p /etc/eks/log-collector-script/ sudo cp $TEMPLATE_DIR/log-collector-script/eks-log-collector.sh /etc/eks/log-collector-script/ - -sudo touch /etc/machine-id From bfc24acad17ed771c0ffe13169bb126b7e000e9e Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 12 Oct 2022 10:35:41 -0700 Subject: [PATCH 082/232] Makes volume_type configurable with make arguments (#1045) --- Makefile | 2 +- eks-worker-al2.json | 51 +++++++++++++++++++++++---------------------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index d6275f1c1..a4253f36f 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date kernel_version docker_version containerd_version runc_version cni_plugin_version source_ami_id source_ami_owners source_ami_filter_name arch instance_type security_group_id 
additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry ami_regions +PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date kernel_version docker_version containerd_version runc_version cni_plugin_version source_ami_id source_ami_owners source_ami_filter_name arch instance_type security_group_id additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry ami_regions volume_type K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 33d00f105..a2335e3d6 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -1,41 +1,42 @@ { "variables": { - "aws_region": "us-west-2", + "additional_yum_repos": "", + "ami_description": "EKS Kubernetes Worker AMI with AmazonLinux2 image", "ami_name": null, - "creator": "{{env `USER`}}", - "encrypted": "false", - "kms_key_id": "", + "ami_regions": "", + "ami_users": "", + "arch": null, + "associate_public_ip_address": "", "aws_access_key_id": "{{env `AWS_ACCESS_KEY_ID`}}", + "aws_region": "us-west-2", "aws_secret_access_key": "{{env `AWS_SECRET_ACCESS_KEY`}}", "aws_session_token": "{{env `AWS_SESSION_TOKEN`}}", "binary_bucket_name": "amazon-eks", "binary_bucket_region": "us-west-2", - "kubernetes_version": null, - "kubernetes_build_date": null, - "kernel_version": "", - "docker_version": "20.10.17-1.amzn2", - "containerd_version": "1.6.6-1.amzn2", - "runc_version": "1.1.3-1.amzn2", "cni_plugin_version": "v0.8.6", + "containerd_version": "1.6.6-1.amzn2", + "creator": "{{env `USER`}}", + "docker_version": "20.10.17-1.amzn2", + "encrypted": "false", + "instance_type": null, + "kernel_version": "", + "kms_key_id": "", + "kubernetes_build_date": null, + "kubernetes_version": null, + "launch_block_device_mappings_volume_size": "4", "pull_cni_from_github": "true", + "remote_folder": "", + "runc_version": 
"1.1.3-1.amzn2", + "security_group_id": "", + "sonobuoy_e2e_registry": "", + "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", "source_ami_id": "", "source_ami_owners": "137112412989", - "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", - "arch": null, - "instance_type": null, - "ami_description": "EKS Kubernetes Worker AMI with AmazonLinux2 image", "ssh_interface": "", "ssh_username": "ec2-user", - "temporary_security_group_source_cidrs": "", - "security_group_id": "", - "associate_public_ip_address": "", "subnet_id": "", - "remote_folder": "", - "launch_block_device_mappings_volume_size": "4", - "ami_users": "", - "additional_yum_repos": "", - "sonobuoy_e2e_registry": "", - "ami_regions": "" + "temporary_security_group_source_cidrs": "", + "volume_type": "gp2" }, "builders": [ { @@ -61,7 +62,7 @@ "launch_block_device_mappings": [ { "device_name": "/dev/xvda", - "volume_type": "gp2", + "volume_type": "{{user `volume_type`}}", "volume_size": "{{user `launch_block_device_mappings_volume_size`}}", "delete_on_termination": true } @@ -69,7 +70,7 @@ "ami_block_device_mappings": [ { "device_name": "/dev/xvda", - "volume_type": "gp2", + "volume_type": "{{user `volume_type`}}", "volume_size": 20, "delete_on_termination": true } From 162eee294eed1dafe99c47180fa2c470b112aad5 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 12 Oct 2022 16:00:19 -0700 Subject: [PATCH 083/232] Add IMDS helper (#1044) --- files/bin/imds | 61 ++++++++++++++ files/bootstrap.sh | 82 ++----------------- files/max-pods-calculator.sh | 6 +- scripts/install-worker.sh | 9 +- test/Dockerfile | 6 +- test/cases/api-qps-k8s-1.21-below.sh | 9 +- test/cases/api-qps-k8s-1.22-above.sh | 9 +- test/cases/container-runtime-defaults.sh | 29 +++---- test/cases/imds-token-refresh.sh | 49 +++++++++++ .../ip-family-service-ipv6-cidr-mismatch.sh | 3 +- test/cases/ipv4-cluster-dns-ip.sh | 5 +- test/cases/ipv6-cluster-dns-ip.sh | 5 +- ...-dns-cluster-ip-given-service-ipv6-cidr.sh | 4 +- 
.../ipv6-ip-family-and-service-ipv6-cidr.sh | 3 +- test/cases/max-pods-cni-1-11-2-delegation.sh | 3 +- test/cases/max-pods-cni-1-11-2.sh | 3 +- test/cases/max-pods-cni-1-7-5.sh | 4 +- test/entrypoint.sh | 2 +- test/test-harness.sh | 20 ++--- 19 files changed, 174 insertions(+), 138 deletions(-) create mode 100755 files/bin/imds create mode 100755 test/cases/imds-token-refresh.sh diff --git a/files/bin/imds b/files/bin/imds new file mode 100755 index 000000000..a65e442b6 --- /dev/null +++ b/files/bin/imds @@ -0,0 +1,61 @@ +#!/bin/sh + +set -o errexit +set -o pipefail +set -o nounset + +IMDS_DEBUG="${IMDS_DEBUG:-false}" +function log() { + if [ "$IMDS_DEBUG" = "true" ] + then + echo >&2 "$1" + fi +} + +if [ "$#" -ne 1 ] +then + echo >&2 "usage: imds API_PATH" + exit 1 +fi + +# leading slashes will be removed +API_PATH="${1#/}" + +CURRENT_TIME=$(date '+%s') + +IMDS_ENDPOINT=${IMDS_ENDPOINT:-169.254.169.254} + +log "ℹ️ Talking to IMDS at $IMDS_ENDPOINT" + +TOKEN_DIR=/tmp/imds-tokens +mkdir -p $TOKEN_DIR + +TOKEN_FILE=$(ls $TOKEN_DIR | awk '$0 > '$CURRENT_TIME | sort -n -r | head -n 1) + +if [ "$TOKEN_FILE" = "" ] +then + # default ttl is 15 minutes + IMDS_TOKEN_TTL_SECONDS=${IMDS_TOKEN_TTL_SECONDS:-900} + TOKEN_FILE=$(($CURRENT_TIME + $IMDS_TOKEN_TTL_SECONDS)) + curl \ + --silent \ + --show-error \ + --retry 10 \ + --retry-delay 1 \ + -o $TOKEN_DIR/$TOKEN_FILE \ + -H "X-aws-ec2-metadata-token-ttl-seconds: $IMDS_TOKEN_TTL_SECONDS" \ + -X PUT \ + "http://$IMDS_ENDPOINT/latest/api/token" + log "🔑 Retrieved a fresh IMDS token that will expire in $IMDS_TOKEN_TTL_SECONDS seconds." +else + log "ℹ️ Using cached IMDS token that expires in $(($TOKEN_FILE - $CURRENT_TIME)) seconds." 
+fi + +curl \ + --silent \ + --show-error \ + --retry 10 \ + --retry-delay 1 \ + --write-out '\n' \ + -H "X-aws-ec2-metadata-token: $(cat $TOKEN_DIR/$TOKEN_FILE)" \ + "http://$IMDS_ENDPOINT/$API_PATH" diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 3cfc9bc72..208f06298 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -179,7 +179,6 @@ IP_FAMILY="${IP_FAMILY:-}" SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" CLUSTER_ID="${CLUSTER_ID:-}" -IMDS_ENDPOINT="${IMDS_ENDPOINT:-169.254.169.254:80}" function get_pause_container_account_for_region () { local region="$1" @@ -213,72 +212,6 @@ function get_pause_container_account_for_region () { esac } -function _get_token() { - local token_result= - local http_result= - - token_result=$(curl -s -w "\n%{http_code}" -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" "http://${IMDS_ENDPOINT}/latest/api/token") - http_result=$(echo "$token_result" | tail -n 1) - if [[ "$http_result" != "200" ]] - then - echo -e "Failed to get token:\n$token_result" - return 1 - else - echo "$token_result" | head -n 1 - return 0 - fi -} - -function get_token() { - local token= - local retries=20 - local result=1 - - while [[ retries -gt 0 && $result -ne 0 ]] - do - retries=$[$retries-1] - token=$(_get_token) - result=$? 
- [[ $result != 0 ]] && sleep 5 - done - [[ $result == 0 ]] && echo "$token" - return $result -} - -function _get_meta_data() { - local path=$1 - local metadata_result= - - metadata_result=$(curl -s -w "\n%{http_code}" -H "X-aws-ec2-metadata-token: $TOKEN" http://${IMDS_ENDPOINT}/$path) - http_result=$(echo "$metadata_result" | tail -n 1) - if [[ "$http_result" != "200" ]] - then - echo -e "Failed to get metadata:\n$metadata_result\nhttp://${IMDS_ENDPOINT}/$path\n$TOKEN" - return 1 - else - local lines=$(echo "$metadata_result" | wc -l) - echo "$metadata_result" | head -n $(( lines - 1 )) - return 0 - fi -} - -function get_meta_data() { - local metadata= - local path=$1 - local retries=20 - local result=1 - - while [[ retries -gt 0 && $result -ne 0 ]] - do - retries=$[$retries-1] - metadata=$(_get_meta_data $path) - result=$? - [[ $result != 0 ]] && TOKEN=$(get_token) - done - [[ $result == 0 ]] && echo "$metadata" - return $result -} - # Helper function which calculates the amount of the given resource (either CPU or memory) # to reserve in a given resource range, specified by a start and end of the range and a percentage # of the resource to reserve. Note that we return zero if the start of the resource range is @@ -363,9 +296,8 @@ if [[ ! 
-z "${SERVICE_IPV6_CIDR}" ]]; then IP_FAMILY="ipv6" fi -TOKEN=$(get_token) -AWS_DEFAULT_REGION=$(get_meta_data 'latest/dynamic/instance-identity/document' | jq .region -r) -AWS_SERVICES_DOMAIN=$(get_meta_data 'latest/meta-data/services/domain') +AWS_DEFAULT_REGION=$(imds 'latest/dynamic/instance-identity/document' | jq .region -r) +AWS_SERVICES_DOMAIN=$(imds 'latest/meta-data/services/domain') MACHINE=$(uname -m) if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then @@ -486,7 +418,7 @@ fi ### kubelet.service configuration -MAC=$(get_meta_data 'latest/meta-data/network/interfaces/macs/' | head -n 1 | sed 's/\/$//') +MAC=$(imds 'latest/meta-data/network/interfaces/macs/' | head -n 1 | sed 's/\/$//') if [[ -z "${DNS_CLUSTER_IP}" ]]; then @@ -503,7 +435,7 @@ if [[ -z "${DNS_CLUSTER_IP}" ]]; then #Sets the DNS Cluster IP address that would be chosen from the serviceIpv4Cidr. (x.y.z.10) DNS_CLUSTER_IP=${SERVICE_IPV4_CIDR%.*}.10 else - TEN_RANGE=$(get_meta_data "latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks" | grep -c '^10\..*' || true ) + TEN_RANGE=$(imds "latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks" | grep -c '^10\..*' || true ) DNS_CLUSTER_IP=10.100.0.10 if [[ "$TEN_RANGE" != "0" ]]; then DNS_CLUSTER_IP=172.20.0.10 @@ -518,12 +450,12 @@ KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json echo "$(jq ".clusterDNS=[\"$DNS_CLUSTER_IP\"]" $KUBELET_CONFIG)" > $KUBELET_CONFIG if [[ "${IP_FAMILY}" == "ipv4" ]]; then - INTERNAL_IP=$(get_meta_data 'latest/meta-data/local-ipv4') + INTERNAL_IP=$(imds 'latest/meta-data/local-ipv4') else INTERNAL_IP_URI=latest/meta-data/network/interfaces/macs/$MAC/ipv6s - INTERNAL_IP=$(get_meta_data $INTERNAL_IP_URI) + INTERNAL_IP=$(imds $INTERNAL_IP_URI) fi -INSTANCE_TYPE=$(get_meta_data 'latest/meta-data/instance-type') +INSTANCE_TYPE=$(imds 'latest/meta-data/instance-type') if is_greater_than_or_equal_to_version $KUBELET_VERSION "1.22.0"; then # for K8s versions that suport API 
Priority & Fairness, increase our API server QPS diff --git a/files/max-pods-calculator.sh b/files/max-pods-calculator.sh index bf92d571e..a9bb7122b 100755 --- a/files/max-pods-calculator.sh +++ b/files/max-pods-calculator.sh @@ -76,15 +76,13 @@ CNI_MAX_ENI="${CNI_MAX_ENI:-}" INSTANCE_TYPE="${INSTANCE_TYPE:-}" INSTANCE_TYPE_FROM_IMDS="${INSTANCE_TYPE_FROM_IMDS:-false}" SHOW_MAX_ALLOWED="${SHOW_MAX_ALLOWED:-false}" -IMDS_ENDPOINT="${IMDS_ENDPOINT:-169.254.169.254:80}" PREFIX_DELEGATION_SUPPORTED=false IPS_PER_PREFIX=16 if [ "$INSTANCE_TYPE_FROM_IMDS" = true ]; then - TOKEN=$(curl -m 10 -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 600" -s "http://${IMDS_ENDPOINT}/latest/api/token") - export AWS_DEFAULT_REGION=$(curl -s --retry 5 -H "X-aws-ec2-metadata-token: $TOKEN" http://${IMDS_ENDPOINT}/latest/dynamic/instance-identity/document | jq .region -r) - INSTANCE_TYPE=$(curl -m 10 -H "X-aws-ec2-metadata-token: $TOKEN" -s http://${IMDS_ENDPOINT}/latest/meta-data/instance-type) + export AWS_DEFAULT_REGION=$(imds /latest/dynamic/instance-identity/document | jq .region -r) + INSTANCE_TYPE=$(imds /latest/meta-data/instance-type) elif [ -z "$INSTANCE_TYPE" ]; # There's no reasonable default for an instanceType so force one to be provided to the script. then echo "You must specify an instance type to calculate max pods value." 
diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index e54e01501..da0de5789 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -45,6 +45,13 @@ else exit 1 fi +################################################################################ +### Utilities ################################################################## +################################################################################ + +sudo chmod -R a+x $TEMPLATE_DIR/bin/ +sudo mv $TEMPLATE_DIR/bin/* /usr/bin/ + ################################################################################ ### Packages ################################################################### ################################################################################ @@ -338,7 +345,7 @@ sudo yum install -y amazon-ssm-agent ### AMI Metadata ############################################################### ################################################################################ -BASE_AMI_ID=$(curl -s http://169.254.169.254/latest/meta-data/ami-id) +BASE_AMI_ID=$(imds /latest/meta-data/ami-id) cat < /tmp/release BASE_AMI_ID="$BASE_AMI_ID" BUILD_TIME="$(date)" diff --git a/test/Dockerfile b/test/Dockerfile index f4a4487b1..cf0d7c020 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -1,15 +1,13 @@ FROM public.ecr.aws/aws-ec2/amazon-ec2-metadata-mock:v1.11.2 as aemm FROM public.ecr.aws/amazonlinux/amazonlinux:2 +RUN yum install -y jq ENV IMDS_ENDPOINT=127.0.0.1:1338 COPY --from=aemm /ec2-metadata-mock /sbin/ec2-metadata-mock - COPY files/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json COPY files/kubelet-kubeconfig /var/lib/kubelet/kubeconfig COPY test/entrypoint.sh /entrypoint.sh COPY files /etc/eks +COPY files/bin/* /usr/bin/ COPY test/mocks/ /sbin/ - -RUN yum install -y jq - ENTRYPOINT ["/entrypoint.sh"] diff --git a/test/cases/api-qps-k8s-1.21-below.sh b/test/cases/api-qps-k8s-1.21-below.sh index 5c7c40216..dd7653a24 100755 --- 
a/test/cases/api-qps-k8s-1.21-below.sh +++ b/test/cases/api-qps-k8s-1.21-below.sh @@ -3,9 +3,8 @@ set -euo pipefail echo "--> Should use default API server QPS for K8s 1.21-" exit_code=0 -TEMP_DIR=$(mktemp -d) -KUBELET_VERSION=v1.21.0-eks-ba74326 -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +export KUBELET_VERSION=v1.21.0-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ test || exit_code=$? @@ -19,8 +18,8 @@ fi expected_api_qps="null" expected_api_burst="null" -actual_api_qps=$(jq -r '.kubeAPIQPS' < ${TEMP_DIR}/kubelet-config.json) -actual_api_burst=$(jq -r '.kubeAPIBurst' < ${TEMP_DIR}/kubelet-config.json) +actual_api_qps=$(jq -r '.kubeAPIQPS' < /etc/kubernetes/kubelet/kubelet-config.json) +actual_api_burst=$(jq -r '.kubeAPIBurst' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" exit 1 diff --git a/test/cases/api-qps-k8s-1.22-above.sh b/test/cases/api-qps-k8s-1.22-above.sh index 54639260e..103868c93 100755 --- a/test/cases/api-qps-k8s-1.22-above.sh +++ b/test/cases/api-qps-k8s-1.22-above.sh @@ -3,9 +3,8 @@ set -euo pipefail echo "--> Should increase API server QPS for K8s 1.22+" exit_code=0 -TEMP_DIR=$(mktemp -d) -KUBELET_VERSION=v1.22.0-eks-ba74326 -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +export KUBELET_VERSION=v1.22.0-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ test || exit_code=$? 
@@ -18,8 +17,8 @@ fi expected_api_qps="10" expected_api_burst="20" -actual_api_qps=$(jq -r '.kubeAPIQPS' < ${TEMP_DIR}/kubelet-config.json) -actual_api_burst=$(jq -r '.kubeAPIBurst' < ${TEMP_DIR}/kubelet-config.json) +actual_api_qps=$(jq -r '.kubeAPIQPS' < /etc/kubernetes/kubelet/kubelet-config.json) +actual_api_burst=$(jq -r '.kubeAPIBurst' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" exit 1 diff --git a/test/cases/container-runtime-defaults.sh b/test/cases/container-runtime-defaults.sh index 74f4c5f12..03333a190 100755 --- a/test/cases/container-runtime-defaults.sh +++ b/test/cases/container-runtime-defaults.sh @@ -5,8 +5,9 @@ exit_code=0 TEMP_DIR=$(mktemp -d) echo "--> Should allow dockerd as container runtime when below k8s version 1.24" -KUBELET_VERSION="v1.20.15-eks-ba74326" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +# This variable is used to override the default value in the kubelet mock +export KUBELET_VERSION=v1.20.15-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --container-runtime dockerd \ @@ -18,8 +19,8 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should allow containerd as container runtime when below k8s version 1.24" -KUBELET_VERSION="v1.20.15-eks-ba74326" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +export KUBELET_VERSION=v1.20.15-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --container-runtime containerd \ @@ -31,8 +32,8 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should have default container runtime when below k8s version 1.24" -KUBELET_VERSION="v1.20.15-eks-ba74326" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +export KUBELET_VERSION=v1.20.15-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ test || 
exit_code=$? @@ -43,8 +44,8 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should not allow dockerd as container runtime when at or above k8s version 1.24" -export KUBELET_VERSION="v1.24.15-eks-ba74326" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --container-runtime dockerd \ @@ -58,8 +59,8 @@ fi exit_code=0 echo "--> Should allow containerd as container runtime when at or above k8s version 1.24" -KUBELET_VERSION="v1.24.15-eks-ba74326" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --container-runtime containerd \ @@ -71,8 +72,8 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should have default container runtime when at or above k8s version 1.24" -KUBELET_VERSION="v1.24.15-eks-ba74326" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ test || exit_code=$? 
@@ -83,8 +84,8 @@ if [[ ${exit_code} -ne 0 ]]; then fi echo "--> Should ignore docker-specific flags when at or above k8s version 1.24" -KUBELET_VERSION="v1.24.15-eks-ba74326" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --enable-docker-bridge true \ diff --git a/test/cases/imds-token-refresh.sh b/test/cases/imds-token-refresh.sh new file mode 100755 index 000000000..215aef426 --- /dev/null +++ b/test/cases/imds-token-refresh.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +echo "--> Should refresh IMDS token on configured interval" +exit_code=0 +TOKEN_DIR=/tmp/imds-tokens +TTL=5 +export IMDS_TOKEN_TTL_SECONDS=$TTL +export IMDS_DEBUG=true +imds /latest/meta-data/instance-id || exit_code=$? + +if [[ ${exit_code} -ne 0 ]] +then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]] +then + echo "❌ Test Failed: expected one token to be present after first IMDS call but got '${ls $TOKEN_DIR}'" + exit 1 +fi + +imds /latest/meta-data/instance-id || exit_code=$? + +if [[ ${exit_code} -ne 0 ]] +then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]] +then + echo "❌ Test Failed: expected one token to be present after second IMDS call but got '$(ls $TOKEN_DIR)'" + exit 1 +fi + +sleep $(($TTL + 1)) + +imds /latest/meta-data/instance-id || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]] +then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 2 ]] +then + echo "❌ Test Failed: expected two tokens to be present after third IMDS call but got '$(ls $TOKEN_DIR)'" + exit 1 +fi \ No newline at end of file diff --git a/test/cases/ip-family-service-ipv6-cidr-mismatch.sh b/test/cases/ip-family-service-ipv6-cidr-mismatch.sh index efe887290..f39ed8807 100755 --- a/test/cases/ip-family-service-ipv6-cidr-mismatch.sh +++ b/test/cases/ip-family-service-ipv6-cidr-mismatch.sh @@ -3,8 +3,7 @@ set -euo pipefail echo "-> Should fail validation - ip-family mismatch" exit_code=0 -TEMP_DIR=$(mktemp -d) -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --ip-family ipv4 \ diff --git a/test/cases/ipv4-cluster-dns-ip.sh b/test/cases/ipv4-cluster-dns-ip.sh index 7dc7e36b5..03074fc07 100755 --- a/test/cases/ipv4-cluster-dns-ip.sh +++ b/test/cases/ipv4-cluster-dns-ip.sh @@ -3,9 +3,8 @@ set -euo pipefail echo "--> Should return IPv4 DNS Cluster IP when given dns-cluster-ip" exit_code=0 -TEMP_DIR=$(mktemp -d) expected_cluster_dns="192.168.0.1" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --ip-family ipv4 \ @@ -17,7 +16,7 @@ if [[ ${exit_code} -ne 0 ]]; then exit 1 fi -actual_cluster_dns=$(jq -r '.clusterDNS[0]' < ${TEMP_DIR}/kubelet-config.json) +actual_cluster_dns=$(jq -r '.clusterDNS[0]' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" exit 1 diff --git a/test/cases/ipv6-cluster-dns-ip.sh b/test/cases/ipv6-cluster-dns-ip.sh index 26f67fa8d..0f7d7451d 100755 --- a/test/cases/ipv6-cluster-dns-ip.sh +++ 
b/test/cases/ipv6-cluster-dns-ip.sh @@ -3,9 +3,8 @@ set -euo pipefail echo "-> Should return ipv6 DNS Cluster IP when given dns-cluster-ip" exit_code=0 -TEMP_DIR=$(mktemp -d) expected_cluster_dns="fe80::2a" -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --ip-family ipv6 \ @@ -17,7 +16,7 @@ if [[ ${exit_code} -ne 0 ]]; then exit 1 fi -actual_cluster_dns=$(jq -r '.clusterDNS[0]' < ${TEMP_DIR}/kubelet-config.json) +actual_cluster_dns=$(jq -r '.clusterDNS[0]' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" exit 1 diff --git a/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh b/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh index 6c08cd7f6..f503f01ad 100755 --- a/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh +++ b/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh @@ -4,7 +4,7 @@ set -euo pipefail echo "-> Should return IPv6 DNS cluster IP when given service-ipv6-cidr" exit_code=0 TEMP_DIR=$(mktemp -d) -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --ip-family ipv6 \ @@ -17,7 +17,7 @@ if [[ ${exit_code} -ne 0 ]]; then fi expected_cluster_dns="fe80::1a" -actual_cluster_dns=$(jq -r '.clusterDNS[0]' < ${TEMP_DIR}/kubelet-config.json) +actual_cluster_dns=$(jq -r '.clusterDNS[0]' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" exit 1 diff --git a/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh b/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh index ac6991960..e984a223e 100755 --- 
a/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh +++ b/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh @@ -3,8 +3,7 @@ set -euo pipefail echo "-> Should fail w/ \"service-ipv6-cidr must be provided when ip-family is specified as ipv6\"" exit_code=0 -TEMP_DIR=$(mktemp -d) -run ${TEMP_DIR} /etc/eks/bootstrap.sh \ +/etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ --ip-family ipv6 \ diff --git a/test/cases/max-pods-cni-1-11-2-delegation.sh b/test/cases/max-pods-cni-1-11-2-delegation.sh index e6a2b8ba6..dca43f5e6 100755 --- a/test/cases/max-pods-cni-1-11-2-delegation.sh +++ b/test/cases/max-pods-cni-1-11-2-delegation.sh @@ -3,8 +3,7 @@ set -euo pipefail echo "-> Should calc max-pods successfully for VPC CNI 1.11.2" exit_code=0 -TEMP_DIR=$(mktemp -d) -out=$(run ${TEMP_DIR} /etc/eks/max-pods-calculator.sh \ +out=$(/etc/eks/max-pods-calculator.sh \ --instance-type m5.8xlarge \ --cni-version 1.11.2 \ --cni-prefix-delegation-enabled || exit_code=$?) diff --git a/test/cases/max-pods-cni-1-11-2.sh b/test/cases/max-pods-cni-1-11-2.sh index cbf9179a5..ec47c0730 100755 --- a/test/cases/max-pods-cni-1-11-2.sh +++ b/test/cases/max-pods-cni-1-11-2.sh @@ -3,8 +3,7 @@ set -euo pipefail echo "-> Should calc max-pods successfully for m5.8xlarge VPC CNI 1.11.2" exit_code=0 -TEMP_DIR=$(mktemp -d) -out=$(run ${TEMP_DIR} /etc/eks/max-pods-calculator.sh \ +out=$(/etc/eks/max-pods-calculator.sh \ --instance-type m5.8xlarge \ --cni-version 1.11.2 || exit_code=$?) 
echo $out diff --git a/test/cases/max-pods-cni-1-7-5.sh b/test/cases/max-pods-cni-1-7-5.sh index 295f43c28..619767256 100755 --- a/test/cases/max-pods-cni-1-7-5.sh +++ b/test/cases/max-pods-cni-1-7-5.sh @@ -3,8 +3,8 @@ set -euo pipefail echo "-> Should calc max-pods successfully for VPC CNI 1.7.5" exit_code=0 -TEMP_DIR=$(mktemp -d) -out=$(run ${TEMP_DIR} /etc/eks/max-pods-calculator.sh \ +export IMDS_DEBUG=true +out=$(/etc/eks/max-pods-calculator.sh \ --instance-type-from-imds \ --cni-version 1.7.5 || exit_code=$?) echo $out diff --git a/test/entrypoint.sh b/test/entrypoint.sh index b067e39f6..fdd437768 100755 --- a/test/entrypoint.sh +++ b/test/entrypoint.sh @@ -7,4 +7,4 @@ SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" sleep 1 ## execute any other params -$@ \ No newline at end of file +/test.sh \ No newline at end of file diff --git a/test/test-harness.sh b/test/test-harness.sh index c13ca763e..f632194a4 100755 --- a/test/test-harness.sh +++ b/test/test-harness.sh @@ -34,20 +34,13 @@ docker build -t eks-optimized-ami -f "${SCRIPTPATH}/Dockerfile" "${SCRIPTPATH}/. overall_status=0 function run(){ - local temp_dir=$1 - shift - # This variable is used to override the default value in the kubelet mock - KUBELET_VERSION="${KUBELET_VERSION:-}" - cp -f ${SCRIPTPATH}/../files/kubelet-config.json ${temp_dir}/kubelet-config.json docker run -v ${SCRIPTPATH}/../files/:/etc/eks/ \ - -v ${temp_dir}/kubelet-config.json:/etc/kubernetes/kubelet/kubelet-config.json \ + -v "$(realpath $1):/test.sh" \ --attach STDOUT \ --attach STDERR \ --rm \ - -e KUBELET_VERSION="$KUBELET_VERSION" \ - eks-optimized-ami $@ + eks-optimized-ami } -export -f run if [[ ! 
-z ${TEST_CASE_SCRIPT} ]]; then test_cases=${TEST_CASE_SCRIPT} @@ -59,9 +52,9 @@ for case in "${test_cases[@]}"; do status=0 echo "=================================================================================================================" echo "-> Executing Test Case: $(basename ${case})" - ${case} || status=1 + run ${case} || status=1 if [[ ${status} -eq 0 ]]; then - echo "✅ ✅ $(basename ${case}) Tests Passed! ✅ ✅ " + echo "✅ ✅ $(basename ${case}) Tests Passed! ✅ ✅" else echo "❌ ❌ $(basename ${case}) Tests Failed! ❌ ❌" overall_status=1 @@ -69,4 +62,9 @@ for case in "${test_cases[@]}"; do echo "=================================================================================================================" done +if [[ ${overall_status} -eq 0 ]]; then + echo "✅ ✅ All Tests Passed! ✅ ✅" +else + echo "❌ ❌ Some Tests Failed! ❌ ❌" +fi exit $overall_status From a830ca83411ea20b05ba84265098030faa3946c4 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 20 Oct 2022 10:11:43 -0700 Subject: [PATCH 084/232] Upgrades runc to 1.1.3-1.amzn2.0.2 (#1055) --- eks-worker-al2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index a2335e3d6..94900936e 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -26,7 +26,7 @@ "launch_block_device_mappings_volume_size": "4", "pull_cni_from_github": "true", "remote_folder": "", - "runc_version": "1.1.3-1.amzn2", + "runc_version": "1.1.3-1.amzn2.0.2", "security_group_id": "", "sonobuoy_e2e_registry": "", "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", From 0cad5c51788a709c31a8cb839c6fac4b0431cbba Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 20 Oct 2022 10:48:44 -0700 Subject: [PATCH 085/232] Update docker and containerd for ALASDOCKER-2022-021 (#1056) --- eks-worker-al2.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 94900936e..dda34dd39 100644 --- a/eks-worker-al2.json +++ 
b/eks-worker-al2.json @@ -14,9 +14,9 @@ "binary_bucket_name": "amazon-eks", "binary_bucket_region": "us-west-2", "cni_plugin_version": "v0.8.6", - "containerd_version": "1.6.6-1.amzn2", + "containerd_version": "1.6.6-1.amzn2.0.2", "creator": "{{env `USER`}}", - "docker_version": "20.10.17-1.amzn2", + "docker_version": "20.10.17-1.amzn2.0.1", "encrypted": "false", "instance_type": null, "kernel_version": "", From 4b54ee95d42df8a2715add2a32f5150db097fde8 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 21 Oct 2022 11:48:51 -0700 Subject: [PATCH 086/232] Fixes arch reference in README per #1057 (#1058) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 988c67657..21c4b6641 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ PLATFORM=linux # Chose a platform and set the variable #List of all architectures for the selected Kubernetes Version, build date and platform aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/ -ARCH=amd64 #Chose an architecture and set the variable +ARCH=x86_64 #Chose an architecture and set the variable ``` Run the following command to build an Amazon EKS Worker AMI based on the chosen parameters in the previous step ```bash From a0d20adbb4f2e0bcc3d520aecc9bfa2ba0b0f47b Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 27 Oct 2022 14:00:14 -0400 Subject: [PATCH 087/232] Add fmt to make targets (#1063) --- .editorconfig | 8 ++++++++ CONTRIBUTING.md | 9 +++++---- Makefile | 6 ++++++ 3 files changed, 19 insertions(+), 4 deletions(-) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 000000000..117520dfb --- /dev/null +++ b/.editorconfig @@ -0,0 +1,8 @@ +[*.sh] +indent_style = space +indent_size = 2 +binary_next_line = true +switch_case_indent = true +space_redirects = true +keep_padding = true +function_next_line = false diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 30e2f96d9..0576691bc 100644 --- 
a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -31,10 +31,11 @@ To send us a pull request, please: 1. Fork the repository. 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. -3. Ensure local tests pass. -4. Commit to your fork using clear commit messages. -5. Send us a pull request, answering any default questions in the pull request interface. -6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. +3. Ensure your changes match our style guide (`make fmt`). +4. Ensure local tests pass (`make test`). +5. Commit to your fork using clear commit messages. +6. Send us a pull request, answering any default questions in the pull request interface. +7. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 
diff --git a/Makefile b/Makefile index a4253f36f..c96644855 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,8 @@ PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) +MAKEFILE_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + aws_region ?= $(AWS_DEFAULT_REGION) binary_bucket_region ?= $(AWS_DEFAULT_REGION) arch ?= x86_64 @@ -31,6 +33,10 @@ T_RESET := \e[0m .PHONY: all all: 1.20 1.21 1.22 1.23 ## Build all versions of EKS Optimized AL2 AMI +.PHONY: fmt +fmt: ## Format the source files + shfmt --list $(MAKEFILE_DIR) + .PHONY: test test: ## run the test-harness test/test-harness.sh From c46e7413f674c2bb03fad950d47658b4ef3c9fb1 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 27 Oct 2022 14:00:39 -0400 Subject: [PATCH 088/232] Fix syntax issues (#1062) --- files/bin/imds | 2 +- test/cases/imds-token-refresh.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/files/bin/imds b/files/bin/imds index a65e442b6..c73097b2f 100755 --- a/files/bin/imds +++ b/files/bin/imds @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash set -o errexit set -o pipefail diff --git a/test/cases/imds-token-refresh.sh b/test/cases/imds-token-refresh.sh index 215aef426..cc1334ec6 100755 --- a/test/cases/imds-token-refresh.sh +++ b/test/cases/imds-token-refresh.sh @@ -18,7 +18,7 @@ then exit 1 elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]] then - echo "❌ Test Failed: expected one token to be present after first IMDS call but got '${ls $TOKEN_DIR}'" + echo "❌ Test Failed: expected one token to be present after first IMDS call but got '$(ls $TOKEN_DIR)'" exit 1 fi @@ -46,4 +46,4 @@ elif [[ $(ls $TOKEN_DIR | wc -l) -ne 2 ]] then echo "❌ Test Failed: expected two tokens to be present after third IMDS call but got '$(ls $TOKEN_DIR)'" exit 1 -fi \ No newline at end of file +fi From 
a459e7332592cda651a81f7db2526b5d94e90f05 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 28 Oct 2022 11:25:47 -0400 Subject: [PATCH 089/232] Use shfmt flags instead of editorconfig (#1064) --- .editorconfig | 3 +++ Makefile | 12 +++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/.editorconfig b/.editorconfig index 117520dfb..cd9cab171 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,3 +1,6 @@ +# ensure that these rules are equivalent to the flags to shfmt in the Makefile. +# we can't use this file with shfmt directly because there's no way to express +# shebang matching on files without the `sh` extension. [*.sh] indent_style = space indent_size = 2 diff --git a/Makefile b/Makefile index c96644855..095922b28 100644 --- a/Makefile +++ b/Makefile @@ -35,7 +35,17 @@ all: 1.20 1.21 1.22 1.23 ## Build all versions of EKS Optimized AL2 AMI .PHONY: fmt fmt: ## Format the source files - shfmt --list $(MAKEFILE_DIR) + # ensure that these flags are equivalent to the rules in the .editorconfig + shfmt \ + --list \ + --write \ + --language-dialect auto \ + --indent 2 \ + --binary-next-line \ + --case-indent \ + --space-redirects \ + --keep-padding \ + $(MAKEFILE_DIR) .PHONY: test test: ## run the test-harness From 193759f20224bb866fef169b2a93049cc999e758 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Sat, 29 Oct 2022 09:22:24 -0700 Subject: [PATCH 090/232] Updating CHANGELOG.md for AMI release 20221027 (#1067) --- CHANGELOG.md | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a3560492e..d6e524ef3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,48 @@ # Changelog +### AMI Release v20221027 +* amazon-eks-gpu-node-1.23-v20221027 +* amazon-eks-gpu-node-1.22-v20221027 +* amazon-eks-gpu-node-1.21-v20221027 +* amazon-eks-gpu-node-1.20-v20221027 +* amazon-eks-arm64-node-1.23-v20221027 +* amazon-eks-arm64-node-1.22-v20221027 +* amazon-eks-arm64-node-1.21-v20221027 +* 
amazon-eks-arm64-node-1.20-v20221027 +* amazon-eks-node-1.23-v20221027 +* amazon-eks-node-1.22-v20221027 +* amazon-eks-node-1.21-v20221027 +* amazon-eks-node-1.20-v20221027 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.23.9-20221027` +* `1.22.12-20221027` +* `1.21.14-20221027` +* `1.20.15-20221027` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ + +AMI details: +* kernel: 5.4.217-126.408.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.3-1.amzn2.0.2 +* cuda: 470.141.03-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +* cuda is updated to 470.141.03-1. +* Linux kernel is updated to 5.4.217-126.408.amzn2. +* runc version is updated to 1.1.3-1.amzn2.0.2 to include [ALAS2DOCKER-2022-020](https://alas.aws.amazon.com/AL2/ALASDOCKER-2022-020.html). [#1055](https://github.com/awslabs/amazon-eks-ami/pull/1055) +* docker version are update to 20.10.17-1.amzn2.0.1, and containerd version are updated to 1.6.6-1.amzn2.0.2 to include [ALASDOCKER-2022-021](https://alas.aws.amazon.com/AL2/ALASDOCKER-2022-021.html). [#1056](https://github.com/awslabs/amazon-eks-ami/pull/1056) +* Increase the kube-api-server QPS from 5/10 to 10/20. [#1030](https://github.com/awslabs/amazon-eks-ami/pull/1030) +* Release AMI in me-central-1 with version 1.21, 1.22, 1.23. 1.20 will not be supported since it will be deprecated soon. 
+ ### AMI Release v20220926 * amazon-eks-gpu-node-1.23-v20220926 * amazon-eks-gpu-node-1.22-v20220926 From 3b6b5231f4ab8cbf669f6d8034864870690d721a Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Sat, 29 Oct 2022 18:21:33 -0700 Subject: [PATCH 091/232] recalling change log v20221027 (#1069) Co-authored-by: Ravi Sinha --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d6e524ef3..3f268cc05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -### AMI Release v20221027 +### [Recalled] AMI Release v20221027 * amazon-eks-gpu-node-1.23-v20221027 * amazon-eks-gpu-node-1.22-v20221027 * amazon-eks-gpu-node-1.21-v20221027 From ff27e2440b6a02d51ebcc5fec2ae42d315b31310 Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Mon, 31 Oct 2022 14:15:30 -0400 Subject: [PATCH 092/232] Pin Kernel 5.4 to 5.4.209-116.367 to prevent nodes from going into Unready (#1072) We're investigating issues with later Kernel versions which cause nodes to become Unready. Till those issues are resolved, pinning the Kernel to the last known good version. --- scripts/upgrade_kernel.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index a53c4f6f9..a4cf68f3e 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -13,7 +13,12 @@ fi if [[ $KERNEL_VERSION == "4.14" ]]; then sudo yum update -y kernel elif [[ $KERNEL_VERSION == "5.4" ]]; then - sudo amazon-linux-extras install -y kernel-5.4 + # Pinning Kernel to 5.4.209-116.367 since we're investigating issues with later Kernel versions which cause nodes to become Unready. 
+ # sudo amazon-linux-extras install -y kernel-5.4 + sudo amazon-linux-extras enable kernel-5.4=latest + sudo yum -y install kernel-5.4.209-116.367.amzn2 + sudo yum install -y yum-plugin-versionlock + sudo yum versionlock kernel-5.4* elif [[ $KERNEL_VERSION == "5.10" ]]; then sudo amazon-linux-extras install -y kernel-5.10 else From 165d827c38c548a0c08a7e34441ee80d79c136bc Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 31 Oct 2022 13:35:03 -0700 Subject: [PATCH 093/232] Address shellcheck finding (#1073) --- files/bootstrap.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 208f06298..7d8f4b0aa 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -265,7 +265,7 @@ get_cpu_millicores_to_reserve() { local cpu_ranges=(0 1000 2000 4000 $total_cpu_on_instance) local cpu_percentage_reserved_for_ranges=(600 100 50 25) cpu_to_reserve="0" - for i in ${!cpu_percentage_reserved_for_ranges[@]}; do + for i in "${!cpu_percentage_reserved_for_ranges[@]}"; do local start_range=${cpu_ranges[$i]} local end_range=${cpu_ranges[(($i+1))]} local percentage_to_reserve_for_range=${cpu_percentage_reserved_for_ranges[$i]} From 0d8d78c0f0010432c0ebef73d41bf88ebbdf4f60 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 31 Oct 2022 15:44:45 -0700 Subject: [PATCH 094/232] Don't keep padding when formatting (#1075) --- .editorconfig | 2 +- Makefile | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.editorconfig b/.editorconfig index cd9cab171..deaf311fc 100644 --- a/.editorconfig +++ b/.editorconfig @@ -7,5 +7,5 @@ indent_size = 2 binary_next_line = true switch_case_indent = true space_redirects = true -keep_padding = true +keep_padding = false function_next_line = false diff --git a/Makefile b/Makefile index 095922b28..32f431ab1 100644 --- a/Makefile +++ b/Makefile @@ -44,7 +44,6 @@ fmt: ## Format the source files --binary-next-line \ --case-indent \ --space-redirects \ - --keep-padding \ $(MAKEFILE_DIR) 
.PHONY: test From 1e89a4483c5bd2c34fcdf89d625fbd56085d83cf Mon Sep 17 00:00:00 2001 From: Kulwant Singh Date: Wed, 2 Nov 2022 15:06:48 +0000 Subject: [PATCH 095/232] Update Windows eks-log-collector for container runtime log collection (#1059) 1. Added check for collection for docker 2. Add support for containerd log collection 3. panic.log file is collected for containerd --- .../windows/eks-log-collector.ps1 | 109 +++++++++++++++--- 1 file changed, 91 insertions(+), 18 deletions(-) diff --git a/log-collector-script/windows/eks-log-collector.ps1 b/log-collector-script/windows/eks-log-collector.ps1 index 2d538376d..d3e03660b 100644 --- a/log-collector-script/windows/eks-log-collector.ps1 +++ b/log-collector-script/windows/eks-log-collector.ps1 @@ -52,11 +52,13 @@ Function create_working_dir{ New-Item -type directory -path $info_system -Force >$null New-Item -type directory -path $info_system\eks -Force >$null New-Item -type directory -path $info_system\docker -Force >$null + New-Item -type directory -path $info_system\containerd -Force >$null New-Item -type directory -path $info_system\firewall -Force >$null New-Item -type directory -path $info_system\kubelet -Force >$null New-Item -type directory -path $info_system\kube-proxy -Force >$null New-Item -type directory -path $info_system\cni -Force >$null New-Item -type directory -path $info_system\docker_log -Force >$null + New-Item -type directory -path $info_system\containerd_log -Force >$null New-Item -type directory -path $info_system\network -Force >$null New-Item -type directory -path $info_system\network\hns -Force >$null Write-Host "OK" -ForegroundColor "green" @@ -69,6 +71,38 @@ Function create_working_dir{ } } +Function check_service_installed_and_running { + <# + .SYNOPSIS + This method checks if the specified service is installed and in running state. 
+ #> + [CmdletBinding()] + Param ( + [Parameter(Mandatory=$true)] + [ValidateNotNullOrEmpty()] + [string]$ServiceName + ) + + Write-Host ("Checking status of service: {0}" -f $ServiceName) + try { + if (-not (Get-Service -Name $ServiceName -ErrorAction SilentlyContinue)) { + Write-Host ("Service {0} not found" -f $ServiceName) + return 0 + } + + if ((Get-Service -Name $ServiceName).Status -eq "Running") { + Write-Host ("Service {0} is running." -f $ServiceName) + return 1 + } + Write-Host ("Service {0} is not running." -f $ServiceName) + return 0 + } + catch { + Write-Error "Unable to check if service is installed and running" + break + } +} + Function get_sysinfo{ try { Write-Host "Collecting System information" @@ -181,18 +215,39 @@ Function get_system_services{ } } -Function get_docker_info{ - try { - Write-Host "Collecting Docker daemon information" - docker info > $info_system\docker\docker-info.txt 2>&1 - docker ps --all --no-trunc > $info_system\docker\docker-ps.txt 2>&1 - docker images > $info_system\docker\docker-images.txt 2>&1 - docker version > $info_system\docker\docker-version.txt 2>&1 - Write-Host "OK" -foregroundcolor "green" +Function get_containerd_info{ + Write-Host "Collecting Containerd information" + if (check_service_installed_and_running "containerd") { + try { + ctr version > $info_system\containerd\containerd-version.txt 2>&1 + ctr namespaces list > $info_system\containerd\containerd-namespaces.txt 2>&1 + ctr --namespace k8s.io images list > $info_system\containerd\containerd-images.txt 2>&1 + ctr --namespace k8s.io containers list > $info_system\containerd\containerd-containers.txt 2>&1 + ctr --namespace k8s.io tasks list > $info_system\containerd\containerd-tasks.txt 2>&1 + ctr --namespace k8s.io plugins list > $info_system\containerd\containerd-plugins.txt 2>&1 + Write-Host "OK" -foregroundcolor "green" + } + catch{ + Write-Error "Unable to collect Containerd information" + Break + } } - catch{ - Write-Error "Unable to collect Docker 
daemon information" - Break +} + +Function get_docker_info{ + Write-Host "Collecting Docker daemon information" + if (check_service_installed_and_running "docker") { + try { + docker info > $info_system\docker\docker-info.txt 2>&1 + docker ps --all --no-trunc > $info_system\docker\docker-ps.txt 2>&1 + docker images > $info_system\docker\docker-images.txt 2>&1 + docker version > $info_system\docker\docker-version.txt 2>&1 + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect Docker daemon information" + Break + } } } @@ -243,14 +298,30 @@ Function get_k8s_info{ } Function get_docker_logs{ - try { - Write-Host "Collecting Docker daemon logs" - Get-EventLog -LogName Application -Source Docker | Sort-Object Time | Export-CSV $info_system/docker_log/docker-daemon.csv - Write-Host "OK" -foregroundcolor "green" + Write-Host "Collecting Docker daemon logs" + if (check_service_installed_and_running "docker") { + try { + Get-EventLog -LogName Application -Source Docker | Sort-Object Time | Export-CSV $info_system/docker_log/docker-daemon.csv + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect Docker daemon logs" + Break + } } - catch { - Write-Error "Unable to collect Docker daemon logs" - Break +} + +Function get_containerd_logs{ + Write-Host "Collecting containerd logs" + if (check_service_installed_and_running "containerd") { + try { + copy C:\ProgramData\containerd\root\panic.log $info_system\containerd_log\ + Write-Host "OK" -foregroundcolor "green" + } + catch { + Write-Error "Unable to collect containerd logs" + Break + } } } @@ -312,8 +383,10 @@ Function collect{ get_softwarelist get_system_services get_docker_info + get_containerd_info get_k8s_info get_docker_logs + get_containerd_logs get_eks_logs get_network_info From 0b4b45fc9f8e0b1e9b18f45f85aaf6b35636a1a6 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 3 Nov 2022 10:37:45 -0700 Subject: [PATCH 096/232] Fallback to shfmt in container 
(#1077) --- Makefile | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 32f431ab1..97d3374a6 100644 --- a/Makefile +++ b/Makefile @@ -33,18 +33,22 @@ T_RESET := \e[0m .PHONY: all all: 1.20 1.21 1.22 1.23 ## Build all versions of EKS Optimized AL2 AMI +# ensure that these flags are equivalent to the rules in the .editorconfig +SHFMT_FLAGS := --list \ +--language-dialect auto \ +--indent 2 \ +--binary-next-line \ +--case-indent \ +--space-redirects + +SHFMT_COMMAND := $(shell which shfmt) +ifeq (, $(SHFMT_COMMAND)) +SHFMT_COMMAND = docker run --rm -v $(MAKEFILE_DIR):$(MAKEFILE_DIR) mvdan/shfmt +endif + .PHONY: fmt fmt: ## Format the source files - # ensure that these flags are equivalent to the rules in the .editorconfig - shfmt \ - --list \ - --write \ - --language-dialect auto \ - --indent 2 \ - --binary-next-line \ - --case-indent \ - --space-redirects \ - $(MAKEFILE_DIR) + $(SHFMT_COMMAND) $(SHFMT_FLAGS) --write $(MAKEFILE_DIR) .PHONY: test test: ## run the test-harness From b9bd7f65ee84f1f285bacbcabd3cd145ecee318f Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Thu, 3 Nov 2022 16:49:18 -0500 Subject: [PATCH 097/232] Cleanup messages and imds-tokens (#1080) --- scripts/cleanup.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/cleanup.sh b/scripts/cleanup.sh index a661178b3..b9fff7987 100644 --- a/scripts/cleanup.sh +++ b/scripts/cleanup.sh @@ -25,6 +25,8 @@ sudo rm -rf \ /var/log/cloud-init-output.log \ /var/log/cloud-init.log \ /var/log/secure \ - /var/log/wtmp + /var/log/wtmp \ + /var/log/messages \ + /tmp/imds-tokens sudo touch /etc/machine-id From 1bb12c591dda81443b652e6adde44a5c62f0e4fc Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 3 Nov 2022 15:18:50 -0700 Subject: [PATCH 098/232] Garbage collection for imds tokens (#1081) --- files/bin/imds | 31 +++++++++++++++++++++++++------ test/cases/imds-token-refresh.sh | 30 ++++++++++++++++++++++++++++++ 2 files 
changed, 55 insertions(+), 6 deletions(-) diff --git a/files/bin/imds b/files/bin/imds index c73097b2f..88b372ddf 100755 --- a/files/bin/imds +++ b/files/bin/imds @@ -30,22 +30,41 @@ log "ℹ️ Talking to IMDS at $IMDS_ENDPOINT" TOKEN_DIR=/tmp/imds-tokens mkdir -p $TOKEN_DIR +IMDS_RETRIES=${IMDS_RETRIES:-10} +IMDS_RETRY_DELAY_SECONDS=${IMDS_RETRY_DELAY_SECONDS:-1} + +# default ttl is 15 minutes +IMDS_TOKEN_TTL_SECONDS=${IMDS_TOKEN_TTL_SECONDS:-900} + +# max ttl is 6 hours, see: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html +IMDS_MAX_TOKEN_TTL_SECONDS=${IMDS_MAX_TOKEN_TTL_SECONDS:-21600} + +# cleanup expired tokens +DELETED_TOKENS=0 +for TOKEN_FILE in $(ls $TOKEN_DIR | awk '$0 < '$(($CURRENT_TIME - $IMDS_MAX_TOKEN_TTL_SECONDS))); do + rm $TOKEN_DIR/$TOKEN_FILE + DELETED_TOKENS=$(($DELETED_TOKENS + 1)) +done +if [ "$DELETED_TOKENS" -gt 0 ]; then + log "🗑️ Deleted $DELETED_TOKENS expired IMDS token(s)." +fi + TOKEN_FILE=$(ls $TOKEN_DIR | awk '$0 > '$CURRENT_TIME | sort -n -r | head -n 1) if [ "$TOKEN_FILE" = "" ] then - # default ttl is 15 minutes - IMDS_TOKEN_TTL_SECONDS=${IMDS_TOKEN_TTL_SECONDS:-900} TOKEN_FILE=$(($CURRENT_TIME + $IMDS_TOKEN_TTL_SECONDS)) curl \ --silent \ --show-error \ - --retry 10 \ - --retry-delay 1 \ + --retry $IMDS_RETRIES \ + --retry-delay $IMDS_RETRY_DELAY_SECONDS \ -o $TOKEN_DIR/$TOKEN_FILE \ -H "X-aws-ec2-metadata-token-ttl-seconds: $IMDS_TOKEN_TTL_SECONDS" \ -X PUT \ "http://$IMDS_ENDPOINT/latest/api/token" + # make sure any user can utilize (and clean up) these tokens + chmod a+rwx $TOKEN_DIR/$TOKEN_FILE log "🔑 Retrieved a fresh IMDS token that will expire in $IMDS_TOKEN_TTL_SECONDS seconds." else log "ℹ️ Using cached IMDS token that expires in $(($TOKEN_FILE - $CURRENT_TIME)) seconds." 
@@ -54,8 +73,8 @@ fi curl \ --silent \ --show-error \ - --retry 10 \ - --retry-delay 1 \ + --retry $IMDS_RETRIES \ + --retry-delay $IMDS_RETRY_DELAY_SECONDS \ --write-out '\n' \ -H "X-aws-ec2-metadata-token: $(cat $TOKEN_DIR/$TOKEN_FILE)" \ "http://$IMDS_ENDPOINT/$API_PATH" diff --git a/test/cases/imds-token-refresh.sh b/test/cases/imds-token-refresh.sh index cc1334ec6..0947ec61c 100755 --- a/test/cases/imds-token-refresh.sh +++ b/test/cases/imds-token-refresh.sh @@ -47,3 +47,33 @@ then echo "❌ Test Failed: expected two tokens to be present after third IMDS call but got '$(ls $TOKEN_DIR)'" exit 1 fi + +sleep $(($TTL + 1)) + +# both tokens are now expired, but only one should be garbage-collected with a window of $TTL + +IMDS_MAX_TOKEN_TTL_SECONDS=$TTL imds /latest/meta-data/instance-id || exit_code=$? + +if [[ ${exit_code} -ne 0 ]] +then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 2 ]] +then + echo "❌ Test Failed: expected two tokens to be present after first garbage-collection but got '$(ls $TOKEN_DIR)'" + exit 1 +fi + +# the other expired token should be removed with a window of 0 + +IMDS_MAX_TOKEN_TTL_SECONDS=0 imds /latest/meta-data/instance-id || exit_code=$? + +if [[ ${exit_code} -ne 0 ]] +then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]] +then + echo "❌ Test Failed: expected one token to be present after second garbage-collection but got '$(ls $TOKEN_DIR)'" + exit 1 +fi \ No newline at end of file From 6014c4e6872a23f82ca295afa93b033207042876 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 3 Nov 2022 15:24:34 -0700 Subject: [PATCH 099/232] Apply style rules to source files. 
(#1076) --- files/bin/imds | 9 +- files/bootstrap.sh | 654 +++++++++--------- files/max-pods-calculator.sh | 174 +++-- files/pull-sandbox-image.sh | 36 +- .../linux/eks-log-collector.sh | 186 +++-- scripts/cleanup.sh | 38 +- scripts/cleanup_additional_repos.sh | 3 +- scripts/generate-version-info.sh | 3 +- scripts/install-worker.sh | 241 ++++--- scripts/install_additional_repos.sh | 3 +- scripts/upgrade_kernel.sh | 24 +- scripts/validate.sh | 9 +- test/cases/api-qps-k8s-1.21-below.sh | 18 +- test/cases/api-qps-k8s-1.22-above.sh | 18 +- test/cases/container-runtime-defaults.sh | 82 +-- test/cases/imds-token-refresh.sh | 72 +- .../ip-family-service-ipv6-cidr-mismatch.sh | 14 +- test/cases/ipv4-cluster-dns-ip.sh | 20 +- test/cases/ipv6-cluster-dns-ip.sh | 20 +- ...-dns-cluster-ip-given-service-ipv6-cidr.sh | 20 +- .../ipv6-ip-family-and-service-ipv6-cidr.sh | 14 +- test/cases/max-pods-cni-1-11-2-delegation.sh | 14 +- test/cases/max-pods-cni-1-11-2.sh | 12 +- test/cases/max-pods-cni-1-7-5.sh | 14 +- test/entrypoint.sh | 7 +- test/mocks/aws | 23 +- test/mocks/iptables-save | 2 +- test/mocks/kubelet | 4 +- test/mocks/sudo | 2 +- test/mocks/systemctl | 2 +- test/test-harness.sh | 68 +- 31 files changed, 899 insertions(+), 907 deletions(-) diff --git a/files/bin/imds b/files/bin/imds index 88b372ddf..e9f8e749d 100755 --- a/files/bin/imds +++ b/files/bin/imds @@ -6,14 +6,12 @@ set -o nounset IMDS_DEBUG="${IMDS_DEBUG:-false}" function log() { - if [ "$IMDS_DEBUG" = "true" ] - then + if [ "$IMDS_DEBUG" = "true" ]; then echo >&2 "$1" fi } -if [ "$#" -ne 1 ] -then +if [ "$#" -ne 1 ]; then echo >&2 "usage: imds API_PATH" exit 1 fi @@ -51,8 +49,7 @@ fi TOKEN_FILE=$(ls $TOKEN_DIR | awk '$0 > '$CURRENT_TIME | sort -n -r | head -n 1) -if [ "$TOKEN_FILE" = "" ] -then +if [ "$TOKEN_FILE" = "" ]; then TOKEN_FILE=$(($CURRENT_TIME + $IMDS_TOKEN_TTL_SECONDS)) curl \ --silent \ diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 7d8f4b0aa..c7aa93c22 100755 --- a/files/bootstrap.sh 
+++ b/files/bootstrap.sh @@ -5,129 +5,129 @@ set -o nounset set -o errexit err_report() { - echo "Exited with error on line $1" + echo "Exited with error on line $1" } trap 'err_report $LINENO' ERR IFS=$'\n\t' function print_help { - echo "usage: $0 [options] " - echo "Bootstraps an instance into an EKS cluster" - echo "" - echo "-h,--help print this help" - echo "--use-max-pods Sets --max-pods for the kubelet when true. (default: true)" - echo "--b64-cluster-ca The base64 encoded cluster CA content. Only valid when used with --apiserver-endpoint. Bypasses calling \"aws eks describe-cluster\"" - echo "--apiserver-endpoint The EKS cluster API Server endpoint. Only valid when used with --b64-cluster-ca. Bypasses calling \"aws eks describe-cluster\"" - echo "--kubelet-extra-args Extra arguments to add to the kubelet. Useful for adding labels or taints." - echo "--enable-docker-bridge Restores the docker default bridge network. (default: false)" - echo "--aws-api-retry-attempts Number of retry attempts for AWS API call (DescribeCluster) (default: 3)" - echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI" - echo "--containerd-config-file File containing the containerd configuration to be used in place of AMI defaults." - echo "--dns-cluster-ip Overrides the IP address to use for DNS queries within the cluster. 
Defaults to 10.100.0.10 or 172.20.0.10 based on the IP address of the primary interface" - echo "--pause-container-account The AWS account (number) to pull the pause container from" - echo "--pause-container-version The tag of the pause container" - echo "--container-runtime Specify a container runtime (default: dockerd)" - echo "--ip-family Specify ip family of the cluster" - echo "--service-ipv6-cidr ipv6 cidr range of the cluster" - echo "--enable-local-outpost Enable support for worker nodes to communicate with the local control plane when running on a disconnected Outpost. (true or false)" - echo "--cluster-id Specify the id of EKS cluster" + echo "usage: $0 [options] " + echo "Bootstraps an instance into an EKS cluster" + echo "" + echo "-h,--help print this help" + echo "--use-max-pods Sets --max-pods for the kubelet when true. (default: true)" + echo "--b64-cluster-ca The base64 encoded cluster CA content. Only valid when used with --apiserver-endpoint. Bypasses calling \"aws eks describe-cluster\"" + echo "--apiserver-endpoint The EKS cluster API Server endpoint. Only valid when used with --b64-cluster-ca. Bypasses calling \"aws eks describe-cluster\"" + echo "--kubelet-extra-args Extra arguments to add to the kubelet. Useful for adding labels or taints." + echo "--enable-docker-bridge Restores the docker default bridge network. (default: false)" + echo "--aws-api-retry-attempts Number of retry attempts for AWS API call (DescribeCluster) (default: 3)" + echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI" + echo "--containerd-config-file File containing the containerd configuration to be used in place of AMI defaults." + echo "--dns-cluster-ip Overrides the IP address to use for DNS queries within the cluster. 
Defaults to 10.100.0.10 or 172.20.0.10 based on the IP address of the primary interface" + echo "--pause-container-account The AWS account (number) to pull the pause container from" + echo "--pause-container-version The tag of the pause container" + echo "--container-runtime Specify a container runtime (default: dockerd)" + echo "--ip-family Specify ip family of the cluster" + echo "--service-ipv6-cidr ipv6 cidr range of the cluster" + echo "--enable-local-outpost Enable support for worker nodes to communicate with the local control plane when running on a disconnected Outpost. (true or false)" + echo "--cluster-id Specify the id of EKS cluster" } POSITIONAL=() while [[ $# -gt 0 ]]; do - key="$1" - case $key in - -h|--help) - print_help - exit 1 - ;; - --use-max-pods) - USE_MAX_PODS="$2" - shift - shift - ;; - --b64-cluster-ca) - B64_CLUSTER_CA=$2 - shift - shift - ;; - --apiserver-endpoint) - APISERVER_ENDPOINT=$2 - shift - shift - ;; - --kubelet-extra-args) - KUBELET_EXTRA_ARGS=$2 - shift - shift - ;; - --enable-docker-bridge) - ENABLE_DOCKER_BRIDGE=$2 - shift - shift - ;; - --aws-api-retry-attempts) - API_RETRY_ATTEMPTS=$2 - shift - shift - ;; - --docker-config-json) - DOCKER_CONFIG_JSON=$2 - shift - shift - ;; - --containerd-config-file) - CONTAINERD_CONFIG_FILE=$2 - shift - shift - ;; - --pause-container-account) - PAUSE_CONTAINER_ACCOUNT=$2 - shift - shift - ;; - --pause-container-version) - PAUSE_CONTAINER_VERSION=$2 - shift - shift - ;; - --dns-cluster-ip) - DNS_CLUSTER_IP=$2 - shift - shift - ;; - --container-runtime) - CONTAINER_RUNTIME=$2 - shift - shift - ;; - --ip-family) - IP_FAMILY=$2 - shift - shift - ;; - --service-ipv6-cidr) - SERVICE_IPV6_CIDR=$2 - shift - shift - ;; - --enable-local-outpost) - ENABLE_LOCAL_OUTPOST=$2 - shift - shift - ;; - --cluster-id) - CLUSTER_ID=$2 - shift - shift - ;; - *) # unknown option - POSITIONAL+=("$1") # save it in an array for later - shift # past argument - ;; - esac + key="$1" + case $key in + -h | --help) + 
print_help + exit 1 + ;; + --use-max-pods) + USE_MAX_PODS="$2" + shift + shift + ;; + --b64-cluster-ca) + B64_CLUSTER_CA=$2 + shift + shift + ;; + --apiserver-endpoint) + APISERVER_ENDPOINT=$2 + shift + shift + ;; + --kubelet-extra-args) + KUBELET_EXTRA_ARGS=$2 + shift + shift + ;; + --enable-docker-bridge) + ENABLE_DOCKER_BRIDGE=$2 + shift + shift + ;; + --aws-api-retry-attempts) + API_RETRY_ATTEMPTS=$2 + shift + shift + ;; + --docker-config-json) + DOCKER_CONFIG_JSON=$2 + shift + shift + ;; + --containerd-config-file) + CONTAINERD_CONFIG_FILE=$2 + shift + shift + ;; + --pause-container-account) + PAUSE_CONTAINER_ACCOUNT=$2 + shift + shift + ;; + --pause-container-version) + PAUSE_CONTAINER_VERSION=$2 + shift + shift + ;; + --dns-cluster-ip) + DNS_CLUSTER_IP=$2 + shift + shift + ;; + --container-runtime) + CONTAINER_RUNTIME=$2 + shift + shift + ;; + --ip-family) + IP_FAMILY=$2 + shift + shift + ;; + --service-ipv6-cidr) + SERVICE_IPV6_CIDR=$2 + shift + shift + ;; + --enable-local-outpost) + ENABLE_LOCAL_OUTPOST=$2 + shift + shift + ;; + --cluster-id) + CLUSTER_ID=$2 + shift + shift + ;; + *) # unknown option + POSITIONAL+=("$1") # save it in an array for later + shift # past argument + ;; + esac done set +u @@ -139,10 +139,10 @@ KUBELET_VERSION=$(kubelet --version | grep -Eo '[0-9]\.[0-9]+\.[0-9]+') echo "Using kubelet version $KUBELET_VERSION" function is_greater_than_or_equal_to_version() { - local actual_version="$1" - local compared_version="$2" + local actual_version="$1" + local compared_version="$2" - [ $actual_version = "`echo -e \"$actual_version\n$compared_version\" | sort -V | tail -n1`" ] + [ $actual_version = "$(echo -e "$actual_version\n$compared_version" | sort -V | tail -n1)" ] } # As of Kubernetes version 1.24, we will start defaulting the container runtime to containerd @@ -150,8 +150,8 @@ function is_greater_than_or_equal_to_version() { IS_124_OR_GREATER=false DEFAULT_CONTAINER_RUNTIME=dockerd if is_greater_than_or_equal_to_version 
$KUBELET_VERSION "1.24.0"; then - IS_124_OR_GREATER=true - DEFAULT_CONTAINER_RUNTIME=containerd + IS_124_OR_GREATER=true + DEFAULT_CONTAINER_RUNTIME=containerd fi # Set container runtime related variables @@ -162,8 +162,8 @@ CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-$DEFAULT_CONTAINER_RUNTIME}" echo "Using $CONTAINER_RUNTIME as the container runtime" if $IS_124_OR_GREATER && [ $CONTAINER_RUNTIME != "containerd" ]; then - echo "ERROR: containerd is the only supported container runtime as of Kubernetes version 1.24" - exit 1 + echo "ERROR: containerd is the only supported container runtime as of Kubernetes version 1.24" + exit 1 fi USE_MAX_PODS="${USE_MAX_PODS:-true}" @@ -180,36 +180,49 @@ SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" CLUSTER_ID="${CLUSTER_ID:-}" -function get_pause_container_account_for_region () { - local region="$1" - case "${region}" in +function get_pause_container_account_for_region() { + local region="$1" + case "${region}" in ap-east-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-800184023465}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-800184023465}" + ;; me-south-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-558608220178}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-558608220178}" + ;; cn-north-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-918309763551}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-918309763551}" + ;; cn-northwest-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-961992271922}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-961992271922}" + ;; us-gov-west-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-013241004608}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-013241004608}" + ;; us-gov-east-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-151742754352}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-151742754352}" + ;; us-iso-east-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-725322719131}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-725322719131}" + ;; us-isob-east-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-187977181151}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-187977181151}" + ;; af-south-1) - echo 
"${PAUSE_CONTAINER_ACCOUNT:-877085696533}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-877085696533}" + ;; eu-south-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-590381155156}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-590381155156}" + ;; ap-southeast-3) - echo "${PAUSE_CONTAINER_ACCOUNT:-296578399912}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-296578399912}" + ;; me-central-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-759879836304}";; + echo "${PAUSE_CONTAINER_ACCOUNT:-759879836304}" + ;; *) - echo "${PAUSE_CONTAINER_ACCOUNT:-602401143452}";; - esac + echo "${PAUSE_CONTAINER_ACCOUNT:-602401143452}" + ;; + esac } # Helper function which calculates the amount of the given resource (either CPU or memory) @@ -230,9 +243,8 @@ get_resource_to_reserve_in_range() { local end_range=$3 local percentage=$4 resources_to_reserve="0" - if (( $total_resource_on_instance > $start_range )); then - resources_to_reserve=$(((($total_resource_on_instance < $end_range ? \ - $total_resource_on_instance : $end_range) - $start_range) * $percentage / 100 / 100)) + if (($total_resource_on_instance > $start_range)); then + resources_to_reserve=$(((($total_resource_on_instance < $end_range ? 
$total_resource_on_instance : $end_range) - $start_range) * $percentage / 100 / 100)) fi echo $resources_to_reserve } @@ -267,33 +279,32 @@ get_cpu_millicores_to_reserve() { cpu_to_reserve="0" for i in "${!cpu_percentage_reserved_for_ranges[@]}"; do local start_range=${cpu_ranges[$i]} - local end_range=${cpu_ranges[(($i+1))]} + local end_range=${cpu_ranges[(($i + 1))]} local percentage_to_reserve_for_range=${cpu_percentage_reserved_for_ranges[$i]} - cpu_to_reserve=$(($cpu_to_reserve + \ - $(get_resource_to_reserve_in_range $total_cpu_on_instance $start_range $end_range $percentage_to_reserve_for_range))) + cpu_to_reserve=$(($cpu_to_reserve + $(get_resource_to_reserve_in_range $total_cpu_on_instance $start_range $end_range $percentage_to_reserve_for_range))) done echo $cpu_to_reserve } if [ -z "$CLUSTER_NAME" ]; then - echo "CLUSTER_NAME is not defined" - exit 1 + echo "CLUSTER_NAME is not defined" + exit 1 fi if [[ ! -z "${IP_FAMILY}" ]]; then IP_FAMILY="$(tr [A-Z] [a-z] <<< "$IP_FAMILY")" - if [[ "${IP_FAMILY}" != "ipv4" ]] && [[ "${IP_FAMILY}" != "ipv6" ]] ; then - echo "Invalid IpFamily. Only ipv4 or ipv6 are allowed" - exit 1 + if [[ "${IP_FAMILY}" != "ipv4" ]] && [[ "${IP_FAMILY}" != "ipv6" ]]; then + echo "Invalid IpFamily. Only ipv4 or ipv6 are allowed" + exit 1 fi fi if [[ ! 
-z "${SERVICE_IPV6_CIDR}" ]]; then - if [[ "${IP_FAMILY}" == "ipv4" ]]; then - echo "ip-family should be ipv6 when service-ipv6-cidr is specified" - exit 1 - fi - IP_FAMILY="ipv6" + if [[ "${IP_FAMILY}" == "ipv4" ]]; then + echo "ip-family should be ipv6 when service-ipv6-cidr is specified" + exit 1 + fi + IP_FAMILY="ipv6" fi AWS_DEFAULT_REGION=$(imds 'latest/dynamic/instance-identity/document' | jq .region -r) @@ -301,8 +312,8 @@ AWS_SERVICES_DOMAIN=$(imds 'latest/meta-data/services/domain') MACHINE=$(uname -m) if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then - echo "Unknown machine architecture '$MACHINE'" >&2 - exit 1 + echo "Unknown machine architecture '$MACHINE'" >&2 + exit 1 fi PAUSE_CONTAINER_ACCOUNT=$(get_pause_container_account_for_region "${AWS_DEFAULT_REGION}") @@ -315,62 +326,62 @@ CA_CERTIFICATE_DIRECTORY=/etc/kubernetes/pki CA_CERTIFICATE_FILE_PATH=$CA_CERTIFICATE_DIRECTORY/ca.crt mkdir -p $CA_CERTIFICATE_DIRECTORY if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then - DESCRIBE_CLUSTER_RESULT="/tmp/describe_cluster_result.txt" - - # Retry the DescribeCluster API for API_RETRY_ATTEMPTS - for attempt in `seq 0 $API_RETRY_ATTEMPTS`; do - rc=0 - if [[ $attempt -gt 0 ]]; then - echo "Attempt $attempt of $API_RETRY_ATTEMPTS" - fi - - aws eks wait cluster-active \ - --region=${AWS_DEFAULT_REGION} \ - --name=${CLUSTER_NAME} - - aws eks describe-cluster \ - --region=${AWS_DEFAULT_REGION} \ - --name=${CLUSTER_NAME} \ - --output=text \ - --query 'cluster.{certificateAuthorityData: certificateAuthority.data, endpoint: endpoint, serviceIpv4Cidr: kubernetesNetworkConfig.serviceIpv4Cidr, serviceIpv6Cidr: kubernetesNetworkConfig.serviceIpv6Cidr, clusterIpFamily: kubernetesNetworkConfig.ipFamily, outpostArn: outpostConfig.outpostArns[0], id: id}' > $DESCRIBE_CLUSTER_RESULT || rc=$? 
- if [[ $rc -eq 0 ]]; then - break - fi - if [[ $attempt -eq $API_RETRY_ATTEMPTS ]]; then - exit $rc - fi - jitter=$((1 + RANDOM % 10)) - sleep_sec="$(( $(( 5 << $((1+$attempt)) )) + $jitter))" - sleep $sleep_sec - done - B64_CLUSTER_CA=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $1}') - APISERVER_ENDPOINT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $3}') - CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $4}') - OUTPOST_ARN=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $5}') - SERVICE_IPV4_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $6}') - SERVICE_IPV6_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $7}') - - if [[ -z "${IP_FAMILY}" ]]; then - IP_FAMILY=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $2}') - fi + DESCRIBE_CLUSTER_RESULT="/tmp/describe_cluster_result.txt" - # Automatically detect local cluster in outpost - if [[ -z "${OUTPOST_ARN}" ]] || [[ "${OUTPOST_ARN}" == "None" ]]; then - IS_LOCAL_OUTPOST_DETECTED=false - else - IS_LOCAL_OUTPOST_DETECTED=true + # Retry the DescribeCluster API for API_RETRY_ATTEMPTS + for attempt in $(seq 0 $API_RETRY_ATTEMPTS); do + rc=0 + if [[ $attempt -gt 0 ]]; then + echo "Attempt $attempt of $API_RETRY_ATTEMPTS" fi - # If the cluster id is returned from describe cluster, let us use it no matter whether cluster id is passed from option - if [[ ! 
-z "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" ]] && [[ "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" != "None" ]]; then - CLUSTER_ID=${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT} + aws eks wait cluster-active \ + --region=${AWS_DEFAULT_REGION} \ + --name=${CLUSTER_NAME} + + aws eks describe-cluster \ + --region=${AWS_DEFAULT_REGION} \ + --name=${CLUSTER_NAME} \ + --output=text \ + --query 'cluster.{certificateAuthorityData: certificateAuthority.data, endpoint: endpoint, serviceIpv4Cidr: kubernetesNetworkConfig.serviceIpv4Cidr, serviceIpv6Cidr: kubernetesNetworkConfig.serviceIpv6Cidr, clusterIpFamily: kubernetesNetworkConfig.ipFamily, outpostArn: outpostConfig.outpostArns[0], id: id}' > $DESCRIBE_CLUSTER_RESULT || rc=$? + if [[ $rc -eq 0 ]]; then + break fi + if [[ $attempt -eq $API_RETRY_ATTEMPTS ]]; then + exit $rc + fi + jitter=$((1 + RANDOM % 10)) + sleep_sec="$(($((5 << $((1 + $attempt)))) + $jitter))" + sleep $sleep_sec + done + B64_CLUSTER_CA=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $1}') + APISERVER_ENDPOINT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $3}') + CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $4}') + OUTPOST_ARN=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $5}') + SERVICE_IPV4_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $6}') + SERVICE_IPV6_CIDR=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $7}') + + if [[ -z "${IP_FAMILY}" ]]; then + IP_FAMILY=$(cat $DESCRIBE_CLUSTER_RESULT | awk '{print $2}') + fi + + # Automatically detect local cluster in outpost + if [[ -z "${OUTPOST_ARN}" ]] || [[ "${OUTPOST_ARN}" == "None" ]]; then + IS_LOCAL_OUTPOST_DETECTED=false + else + IS_LOCAL_OUTPOST_DETECTED=true + fi + + # If the cluster id is returned from describe cluster, let us use it no matter whether cluster id is passed from option + if [[ ! 
-z "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" ]] && [[ "${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT}" != "None" ]]; then + CLUSTER_ID=${CLUSTER_ID_IN_DESCRIBE_CLUSTER_RESULT} + fi fi if [[ -z "${IP_FAMILY}" ]] || [[ "${IP_FAMILY}" == "None" ]]; then - ### this can happen when the ipFamily field is not found in describeCluster response - ### or B64_CLUSTER_CA and APISERVER_ENDPOINT are defined but IPFamily isn't - IP_FAMILY="ipv4" + ### this can happen when the ipFamily field is not found in describeCluster response + ### or B64_CLUSTER_CA and APISERVER_ENDPOINT are defined but IPFamily isn't + IP_FAMILY="ipv4" fi echo $B64_CLUSTER_CA | base64 -d > $CA_CERTIFICATE_FILE_PATH @@ -379,11 +390,11 @@ sed -i s,MASTER_ENDPOINT,$APISERVER_ENDPOINT,g /var/lib/kubelet/kubeconfig sed -i s,AWS_REGION,$AWS_DEFAULT_REGION,g /var/lib/kubelet/kubeconfig if [[ -z "$ENABLE_LOCAL_OUTPOST" ]]; then - # Only when "--enable-local-outpost" option is not set explicity on calling bootstrap.sh, it will be assigned with - # - the result of auto-detectection through describe-cluster - # - or "false" when describe-cluster is bypassed. - # This also means if "--enable-local-outpost" option is set explicity, it will override auto-detection result - ENABLE_LOCAL_OUTPOST="${IS_LOCAL_OUTPOST_DETECTED:-false}" + # Only when "--enable-local-outpost" option is not set explicity on calling bootstrap.sh, it will be assigned with + # - the result of auto-detectection through describe-cluster + # - or "false" when describe-cluster is bypassed. + # This also means if "--enable-local-outpost" option is set explicity, it will override auto-detection result + ENABLE_LOCAL_OUTPOST="${IS_LOCAL_OUTPOST_DETECTED:-false}" fi ### To support worker nodes to continue to communicate and connect to local cluster even when the Outpost @@ -395,32 +406,31 @@ fi ### worker node can be authentiacated through X.509 certificate which works for both connected and #### disconnected state. 
if [[ "${ENABLE_LOCAL_OUTPOST}" == "true" ]]; then - ### append to /etc/hosts file with shuffled mappings of "IP address to API server domain name" - DOMAIN_NAME=$(echo "$APISERVER_ENDPOINT" | awk -F/ '{print $3}' | awk -F: '{print $1}') - getent hosts "$DOMAIN_NAME" | shuf >> /etc/hosts - - ### kubelet bootstrap kubeconfig uses aws-iam-authenticator with cluster id to authenticate to cluster - ### - if "aws eks describe-cluster" is bypassed, for local outpost, the value of CLUSTER_NAME parameter will be cluster id. - ### - otherwise, the cluster id will use the id returned by "aws eks describe-cluster". - if [[ -z "${CLUSTER_ID}" ]]; then - echo "Cluster ID is required when local outpost support is enabled" - exit 1 - else - sed -i s,CLUSTER_NAME,$CLUSTER_ID,g /var/lib/kubelet/kubeconfig + ### append to /etc/hosts file with shuffled mappings of "IP address to API server domain name" + DOMAIN_NAME=$(echo "$APISERVER_ENDPOINT" | awk -F/ '{print $3}' | awk -F: '{print $1}') + getent hosts "$DOMAIN_NAME" | shuf >> /etc/hosts + + ### kubelet bootstrap kubeconfig uses aws-iam-authenticator with cluster id to authenticate to cluster + ### - if "aws eks describe-cluster" is bypassed, for local outpost, the value of CLUSTER_NAME parameter will be cluster id. + ### - otherwise, the cluster id will use the id returned by "aws eks describe-cluster". 
+ if [[ -z "${CLUSTER_ID}" ]]; then + echo "Cluster ID is required when local outpost support is enabled" + exit 1 + else + sed -i s,CLUSTER_NAME,$CLUSTER_ID,g /var/lib/kubelet/kubeconfig - ### use aws-iam-authenticator as bootstrap auth and download X.509 cert used in kubelet kubeconfig - mv /var/lib/kubelet/kubeconfig /var/lib/kubelet/bootstrap-kubeconfig - KUBELET_EXTRA_ARGS="--bootstrap-kubeconfig /var/lib/kubelet/bootstrap-kubeconfig $KUBELET_EXTRA_ARGS" - fi + ### use aws-iam-authenticator as bootstrap auth and download X.509 cert used in kubelet kubeconfig + mv /var/lib/kubelet/kubeconfig /var/lib/kubelet/bootstrap-kubeconfig + KUBELET_EXTRA_ARGS="--bootstrap-kubeconfig /var/lib/kubelet/bootstrap-kubeconfig $KUBELET_EXTRA_ARGS" + fi else - sed -i s,CLUSTER_NAME,$CLUSTER_NAME,g /var/lib/kubelet/kubeconfig + sed -i s,CLUSTER_NAME,$CLUSTER_NAME,g /var/lib/kubelet/kubeconfig fi ### kubelet.service configuration MAC=$(imds 'latest/meta-data/network/interfaces/macs/' | head -n 1 | sed 's/\/$//') - if [[ -z "${DNS_CLUSTER_IP}" ]]; then if [[ "${IP_FAMILY}" == "ipv6" ]]; then if [[ -z "${SERVICE_IPV6_CIDR}" ]]; then @@ -432,14 +442,14 @@ if [[ -z "${DNS_CLUSTER_IP}" ]]; then if [[ "${IP_FAMILY}" == "ipv4" ]]; then if [[ ! -z "${SERVICE_IPV4_CIDR}" ]] && [[ "${SERVICE_IPV4_CIDR}" != "None" ]]; then - #Sets the DNS Cluster IP address that would be chosen from the serviceIpv4Cidr. (x.y.z.10) - DNS_CLUSTER_IP=${SERVICE_IPV4_CIDR%.*}.10 + #Sets the DNS Cluster IP address that would be chosen from the serviceIpv4Cidr. 
(x.y.z.10) + DNS_CLUSTER_IP=${SERVICE_IPV4_CIDR%.*}.10 else - TEN_RANGE=$(imds "latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks" | grep -c '^10\..*' || true ) - DNS_CLUSTER_IP=10.100.0.10 - if [[ "$TEN_RANGE" != "0" ]]; then - DNS_CLUSTER_IP=172.20.0.10 - fi + TEN_RANGE=$(imds "latest/meta-data/network/interfaces/macs/$MAC/vpc-ipv4-cidr-blocks" | grep -c '^10\..*' || true) + DNS_CLUSTER_IP=10.100.0.10 + if [[ "$TEN_RANGE" != "0" ]]; then + DNS_CLUSTER_IP=172.20.0.10 + fi fi fi else @@ -450,19 +460,18 @@ KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json echo "$(jq ".clusterDNS=[\"$DNS_CLUSTER_IP\"]" $KUBELET_CONFIG)" > $KUBELET_CONFIG if [[ "${IP_FAMILY}" == "ipv4" ]]; then - INTERNAL_IP=$(imds 'latest/meta-data/local-ipv4') + INTERNAL_IP=$(imds 'latest/meta-data/local-ipv4') else - INTERNAL_IP_URI=latest/meta-data/network/interfaces/macs/$MAC/ipv6s - INTERNAL_IP=$(imds $INTERNAL_IP_URI) + INTERNAL_IP_URI=latest/meta-data/network/interfaces/macs/$MAC/ipv6s + INTERNAL_IP=$(imds $INTERNAL_IP_URI) fi INSTANCE_TYPE=$(imds 'latest/meta-data/instance-type') if is_greater_than_or_equal_to_version $KUBELET_VERSION "1.22.0"; then - # for K8s versions that suport API Priority & Fairness, increase our API server QPS - echo $(jq ".kubeAPIQPS=( .kubeAPIQPS // 10)|.kubeAPIBurst=( .kubeAPIBurst // 20)" $KUBELET_CONFIG) > $KUBELET_CONFIG + # for K8s versions that suport API Priority & Fairness, increase our API server QPS + echo $(jq ".kubeAPIQPS=( .kubeAPIQPS // 10)|.kubeAPIBurst=( .kubeAPIBurst // 20)" $KUBELET_CONFIG) > $KUBELET_CONFIG fi - # Sets kubeReserved and evictionHard in /etc/kubernetes/kubelet/kubelet-config.json for worker nodes. The following two function # calls calculate the CPU and memory resources to reserve for kubeReserved based on the instance type of the worker node. 
# Note that allocatable memory and CPU resources on worker nodes is calculated by the Kubernetes scheduler @@ -474,11 +483,11 @@ set +o pipefail MAX_PODS=$(cat $MAX_PODS_FILE | awk "/^${INSTANCE_TYPE:-unset}/"' { print $2 }') set -o pipefail if [ -z "$MAX_PODS" ] || [ -z "$INSTANCE_TYPE" ]; then - echo "No entry for type '$INSTANCE_TYPE' in $MAX_PODS_FILE. Will attempt to auto-discover value." - # When determining the value of maxPods, we're using the legacy calculation by default since it's more restrictive than - # the PrefixDelegation based alternative and is likely to be in-use by more customers. - # The legacy numbers also maintain backwards compatibility when used to calculate `kubeReserved.memory` - MAX_PODS=$(/etc/eks/max-pods-calculator.sh --instance-type-from-imds --cni-version 1.10.0 --show-max-allowed) + echo "No entry for type '$INSTANCE_TYPE' in $MAX_PODS_FILE. Will attempt to auto-discover value." + # When determining the value of maxPods, we're using the legacy calculation by default since it's more restrictive than + # the PrefixDelegation based alternative and is likely to be in-use by more customers. + # The legacy numbers also maintain backwards compatibility when used to calculate `kubeReserved.memory` + MAX_PODS=$(/etc/eks/max-pods-calculator.sh --instance-type-from-imds --cni-version 1.10.0 --show-max-allowed) fi # calculates the amount of each resource to reserve @@ -487,115 +496,114 @@ cpu_millicores_to_reserve=$(get_cpu_millicores_to_reserve) # writes kubeReserved and evictionHard to the kubelet-config using the amount of CPU and memory to be reserved echo "$(jq '. += {"evictionHard": {"memory.available": "100Mi", "nodefs.available": "10%", "nodefs.inodesFree": "5%"}}' $KUBELET_CONFIG)" > $KUBELET_CONFIG echo "$(jq --arg mebibytes_to_reserve "${mebibytes_to_reserve}Mi" --arg cpu_millicores_to_reserve "${cpu_millicores_to_reserve}m" \ - '. 
+= {kubeReserved: {"cpu": $cpu_millicores_to_reserve, "ephemeral-storage": "1Gi", "memory": $mebibytes_to_reserve}}' $KUBELET_CONFIG)" > $KUBELET_CONFIG + '. += {kubeReserved: {"cpu": $cpu_millicores_to_reserve, "ephemeral-storage": "1Gi", "memory": $mebibytes_to_reserve}}' $KUBELET_CONFIG)" > $KUBELET_CONFIG if [[ "$USE_MAX_PODS" = "true" ]]; then - echo "$(jq ".maxPods=$MAX_PODS" $KUBELET_CONFIG)" > $KUBELET_CONFIG + echo "$(jq ".maxPods=$MAX_PODS" $KUBELET_CONFIG)" > $KUBELET_CONFIG fi mkdir -p /etc/systemd/system/kubelet.service.d -cat < /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf +cat << EOF > /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf [Service] Environment='KUBELET_ARGS=--node-ip=$INTERNAL_IP --pod-infra-container-image=$PAUSE_CONTAINER --v=2' EOF if [[ -n "$KUBELET_EXTRA_ARGS" ]]; then - cat < /etc/systemd/system/kubelet.service.d/30-kubelet-extra-args.conf + cat << EOF > /etc/systemd/system/kubelet.service.d/30-kubelet-extra-args.conf [Service] Environment='KUBELET_EXTRA_ARGS=$KUBELET_EXTRA_ARGS' EOF fi if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then - if $ENABLE_DOCKER_BRIDGE; then - echo "WARNING: Flag --enable-docker-bridge was set but will be ignored as it's not relevant to containerd" - fi + if $ENABLE_DOCKER_BRIDGE; then + echo "WARNING: Flag --enable-docker-bridge was set but will be ignored as it's not relevant to containerd" + fi - if [ ! -z "$DOCKER_CONFIG_JSON" ]; then - echo "WARNING: Flag --docker-config-json was set but will be ignored as it's not relevant to containerd" - fi + if [ ! 
-z "$DOCKER_CONFIG_JSON" ]; then + echo "WARNING: Flag --docker-config-json was set but will be ignored as it's not relevant to containerd" + fi - sudo mkdir -p /etc/containerd - sudo mkdir -p /etc/cni/net.d - mkdir -p /etc/systemd/system/containerd.service.d - cat < /etc/systemd/system/containerd.service.d/10-compat-symlink.conf + sudo mkdir -p /etc/containerd + sudo mkdir -p /etc/cni/net.d + mkdir -p /etc/systemd/system/containerd.service.d + cat << EOF > /etc/systemd/system/containerd.service.d/10-compat-symlink.conf [Service] ExecStartPre=/bin/ln -sf /run/containerd/containerd.sock /run/dockershim.sock EOF - if [[ -n "$CONTAINERD_CONFIG_FILE" ]]; then - sudo cp -v $CONTAINERD_CONFIG_FILE /etc/eks/containerd/containerd-config.toml - fi - echo "$(jq '.cgroupDriver="systemd"' $KUBELET_CONFIG)" > $KUBELET_CONFIG - sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml - sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml - sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service - sudo cp -v /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service - sudo chown root:root /etc/systemd/system/kubelet.service - sudo chown root:root /etc/systemd/system/sandbox-image.service - systemctl daemon-reload - systemctl enable containerd - systemctl restart containerd - systemctl enable sandbox-image - systemctl start sandbox-image + if [[ -n "$CONTAINERD_CONFIG_FILE" ]]; then + sudo cp -v $CONTAINERD_CONFIG_FILE /etc/eks/containerd/containerd-config.toml + fi + echo "$(jq '.cgroupDriver="systemd"' $KUBELET_CONFIG)" > $KUBELET_CONFIG + sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml + sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml + sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service + sudo cp -v 
/etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service + sudo chown root:root /etc/systemd/system/kubelet.service + sudo chown root:root /etc/systemd/system/sandbox-image.service + systemctl daemon-reload + systemctl enable containerd + systemctl restart containerd + systemctl enable sandbox-image + systemctl start sandbox-image elif [[ "$CONTAINER_RUNTIME" = "dockerd" ]]; then - mkdir -p /etc/docker - bash -c "/sbin/iptables-save > /etc/sysconfig/iptables" - cp -v /etc/eks/iptables-restore.service /etc/systemd/system/iptables-restore.service - sudo chown root:root /etc/systemd/system/iptables-restore.service - systemctl daemon-reload - systemctl enable iptables-restore - - if [[ -n "$DOCKER_CONFIG_JSON" ]]; then - echo "$DOCKER_CONFIG_JSON" > /etc/docker/daemon.json - fi - if [[ "$ENABLE_DOCKER_BRIDGE" = "true" ]]; then - # Enabling the docker bridge network. We have to disable live-restore as it - # prevents docker from recreating the default bridge network on restart - echo "$(jq '.bridge="docker0" | ."live-restore"=false' /etc/docker/daemon.json)" > /etc/docker/daemon.json - fi - systemctl daemon-reload - systemctl enable docker - systemctl restart docker + mkdir -p /etc/docker + bash -c "/sbin/iptables-save > /etc/sysconfig/iptables" + cp -v /etc/eks/iptables-restore.service /etc/systemd/system/iptables-restore.service + sudo chown root:root /etc/systemd/system/iptables-restore.service + systemctl daemon-reload + systemctl enable iptables-restore + + if [[ -n "$DOCKER_CONFIG_JSON" ]]; then + echo "$DOCKER_CONFIG_JSON" > /etc/docker/daemon.json + fi + if [[ "$ENABLE_DOCKER_BRIDGE" = "true" ]]; then + # Enabling the docker bridge network. 
We have to disable live-restore as it + # prevents docker from recreating the default bridge network on restart + echo "$(jq '.bridge="docker0" | ."live-restore"=false' /etc/docker/daemon.json)" > /etc/docker/daemon.json + fi + systemctl daemon-reload + systemctl enable docker + systemctl restart docker else - echo "Container runtime ${CONTAINER_RUNTIME} is not supported." - exit 1 + echo "Container runtime ${CONTAINER_RUNTIME} is not supported." + exit 1 fi - systemctl enable kubelet systemctl start kubelet # gpu boost clock -if command -v nvidia-smi &>/dev/null ; then - echo "nvidia-smi found" - - nvidia-smi -q > /tmp/nvidia-smi-check - if [[ "$?" == "0" ]]; then - sudo nvidia-smi -pm 1 # set persistence mode - sudo nvidia-smi --auto-boost-default=0 - - GPUNAME=$(nvidia-smi -L | head -n1) - echo $GPUNAME - - # set application clock to maximum - if [[ $GPUNAME == *"A100"* ]]; then - nvidia-smi -ac 1215,1410 - elif [[ $GPUNAME == *"V100"* ]]; then - nvidia-smi -ac 877,1530 - elif [[ $GPUNAME == *"K80"* ]]; then - nvidia-smi -ac 2505,875 - elif [[ $GPUNAME == *"T4"* ]]; then - nvidia-smi -ac 5001,1590 - elif [[ $GPUNAME == *"M60"* ]]; then - nvidia-smi -ac 2505,1177 - else - echo "unsupported gpu" - fi - else - cat /tmp/nvidia-smi-check - fi +if command -v nvidia-smi &> /dev/null; then + echo "nvidia-smi found" + + nvidia-smi -q > /tmp/nvidia-smi-check + if [[ "$?" 
== "0" ]]; then + sudo nvidia-smi -pm 1 # set persistence mode + sudo nvidia-smi --auto-boost-default=0 + + GPUNAME=$(nvidia-smi -L | head -n1) + echo $GPUNAME + + # set application clock to maximum + if [[ $GPUNAME == *"A100"* ]]; then + nvidia-smi -ac 1215,1410 + elif [[ $GPUNAME == *"V100"* ]]; then + nvidia-smi -ac 877,1530 + elif [[ $GPUNAME == *"K80"* ]]; then + nvidia-smi -ac 2505,875 + elif [[ $GPUNAME == *"T4"* ]]; then + nvidia-smi -ac 5001,1590 + elif [[ $GPUNAME == *"M60"* ]]; then + nvidia-smi -ac 2505,1177 + else + echo "unsupported gpu" + fi + else + cat /tmp/nvidia-smi-check + fi else - echo "nvidia-smi not found" + echo "nvidia-smi not found" fi diff --git a/files/max-pods-calculator.sh b/files/max-pods-calculator.sh index a9bb7122b..c52c5d8d5 100755 --- a/files/max-pods-calculator.sh +++ b/files/max-pods-calculator.sh @@ -5,68 +5,68 @@ set -o nounset set -o errexit err_report() { - echo "Exited with error on line $1" + echo "Exited with error on line $1" } trap 'err_report $LINENO' ERR function print_help { - echo "usage: $0 [options]" - echo "Calculates maxPods value to be used when starting up the kubelet." - echo "-h,--help print this help." - echo "--instance-type Specify the instance type to calculate max pods value." - echo "--instance-type-from-imds Use this flag if the instance type should be fetched from IMDS." - echo "--cni-version Specify the version of the CNI (example - 1.7.5)." - echo "--cni-custom-networking-enabled Use this flag to indicate if CNI custom networking mode has been enabled." - echo "--cni-prefix-delegation-enabled Use this flag to indicate if CNI prefix delegation has been enabled." - echo "--cni-max-eni specify how many ENIs should be used for prefix delegation. Defaults to using all ENIs per instance." - echo "--show-max-allowed Use this flag to show max number of Pods allowed to run in Worker Node. 
Otherwise the script will show the recommended value" + echo "usage: $0 [options]" + echo "Calculates maxPods value to be used when starting up the kubelet." + echo "-h,--help print this help." + echo "--instance-type Specify the instance type to calculate max pods value." + echo "--instance-type-from-imds Use this flag if the instance type should be fetched from IMDS." + echo "--cni-version Specify the version of the CNI (example - 1.7.5)." + echo "--cni-custom-networking-enabled Use this flag to indicate if CNI custom networking mode has been enabled." + echo "--cni-prefix-delegation-enabled Use this flag to indicate if CNI prefix delegation has been enabled." + echo "--cni-max-eni specify how many ENIs should be used for prefix delegation. Defaults to using all ENIs per instance." + echo "--show-max-allowed Use this flag to show max number of Pods allowed to run in Worker Node. Otherwise the script will show the recommended value" } POSITIONAL=() while [[ $# -gt 0 ]]; do - key="$1" - case $key in - -h|--help) - print_help - exit 1 - ;; - --instance-type) - INSTANCE_TYPE=$2 - shift - shift - ;; - --instance-type-from-imds) - INSTANCE_TYPE_FROM_IMDS=true - shift - ;; - --cni-version) - CNI_VERSION=$2 - shift - shift - ;; - --cni-custom-networking-enabled) - CNI_CUSTOM_NETWORKING_ENABLED=true - shift - ;; - --cni-prefix-delegation-enabled) - CNI_PREFIX_DELEGATION_ENABLED=true - shift - ;; - --cni-max-eni) - CNI_MAX_ENI=$2 - shift - shift - ;; - --show-max-allowed) - SHOW_MAX_ALLOWED=true - shift - ;; - *) # unknown option - POSITIONAL+=("$1") # save it in an array for later - shift # past argument - ;; - esac + key="$1" + case $key in + -h | --help) + print_help + exit 1 + ;; + --instance-type) + INSTANCE_TYPE=$2 + shift + shift + ;; + --instance-type-from-imds) + INSTANCE_TYPE_FROM_IMDS=true + shift + ;; + --cni-version) + CNI_VERSION=$2 + shift + shift + ;; + --cni-custom-networking-enabled) + CNI_CUSTOM_NETWORKING_ENABLED=true + shift + ;; + 
--cni-prefix-delegation-enabled) + CNI_PREFIX_DELEGATION_ENABLED=true + shift + ;; + --cni-max-eni) + CNI_MAX_ENI=$2 + shift + shift + ;; + --show-max-allowed) + SHOW_MAX_ALLOWED=true + shift + ;; + *) # unknown option + POSITIONAL+=("$1") # save it in an array for later + shift # past argument + ;; + esac done CNI_VERSION="${CNI_VERSION:-}" @@ -81,83 +81,79 @@ PREFIX_DELEGATION_SUPPORTED=false IPS_PER_PREFIX=16 if [ "$INSTANCE_TYPE_FROM_IMDS" = true ]; then - export AWS_DEFAULT_REGION=$(imds /latest/dynamic/instance-identity/document | jq .region -r) - INSTANCE_TYPE=$(imds /latest/meta-data/instance-type) -elif [ -z "$INSTANCE_TYPE" ]; - # There's no reasonable default for an instanceType so force one to be provided to the script. - then echo "You must specify an instance type to calculate max pods value." - exit 1 + export AWS_DEFAULT_REGION=$(imds /latest/dynamic/instance-identity/document | jq .region -r) + INSTANCE_TYPE=$(imds /latest/meta-data/instance-type) +elif [ -z "$INSTANCE_TYPE" ]; then # There's no reasonable default for an instanceType so force one to be provided to the script. + echo "You must specify an instance type to calculate max pods value." + exit 1 fi -if [ -z "$CNI_VERSION" ]; - then echo "You must specify a CNI Version to use. Example - 1.7.5" - exit 1 +if [ -z "$CNI_VERSION" ]; then + echo "You must specify a CNI Version to use. 
Example - 1.7.5" + exit 1 fi calculate_max_ip_addresses_prefix_delegation() { - enis=$1 - instance_max_eni_ips=$2 - echo $(($enis * (($instance_max_eni_ips - 1) * $IPS_PER_PREFIX ) + 2)) + enis=$1 + instance_max_eni_ips=$2 + echo $(($enis * (($instance_max_eni_ips - 1) * $IPS_PER_PREFIX) + 2)) } calculate_max_ip_addresses_secondary_ips() { - enis=$1 - instance_max_eni_ips=$2 - echo $(($enis * ($instance_max_eni_ips - 1) + 2)) + enis=$1 + instance_max_eni_ips=$2 + echo $(($enis * ($instance_max_eni_ips - 1) + 2)) } min_number() { - printf "%s\n" "$@" | sort -g | head -n1 + printf "%s\n" "$@" | sort -g | head -n1 } - VERSION_SPLIT=(${CNI_VERSION//./ }) CNI_MAJOR_VERSION="${VERSION_SPLIT[0]}" CNI_MINOR_VERSION="${VERSION_SPLIT[1]}" if [[ "$CNI_MAJOR_VERSION" -gt 1 ]] || ([[ "$CNI_MAJOR_VERSION" = 1 ]] && [[ "$CNI_MINOR_VERSION" -gt 8 ]]); then - PREFIX_DELEGATION_SUPPORTED=true + PREFIX_DELEGATION_SUPPORTED=true fi DESCRIBE_INSTANCES_RESULT=$(aws ec2 describe-instance-types --instance-type "${INSTANCE_TYPE}" --query 'InstanceTypes[0].{Hypervisor: Hypervisor, EniCount: NetworkInfo.MaximumNetworkInterfaces, PodsPerEniCount: NetworkInfo.Ipv4AddressesPerInterface, CpuCount: VCpuInfo.DefaultVCpus}' --output json) -HYPERVISOR_TYPE=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.Hypervisor' ) +HYPERVISOR_TYPE=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.Hypervisor') IS_NITRO=false if [[ "$HYPERVISOR_TYPE" == "nitro" ]]; then - IS_NITRO=true + IS_NITRO=true fi -INSTANCE_MAX_ENIS=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.EniCount' ) -INSTANCE_MAX_ENIS_IPS=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.PodsPerEniCount' ) +INSTANCE_MAX_ENIS=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.EniCount') +INSTANCE_MAX_ENIS_IPS=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.PodsPerEniCount') -if [ -z "$CNI_MAX_ENI" ] ; then - enis_for_pods=$INSTANCE_MAX_ENIS +if [ -z "$CNI_MAX_ENI" ]; then + enis_for_pods=$INSTANCE_MAX_ENIS else - enis_for_pods="$(min_number $CNI_MAX_ENI $INSTANCE_MAX_ENIS)" + 
enis_for_pods="$(min_number $CNI_MAX_ENI $INSTANCE_MAX_ENIS)" fi -if [ "$CNI_CUSTOM_NETWORKING_ENABLED" = true ] ; then - enis_for_pods=$((enis_for_pods-1)) +if [ "$CNI_CUSTOM_NETWORKING_ENABLED" = true ]; then + enis_for_pods=$((enis_for_pods - 1)) fi - if [ "$IS_NITRO" = true ] && [ "$CNI_PREFIX_DELEGATION_ENABLED" = true ] && [ "$PREFIX_DELEGATION_SUPPORTED" = true ]; then - max_pods=$(calculate_max_ip_addresses_prefix_delegation $enis_for_pods $INSTANCE_MAX_ENIS_IPS) + max_pods=$(calculate_max_ip_addresses_prefix_delegation $enis_for_pods $INSTANCE_MAX_ENIS_IPS) else - max_pods=$(calculate_max_ip_addresses_secondary_ips $enis_for_pods $INSTANCE_MAX_ENIS_IPS) + max_pods=$(calculate_max_ip_addresses_secondary_ips $enis_for_pods $INSTANCE_MAX_ENIS_IPS) fi # Limit the total number of pods that can be launched on any instance type based on the vCPUs on that instance type. MAX_POD_CEILING_FOR_LOW_CPU=110 MAX_POD_CEILING_FOR_HIGH_CPU=250 -CPU_COUNT=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.CpuCount' ) +CPU_COUNT=$(echo $DESCRIBE_INSTANCES_RESULT | jq -r '.CpuCount') -if [ "$SHOW_MAX_ALLOWED" = true ] ; then +if [ "$SHOW_MAX_ALLOWED" = true ]; then echo $max_pods exit 0 fi -if [ "$CPU_COUNT" -gt 30 ] ; then - echo $(min_number $MAX_POD_CEILING_FOR_HIGH_CPU $max_pods) +if [ "$CPU_COUNT" -gt 30 ]; then + echo $(min_number $MAX_POD_CEILING_FOR_HIGH_CPU $max_pods) else - echo $(min_number $MAX_POD_CEILING_FOR_LOW_CPU $max_pods) + echo $(min_number $MAX_POD_CEILING_FOR_LOW_CPU $max_pods) fi - diff --git a/files/pull-sandbox-image.sh b/files/pull-sandbox-image.sh index 588f155f2..270be7d32 100644 --- a/files/pull-sandbox-image.sh +++ b/files/pull-sandbox-image.sh @@ -6,22 +6,22 @@ region=$(echo "$sandbox_image" | cut -f4 -d ".") ecr_password=$(aws ecr get-login-password --region $region) API_RETRY_ATTEMPTS=5 -for attempt in `seq 0 $API_RETRY_ATTEMPTS`; do - rc=0 - if [[ $attempt -gt 0 ]]; then - echo "Attempt $attempt of $API_RETRY_ATTEMPTS" - fi - ### pull sandbox image 
from ecr - ### username will always be constant i.e; AWS - sudo ctr --namespace k8s.io image pull $sandbox_image --user AWS:$ecr_password - rc=$?; - if [[ $rc -eq 0 ]]; then - break - fi - if [[ $attempt -eq $API_RETRY_ATTEMPTS ]]; then - exit $rc - fi - jitter=$((1 + RANDOM % 10)) - sleep_sec="$(( $(( 5 << $((1+$attempt)) )) + $jitter))" - sleep $sleep_sec +for attempt in $(seq 0 $API_RETRY_ATTEMPTS); do + rc=0 + if [[ $attempt -gt 0 ]]; then + echo "Attempt $attempt of $API_RETRY_ATTEMPTS" + fi + ### pull sandbox image from ecr + ### username will always be constant i.e; AWS + sudo ctr --namespace k8s.io image pull $sandbox_image --user AWS:$ecr_password + rc=$? + if [[ $rc -eq 0 ]]; then + break + fi + if [[ $attempt -eq $API_RETRY_ATTEMPTS ]]; then + exit $rc + fi + jitter=$((1 + RANDOM % 10)) + sleep_sec="$(($((5 << $((1 + $attempt)))) + $jitter))" + sleep $sleep_sec done diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index a0a3cafab..b538dd97a 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -57,18 +57,18 @@ COMMON_DIRECTORIES=( var_log networking sandbox-image # eks - ipamd # eks - sysctls # eks - kubelet # eks - cni # eks + ipamd # eks + sysctls # eks + kubelet # eks + cni # eks ) COMMON_LOGS=( syslog messages aws-routed-eni # eks - containers # eks - pods # eks + containers # eks + pods # eks cloud-init.log cloud-init-output.log kube-proxy.log @@ -151,7 +151,7 @@ is_root() { check_required_utils() { for utils in ${REQUIRED_UTILS[*]}; do # If exit code of "command -v" not equal to 0, fail - if ! command -v "${utils}" >/dev/null 2>&1; then + if ! command -v "${utils}" > /dev/null 2>&1; then echo -e "\nApplication \"${utils}\" is missing, please install \"${utils}\" as this script requires it." 
fi done @@ -167,13 +167,13 @@ log_parameters() { } systemd_check() { - if command -v systemctl >/dev/null 2>&1; then - INIT_TYPE="systemd" - if command -v snap >/dev/null 2>&1; then + if command -v systemctl > /dev/null 2>&1; then + INIT_TYPE="systemd" + if command -v snap > /dev/null 2>&1; then INIT_TYPE="snap" fi else - INIT_TYPE="other" + INIT_TYPE="other" fi } @@ -214,14 +214,14 @@ is_diskfull() { # If "result" is less than or equal to "threshold", fail. if [[ "${result}" -le "${threshold}" ]]; then - die "Free space on root volume is less than or equal to $((threshold>>10))MB, please ensure adequate disk space to collect and store the log files." + die "Free space on root volume is less than or equal to $((threshold >> 10))MB, please ensure adequate disk space to collect and store the log files." fi } cleanup() { #guard rails to avoid accidental deletion of unknown data if [[ "${COLLECT_DIR}" == "/tmp/eks-log-collector" ]]; then - rm --recursive --force "${COLLECT_DIR}" >/dev/null 2>&1 + rm --recursive --force "${COLLECT_DIR}" > /dev/null 2>&1 else echo "Unable to Cleanup as {COLLECT_DIR} variable is modified. Please cleanup manually!" fi @@ -287,25 +287,25 @@ get_mounts_info() { pvs > "${COLLECT_DIR}"/storage/pvs.txt vgs > "${COLLECT_DIR}"/storage/vgs.txt mount -t xfs | awk '{print $1}' | xargs -I{} -- sh -c "xfs_info {}; xfs_db -r -c 'freesp -s' {}" > "${COLLECT_DIR}"/storage/xfs.txt - mount | grep ^overlay | sed 's/.*upperdir=//' | sed 's/,.*//' | xargs -n 1 timeout 75 du -sh | grep -v ^0 > "${COLLECT_DIR}"/storage/pod_local_storage.txt + mount | grep ^overlay | sed 's/.*upperdir=//' | sed 's/,.*//' | xargs -n 1 timeout 75 du -sh | grep -v ^0 > "${COLLECT_DIR}"/storage/pod_local_storage.txt ok } get_selinux_info() { try "collect SELinux status" - if ! 
command -v getenforce >/dev/null 2>&1; then - echo -e "SELinux mode:\n\t Not installed" > "${COLLECT_DIR}"/system/selinux.txt - else - echo -e "SELinux mode:\n\t $(getenforce)" > "${COLLECT_DIR}"/system/selinux.txt + if ! command -v getenforce > /dev/null 2>&1; then + echo -e "SELinux mode:\n\t Not installed" > "${COLLECT_DIR}"/system/selinux.txt + else + echo -e "SELinux mode:\n\t $(getenforce)" > "${COLLECT_DIR}"/system/selinux.txt fi ok } get_iptables_info() { - if ! command -v iptables >/dev/null 2>&1; then - echo "IPtables not installed" |tee -a iptables.txt + if ! command -v iptables > /dev/null 2>&1; then + echo "IPtables not installed" | tee -a iptables.txt else try "collect iptables information" iptables --wait 1 --numeric --verbose --list --table mangle | tee "${COLLECT_DIR}"/networking/iptables-mangle.txt | sed '/^num\|^$\|^Chain\|^\ pkts.*.destination/d' | echo -e "=======\nTotal Number of Rules: $(wc -l)" >> "${COLLECT_DIR}"/networking/iptables-mangle.txt @@ -323,29 +323,29 @@ get_common_logs() { for entry in ${COMMON_LOGS[*]}; do if [[ -e "/var/log/${entry}" ]]; then - if [[ "${entry}" == "messages" ]]; then - tail -c 100M /var/log/messages > "${COLLECT_DIR}"/var_log/messages - continue - fi - if [[ "${entry}" == "containers" ]]; then - cp --force --dereference --recursive /var/log/containers/aws-node* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/containers/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/containers/coredns-* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/containers/kube-proxy* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/containers/ebs-csi* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/containers/efs-csi* "${COLLECT_DIR}"/var_log/ 2>/dev/null - continue - fi - if [[ "${entry}" == "pods" ]]; then - cp --force 
--dereference --recursive /var/log/pods/kube-system_aws-node* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/pods/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/pods/kube-system_coredns* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/pods/kube-system_kube-proxy* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/pods/kube-system_ebs-csi-* "${COLLECT_DIR}"/var_log/ 2>/dev/null - cp --force --dereference --recursive /var/log/pods/kube-system_efs-csi-* "${COLLECT_DIR}"/var_log/ 2>/dev/null - continue - fi - cp --force --recursive --dereference /var/log/"${entry}" "${COLLECT_DIR}"/var_log/ 2>/dev/null + if [[ "${entry}" == "messages" ]]; then + tail -c 100M /var/log/messages > "${COLLECT_DIR}"/var_log/messages + continue + fi + if [[ "${entry}" == "containers" ]]; then + cp --force --dereference --recursive /var/log/containers/aws-node* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/coredns-* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/kube-proxy* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/ebs-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/efs-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null + continue + fi + if [[ "${entry}" == "pods" ]]; then + cp --force --dereference --recursive /var/log/pods/kube-system_aws-node* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_cni-metrics-helper* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_coredns* 
"${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_kube-proxy* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_ebs-csi-* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_efs-csi-* "${COLLECT_DIR}"/var_log/ 2> /dev/null + continue + fi + cp --force --recursive --dereference /var/log/"${entry}" "${COLLECT_DIR}"/var_log/ 2> /dev/null fi done @@ -356,7 +356,7 @@ get_kernel_info() { try "collect kernel logs" if [[ -e "/var/log/dmesg" ]]; then - cp --force /var/log/dmesg "${COLLECT_DIR}/kernel/dmesg.boot" + cp --force /var/log/dmesg "${COLLECT_DIR}/kernel/dmesg.boot" fi dmesg > "${COLLECT_DIR}/kernel/dmesg.current" dmesg --ctime > "${COLLECT_DIR}/kernel/dmesg.human.current" @@ -369,7 +369,7 @@ get_docker_logs() { try "collect Docker daemon logs" case "${INIT_TYPE}" in - systemd|snap) + systemd | snap) journalctl --unit=docker --since "${DAYS_10}" > "${COLLECT_DIR}"/docker/docker.log ;; other) @@ -392,29 +392,29 @@ get_k8s_info() { if [[ -n "${KUBECONFIG:-}" ]]; then command -v kubectl > /dev/null && kubectl get --kubeconfig="${KUBECONFIG}" svc > "${COLLECT_DIR}"/kubelet/svc.log - command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml elif [[ -f /etc/eksctl/kubeconfig.yaml ]]; then KUBECONFIG="/etc/eksctl/kubeconfig.yaml" command -v kubectl > /dev/null && kubectl get --kubeconfig="${KUBECONFIG}" svc > "${COLLECT_DIR}"/kubelet/svc.log - command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > 
"${COLLECT_DIR}"/kubelet/kubeconfig.yaml elif [[ -f /etc/systemd/system/kubelet.service ]]; then KUBECONFIG=$(grep kubeconfig /etc/systemd/system/kubelet.service | awk '{print $2}') command -v kubectl > /dev/null && kubectl get --kubeconfig="${KUBECONFIG}" svc > "${COLLECT_DIR}"/kubelet/svc.log - command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + command -v kubectl > /dev/null && kubectl --kubeconfig="${KUBECONFIG}" config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml elif [[ -f /var/lib/kubelet/kubeconfig ]]; then KUBECONFIG="/var/lib/kubelet/kubeconfig" command -v kubectl > /dev/null && kubectl get --kubeconfig=${KUBECONFIG} svc > "${COLLECT_DIR}"/kubelet/svc.log - command -v kubectl > /dev/null && kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + command -v kubectl > /dev/null && kubectl --kubeconfig=${KUBECONFIG} config view --output yaml > "${COLLECT_DIR}"/kubelet/kubeconfig.yaml else echo "======== Unable to find KUBECONFIG, IGNORING POD DATA =========" >> "${COLLECT_DIR}"/kubelet/svc.log fi # Try to copy the kubeconfig file if kubectl command doesn't exist - [[ (! -f "${COLLECT_DIR}/kubelet/kubeconfig.yaml") && ( -n ${KUBECONFIG}) ]] && cp ${KUBECONFIG} "${COLLECT_DIR}"/kubelet/kubeconfig.yaml + [[ (! 
-f "${COLLECT_DIR}/kubelet/kubeconfig.yaml") && (-n ${KUBECONFIG}) ]] && cp ${KUBECONFIG} "${COLLECT_DIR}"/kubelet/kubeconfig.yaml case "${INIT_TYPE}" in systemd) @@ -442,14 +442,14 @@ get_ipamd_info() { curl --max-time 3 --silent http://localhost:61679/v1/"${entry}" >> "${COLLECT_DIR}"/ipamd/"${entry}".json done else - echo "Ignoring IPAM introspection stats as mentioned"| tee -a "${COLLECT_DIR}"/ipamd/ipam_introspection_ignore.txt + echo "Ignoring IPAM introspection stats as mentioned" | tee -a "${COLLECT_DIR}"/ipamd/ipam_introspection_ignore.txt fi if [[ "${ignore_metrics}" == "false" ]]; then try "collect L-IPAMD prometheus metrics" curl --max-time 3 --silent http://localhost:61678/metrics > "${COLLECT_DIR}"/ipamd/metrics.json 2>&1 else - echo "Ignoring Prometheus Metrics collection as mentioned"| tee -a "${COLLECT_DIR}"/ipamd/ipam_metrics_ignore.txt + echo "Ignoring Prometheus Metrics collection as mentioned" | tee -a "${COLLECT_DIR}"/ipamd/ipam_metrics_ignore.txt fi try "collect L-IPAMD checkpoint" @@ -460,7 +460,7 @@ get_ipamd_info() { get_multus_info() { try "collect Multus logs if they exist" - cp --force --dereference --recursive /var/log/pods/kube-system_kube-multus* "${COLLECT_DIR}"/var_log/ 2>/dev/null + cp --force --dereference --recursive /var/log/pods/kube-system_kube-multus* "${COLLECT_DIR}"/var_log/ 2> /dev/null ok } @@ -468,7 +468,7 @@ get_multus_info() { get_sysctls_info() { try "collect sysctls information" # dump all sysctls - sysctl --all >> "${COLLECT_DIR}"/sysctls/sysctl_all.txt 2>/dev/null + sysctl --all >> "${COLLECT_DIR}"/sysctls/sysctl_all.txt 2> /dev/null ok } @@ -500,7 +500,7 @@ get_networking_info() { API_SERVER=$(grep server: "${COLLECT_DIR}"/kubelet/kubeconfig.yaml | sed 's/.*server: //') CA_CRT=$(grep certificate-authority: "${COLLECT_DIR}"/kubelet/kubeconfig.yaml | sed 's/.*certificate-authority: //') for i in $(seq 5); do - echo -e "curling ${API_SERVER} ($i of 5) $(date --utc +%FT%T.%3N%Z)\n\n" >> 
${COLLECT_DIR}"/networking/curl_api_server.txt" + echo -e "curling ${API_SERVER} ($i of 5) $(date --utc +%FT%T.%3N%Z)\n\n" >> ${COLLECT_DIR}"/networking/curl_api_server.txt" timeout 75 curl -v --cacert "${CA_CRT}" "${API_SERVER}"/livez?verbose >> ${COLLECT_DIR}"/networking/curl_api_server.txt" 2>&1 done fi @@ -512,17 +512,17 @@ get_networking_info() { get_cni_config() { try "collect CNI configuration information" - if [[ -e "/etc/cni/net.d/" ]]; then - cp --force --recursive --dereference /etc/cni/net.d/* "${COLLECT_DIR}"/cni/ - fi + if [[ -e "/etc/cni/net.d/" ]]; then + cp --force --recursive --dereference /etc/cni/net.d/* "${COLLECT_DIR}"/cni/ + fi ok } get_pkgtype() { - if [[ "$(command -v rpm )" ]]; then + if [[ "$(command -v rpm)" ]]; then PACKAGE_TYPE=rpm - elif [[ "$(command -v dpkg )" ]]; then + elif [[ "$(command -v dpkg)" ]]; then PACKAGE_TYPE=deb else PACKAGE_TYPE='unknown' @@ -551,7 +551,7 @@ get_system_services() { try "collect active system services" case "${INIT_TYPE}" in - systemd|snap) + systemd | snap) systemctl list-units > "${COLLECT_DIR}"/system/services.txt 2>&1 ;; other) @@ -575,36 +575,36 @@ get_system_services() { } get_containerd_info() { - try "Collect Containerd daemon information" + try "Collect Containerd daemon information" - if [[ "$(pgrep -o containerd)" -ne 0 ]]; then - timeout 75 containerd config dump > "${COLLECT_DIR}"/containerd/containerd-config.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " - timeout 75 journalctl -u containerd > "${COLLECT_DIR}"/containerd/containerd-log.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " - else - warning "The Containerd daemon is not running." 
- fi + if [[ "$(pgrep -o containerd)" -ne 0 ]]; then + timeout 75 containerd config dump > "${COLLECT_DIR}"/containerd/containerd-config.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 journalctl -u containerd > "${COLLECT_DIR}"/containerd/containerd-log.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + else + warning "The Containerd daemon is not running." + fi - ok + ok - try "Collect Containerd running information" - if ! command -v ctr >/dev/null 2>&1; then - warning "ctr not installed" - else - timeout 75 ctr version > "${COLLECT_DIR}"/containerd/containerd-version.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " - timeout 75 ctr namespaces list > "${COLLECT_DIR}"/containerd/containerd-namespaces.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " - timeout 75 ctr --namespace k8s.io images list > "${COLLECT_DIR}"/containerd/containerd-images.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " - timeout 75 ctr --namespace k8s.io containers list > "${COLLECT_DIR}"/containerd/containerd-containers.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " - timeout 75 ctr --namespace k8s.io tasks list > "${COLLECT_DIR}"/containerd/containerd-tasks.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " - timeout 75 ctr --namespace k8s.io plugins list > "${COLLECT_DIR}"/containerd/containerd-plugins.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " - fi + try "Collect Containerd running information" + if ! 
command -v ctr > /dev/null 2>&1; then + warning "ctr not installed" + else + timeout 75 ctr version > "${COLLECT_DIR}"/containerd/containerd-version.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr namespaces list > "${COLLECT_DIR}"/containerd/containerd-namespaces.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io images list > "${COLLECT_DIR}"/containerd/containerd-images.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io containers list > "${COLLECT_DIR}"/containerd/containerd-containers.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io tasks list > "${COLLECT_DIR}"/containerd/containerd-tasks.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 ctr --namespace k8s.io plugins list > "${COLLECT_DIR}"/containerd/containerd-plugins.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + fi - ok + ok } get_sandboxImage_info() { - try "Collect sandbox-image daemon information" - timeout 75 journalctl -u sandbox-image > "${COLLECT_DIR}"/sandbox-image/sandbox-image-log.txt 2>&1 || echo -e "\tTimed out, ignoring \"sandbox-image info output \" " - ok + try "Collect sandbox-image daemon information" + timeout 75 journalctl -u sandbox-image > "${COLLECT_DIR}"/sandbox-image/sandbox-image-log.txt 2>&1 || echo -e "\tTimed out, ignoring \"sandbox-image info output \" " + ok } get_docker_info() { @@ -626,28 +626,26 @@ get_docker_info() { get_cpu_throttled_processes() { try "Collect CPU Throttled Process Information" readonly THROTTLE_LOG="${COLLECT_DIR}"/system/cpu_throttling.txt - command find /sys/fs/cgroup -iname "cpu.stat" -print0 | while IFS= read -r -d '' cs - do + command find /sys/fs/cgroup -iname "cpu.stat" -print0 | while IFS= read -r -d '' cs; do # look for a non-zero nr_throttled value if grep -q 
"nr_throttled [1-9]" "${cs}"; then pids=${cs/cpu.stat/cgroup.procs} lines=$(wc -l < "${pids}") # ignore if no PIDs are listed - if [ "${lines}" -eq "0" ] ; then + if [ "${lines}" -eq "0" ]; then continue fi echo "$cs" >> "${THROTTLE_LOG}" cat "${cs}" >> "${THROTTLE_LOG}" - while IFS= read -r pid - do + while IFS= read -r pid; do command ps ax | grep "^${pid}" >> "${THROTTLE_LOG}" - done < "${pids}" - echo "" >> "${THROTTLE_LOG}" - fi + done < "${pids}" + echo "" >> "${THROTTLE_LOG}" + fi done if [ ! -e "${THROTTLE_LOG}" ]; then - echo "No CPU Throttling Found" >> "${THROTTLE_LOG}" + echo "No CPU Throttling Found" >> "${THROTTLE_LOG}" fi ok } @@ -658,7 +656,7 @@ get_io_throttled_processes() { command echo -e "PID Name Block IO Delay (centisconds)" > ${IO_THROTTLE_LOG} # column 42 is Aggregated block I/O delays, measured in centiseconds so we capture the non-zero block # I/O delays. - command cut -d" " -f 1,2,42 /proc/[0-9]*/stat | sort -n -k+3 -r | grep -v 0$ >> ${IO_THROTTLE_LOG} + command cut -d" " -f 1,2,42 /proc/[0-9]*/stat | sort -n -k+3 -r | grep -v 0$ >> ${IO_THROTTLE_LOG} ok } diff --git a/scripts/cleanup.sh b/scripts/cleanup.sh index b9fff7987..24861c3e9 100644 --- a/scripts/cleanup.sh +++ b/scripts/cleanup.sh @@ -9,24 +9,24 @@ sudo rm -rf /tmp/worker # Clean up files to reduce confusion during debug sudo rm -rf \ - /etc/hostname \ - /etc/machine-id \ - /etc/resolv.conf \ - /etc/ssh/ssh_host* \ - /home/ec2-user/.ssh/authorized_keys \ - /root/.ssh/authorized_keys \ - /var/lib/cloud/data \ - /var/lib/cloud/instance \ - /var/lib/cloud/instances \ - /var/lib/cloud/sem \ - /var/lib/dhclient/* \ - /var/lib/dhcp/dhclient.* \ - /var/lib/yum/history \ - /var/log/cloud-init-output.log \ - /var/log/cloud-init.log \ - /var/log/secure \ - /var/log/wtmp \ - /var/log/messages \ - /tmp/imds-tokens + /etc/hostname \ + /etc/machine-id \ + /etc/resolv.conf \ + /etc/ssh/ssh_host* \ + /home/ec2-user/.ssh/authorized_keys \ + /root/.ssh/authorized_keys \ + /var/lib/cloud/data \ + 
/var/lib/cloud/instance \ + /var/lib/cloud/instances \ + /var/lib/cloud/sem \ + /var/lib/dhclient/* \ + /var/lib/dhcp/dhclient.* \ + /var/lib/yum/history \ + /var/log/cloud-init-output.log \ + /var/log/cloud-init.log \ + /var/log/secure \ + /var/log/wtmp \ + /var/log/messages \ + /tmp/imds-tokens sudo touch /etc/machine-id diff --git a/scripts/cleanup_additional_repos.sh b/scripts/cleanup_additional_repos.sh index e2665b484..79179d674 100644 --- a/scripts/cleanup_additional_repos.sh +++ b/scripts/cleanup_additional_repos.sh @@ -9,7 +9,6 @@ if [ -z "${ADDITIONAL_YUM_REPOS}" ]; then exit 0 fi - AWK_CMD=' BEGIN {RS=";";FS=","} { @@ -24,4 +23,4 @@ BEGIN {RS=";";FS=","} } {cmd="rm -f " Repo; system(cmd)} ' -sudo awk "$AWK_CMD" <<< "${ADDITIONAL_YUM_REPOS}" \ No newline at end of file +sudo awk "$AWK_CMD" <<< "${ADDITIONAL_YUM_REPOS}" diff --git a/scripts/generate-version-info.sh b/scripts/generate-version-info.sh index 889a758ef..22ef83a45 100644 --- a/scripts/generate-version-info.sh +++ b/scripts/generate-version-info.sh @@ -5,8 +5,7 @@ set -o errexit set -o pipefail -if [ "$#" -ne 1 ] -then +if [ "$#" -ne 1 ]; then echo "usage: $0 OUTPUT_FILE" exit 1 fi diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index da0de5789..f329cd996 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -11,14 +11,14 @@ TEMPLATE_DIR=${TEMPLATE_DIR:-/tmp/worker} ### Validate Required Arguments ################################################ ################################################################################ validate_env_set() { - ( - set +o nounset - - if [ -z "${!1}" ]; then - echo "Packer variable '$1' was not set. Aborting" - exit 1 - fi - ) + ( + set +o nounset + + if [ -z "${!1}" ]; then + echo "Packer variable '$1' was not set. 
Aborting" + exit 1 + fi + ) } validate_env_set BINARY_BUCKET_NAME @@ -37,12 +37,12 @@ validate_env_set PULL_CNI_FROM_GITHUB MACHINE=$(uname -m) if [ "$MACHINE" == "x86_64" ]; then - ARCH="amd64" + ARCH="amd64" elif [ "$MACHINE" == "aarch64" ]; then - ARCH="arm64" + ARCH="arm64" else - echo "Unknown machine architecture '$MACHINE'" >&2 - exit 1 + echo "Unknown machine architecture '$MACHINE'" >&2 + exit 1 fi ################################################################################ @@ -61,20 +61,20 @@ sudo yum update -y # Install necessary packages sudo yum install -y \ - aws-cfn-bootstrap \ - awscli \ - chrony \ - conntrack \ - curl \ - ec2-instance-connect \ - ipvsadm \ - jq \ - nfs-utils \ - socat \ - unzip \ - wget \ - yum-plugin-versionlock \ - yum-utils + aws-cfn-bootstrap \ + awscli \ + chrony \ + conntrack \ + curl \ + ec2-instance-connect \ + ipvsadm \ + jq \ + nfs-utils \ + socat \ + unzip \ + wget \ + yum-plugin-versionlock \ + yum-utils # Remove any old kernel versions. `--count=1` here means "only leave 1 kernel version installed" sudo package-cleanup --oldkernels --count=1 -y @@ -90,18 +90,18 @@ if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils - sudo chkconfig chronyd on # Make sure that chronyd syncs RTC clock to the kernel. -cat < $TEMPLATE_DIR/kubelet-config.json + KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED=$(cat $TEMPLATE_DIR/kubelet-config.json | jq '.featureGates += {CSIServiceAccountToken: true}') + echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $TEMPLATE_DIR/kubelet-config.json fi if [[ ! $KUBERNETES_VERSION =~ "1.19"* && ! $KUBERNETES_VERSION =~ "1.20"* && ! 
$KUBERNETES_VERSION =~ "1.21"* ]]; then - # enable CredentialProviders feature flags in kubelet service file - IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' - sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet.service - # enable KubeletCredentialProviders features in kubelet configuration - KUBELET_CREDENTIAL_PROVIDERS_FEATURES=$(cat $TEMPLATE_DIR/kubelet-config.json | jq '.featureGates += {KubeletCredentialProviders: true}') - printf "%s" "$KUBELET_CREDENTIAL_PROVIDERS_FEATURES" > "$TEMPLATE_DIR/kubelet-config.json" + # enable CredentialProviders feature flags in kubelet service file + IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' + sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet.service + # enable KubeletCredentialProviders features in kubelet configuration + KUBELET_CREDENTIAL_PROVIDERS_FEATURES=$(cat $TEMPLATE_DIR/kubelet-config.json | jq '.featureGates += {KubeletCredentialProviders: true}') + printf "%s" "$KUBELET_CREDENTIAL_PROVIDERS_FEATURES" > "$TEMPLATE_DIR/kubelet-config.json" fi sudo mv $TEMPLATE_DIR/kubelet.service /etc/systemd/system/kubelet.service @@ -293,7 +292,6 @@ sudo chown root:root /etc/systemd/system/kubelet.service sudo mv $TEMPLATE_DIR/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json sudo chown root:root /etc/kubernetes/kubelet/kubelet-config.json - sudo systemctl daemon-reload # Disable the kubelet until the proper dropins have been configured sudo systemctl disable kubelet @@ -311,28 +309,28 @@ sudo chmod +x /etc/eks/max-pods-calculator.sh SONOBUOY_E2E_REGISTRY="${SONOBUOY_E2E_REGISTRY:-}" if [[ -n "$SONOBUOY_E2E_REGISTRY" ]]; then - 
sudo mv $TEMPLATE_DIR/sonobuoy-e2e-registry-config /etc/eks/sonobuoy-e2e-registry-config - sudo sed -i s,SONOBUOY_E2E_REGISTRY,$SONOBUOY_E2E_REGISTRY,g /etc/eks/sonobuoy-e2e-registry-config + sudo mv $TEMPLATE_DIR/sonobuoy-e2e-registry-config /etc/eks/sonobuoy-e2e-registry-config + sudo sed -i s,SONOBUOY_E2E_REGISTRY,$SONOBUOY_E2E_REGISTRY,g /etc/eks/sonobuoy-e2e-registry-config fi ################################################################################ ### ECR CREDENTIAL PROVIDER #################################################### ################################################################################ if [[ ! $KUBERNETES_VERSION =~ "1.19"* && ! $KUBERNETES_VERSION =~ "1.20"* && ! $KUBERNETES_VERSION =~ "1.21"* ]]; then - ECR_BINARY="ecr-credential-provider" - if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then - echo "AWS cli present - using it to copy ecr-credential-provider binaries from s3." - aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$ECR_BINARY . - else - echo "AWS cli missing - using wget to fetch ecr-credential-provider binaries from s3. Note: This won't work for private bucket." - sudo wget "$S3_URL_BASE/$ECR_BINARY" - fi - sudo chmod +x $ECR_BINARY - sudo mkdir -p /etc/eks/ecr-credential-provider - sudo mv $ECR_BINARY /etc/eks/ecr-credential-provider - - # copying credential provider config file to eks folder - sudo mv $TEMPLATE_DIR/ecr-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config + ECR_BINARY="ecr-credential-provider" + if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then + echo "AWS cli present - using it to copy ecr-credential-provider binaries from s3." + aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$ECR_BINARY . + else + echo "AWS cli missing - using wget to fetch ecr-credential-provider binaries from s3. Note: This won't work for private bucket." 
+ sudo wget "$S3_URL_BASE/$ECR_BINARY" + fi + sudo chmod +x $ECR_BINARY + sudo mkdir -p /etc/eks/ecr-credential-provider + sudo mv $ECR_BINARY /etc/eks/ecr-credential-provider + + # copying credential provider config file to eks folder + sudo mv $TEMPLATE_DIR/ecr-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config fi ################################################################################ @@ -346,7 +344,7 @@ sudo yum install -y amazon-ssm-agent ################################################################################ BASE_AMI_ID=$(imds /latest/meta-data/ami-id) -cat < /tmp/release +cat << EOF > /tmp/release BASE_AMI_ID="$BASE_AMI_ID" BUILD_TIME="$(date)" BUILD_KERNEL="$(uname -r)" @@ -359,7 +357,7 @@ sudo chown -R root:root /etc/eks ### Stuff required by "protectKernelDefaults=true" ############################# ################################################################################ -cat < Should use default API server QPS for K8s 1.21-" exit_code=0 export KUBELET_VERSION=v1.21.0-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi # values should not be set @@ -21,11 +21,11 @@ expected_api_burst="null" actual_api_qps=$(jq -r '.kubeAPIQPS' < /etc/kubernetes/kubelet/kubelet-config.json) actual_api_burst=$(jq -r '.kubeAPIBurst' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then - echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" - exit 1 + echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" + exit 1 fi if [[ ${actual_api_burst} != ${expected_api_burst} ]]; then - echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" - exit 1 + echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" + exit 1 fi diff --git a/test/cases/api-qps-k8s-1.22-above.sh b/test/cases/api-qps-k8s-1.22-above.sh index 103868c93..cbc242ac2 100755 --- a/test/cases/api-qps-k8s-1.22-above.sh +++ b/test/cases/api-qps-k8s-1.22-above.sh @@ -5,13 +5,13 @@ echo "--> Should increase API server QPS for K8s 1.22+" exit_code=0 export KUBELET_VERSION=v1.22.0-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi expected_api_qps="10" @@ -20,11 +20,11 @@ expected_api_burst="20" actual_api_qps=$(jq -r '.kubeAPIQPS' < /etc/kubernetes/kubelet/kubelet-config.json) actual_api_burst=$(jq -r '.kubeAPIBurst' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then - echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" - exit 1 + echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" + exit 1 fi if [[ ${actual_api_burst} != ${expected_api_burst} ]]; then - echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" - exit 1 + echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" + exit 1 fi diff --git a/test/cases/container-runtime-defaults.sh b/test/cases/container-runtime-defaults.sh index 03333a190..d4aaf407e 100755 --- a/test/cases/container-runtime-defaults.sh +++ b/test/cases/container-runtime-defaults.sh @@ -8,91 +8,91 @@ echo "--> Should allow dockerd as container runtime when below k8s version 1.24" # This variable is used to override the default value in the kubelet mock export KUBELET_VERSION=v1.20.15-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --container-runtime dockerd \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime dockerd \ + test || exit_code=$? 
if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 fi echo "--> Should allow containerd as container runtime when below k8s version 1.24" export KUBELET_VERSION=v1.20.15-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --container-runtime containerd \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime containerd \ + test || exit_code=$? if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 fi echo "--> Should have default container runtime when below k8s version 1.24" export KUBELET_VERSION=v1.20.15-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 fi echo "--> Should not allow dockerd as container runtime when at or above k8s version 1.24" export KUBELET_VERSION=v1.24.15-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --container-runtime dockerd \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime dockerd \ + test || exit_code=$? 
echo "EXIT CODE $exit_code" if [[ ${exit_code} -eq 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi exit_code=0 echo "--> Should allow containerd as container runtime when at or above k8s version 1.24" export KUBELET_VERSION=v1.24.15-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --container-runtime containerd \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime containerd \ + test || exit_code=$? if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 fi echo "--> Should have default container runtime when at or above k8s version 1.24" export KUBELET_VERSION=v1.24.15-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 fi echo "--> Should ignore docker-specific flags when at or above k8s version 1.24" export KUBELET_VERSION=v1.24.15-eks-ba74326 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --enable-docker-bridge true \ - --docker-config-json "{\"some\":\"json\"}" \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --enable-docker-bridge true \ + --docker-config-json "{\"some\":\"json\"}" \ + test || exit_code=$? 
if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 fi diff --git a/test/cases/imds-token-refresh.sh b/test/cases/imds-token-refresh.sh index 0947ec61c..1f4ca7039 100755 --- a/test/cases/imds-token-refresh.sh +++ b/test/cases/imds-token-refresh.sh @@ -12,40 +12,34 @@ export IMDS_TOKEN_TTL_SECONDS=$TTL export IMDS_DEBUG=true imds /latest/meta-data/instance-id || exit_code=$? -if [[ ${exit_code} -ne 0 ]] -then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 -elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]] -then - echo "❌ Test Failed: expected one token to be present after first IMDS call but got '$(ls $TOKEN_DIR)'" - exit 1 +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]]; then + echo "❌ Test Failed: expected one token to be present after first IMDS call but got '$(ls $TOKEN_DIR)'" + exit 1 fi imds /latest/meta-data/instance-id || exit_code=$? -if [[ ${exit_code} -ne 0 ]] -then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 -elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]] -then - echo "❌ Test Failed: expected one token to be present after second IMDS call but got '$(ls $TOKEN_DIR)'" - exit 1 +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]]; then + echo "❌ Test Failed: expected one token to be present after second IMDS call but got '$(ls $TOKEN_DIR)'" + exit 1 fi sleep $(($TTL + 1)) imds /latest/meta-data/instance-id || exit_code=$? 
-if [[ ${exit_code} -ne 0 ]] -then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 -elif [[ $(ls $TOKEN_DIR | wc -l) -ne 2 ]] -then - echo "❌ Test Failed: expected two tokens to be present after third IMDS call but got '$(ls $TOKEN_DIR)'" - exit 1 +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 2 ]]; then + echo "❌ Test Failed: expected two tokens to be present after third IMDS call but got '$(ls $TOKEN_DIR)'" + exit 1 fi sleep $(($TTL + 1)) @@ -54,26 +48,22 @@ sleep $(($TTL + 1)) IMDS_MAX_TOKEN_TTL_SECONDS=$TTL imds /latest/meta-data/instance-id || exit_code=$? -if [[ ${exit_code} -ne 0 ]] -then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 -elif [[ $(ls $TOKEN_DIR | wc -l) -ne 2 ]] -then - echo "❌ Test Failed: expected two tokens to be present after first garbage-collection but got '$(ls $TOKEN_DIR)'" - exit 1 +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 2 ]]; then + echo "❌ Test Failed: expected two tokens to be present after first garbage-collection but got '$(ls $TOKEN_DIR)'" + exit 1 fi # the other expired token should be removed with a window of 0 IMDS_MAX_TOKEN_TTL_SECONDS=0 imds /latest/meta-data/instance-id || exit_code=$? 
-if [[ ${exit_code} -ne 0 ]] -then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 -elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]] -then - echo "❌ Test Failed: expected one token to be present after second garbage-collection but got '$(ls $TOKEN_DIR)'" - exit 1 -fi \ No newline at end of file +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +elif [[ $(ls $TOKEN_DIR | wc -l) -ne 1 ]]; then + echo "❌ Test Failed: expected one token to be present after second garbage-collection but got '$(ls $TOKEN_DIR)'" + exit 1 +fi diff --git a/test/cases/ip-family-service-ipv6-cidr-mismatch.sh b/test/cases/ip-family-service-ipv6-cidr-mismatch.sh index f39ed8807..fe835ee03 100755 --- a/test/cases/ip-family-service-ipv6-cidr-mismatch.sh +++ b/test/cases/ip-family-service-ipv6-cidr-mismatch.sh @@ -4,13 +4,13 @@ set -euo pipefail echo "-> Should fail validation - ip-family mismatch" exit_code=0 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --ip-family ipv4 \ - --service-ipv6-cidr 192.168.0.1/24 \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv4 \ + --service-ipv6-cidr 192.168.0.1/24 \ + test || exit_code=$? 
if [[ ${exit_code} -eq 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi diff --git a/test/cases/ipv4-cluster-dns-ip.sh b/test/cases/ipv4-cluster-dns-ip.sh index 03074fc07..b0e05355b 100755 --- a/test/cases/ipv4-cluster-dns-ip.sh +++ b/test/cases/ipv4-cluster-dns-ip.sh @@ -5,19 +5,19 @@ echo "--> Should return IPv4 DNS Cluster IP when given dns-cluster-ip" exit_code=0 expected_cluster_dns="192.168.0.1" /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --ip-family ipv4 \ - --dns-cluster-ip "${expected_cluster_dns}" \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv4 \ + --dns-cluster-ip "${expected_cluster_dns}" \ + test || exit_code=$? if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi actual_cluster_dns=$(jq -r '.clusterDNS[0]' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then - echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" - exit 1 -fi \ No newline at end of file + echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" + exit 1 +fi diff --git a/test/cases/ipv6-cluster-dns-ip.sh b/test/cases/ipv6-cluster-dns-ip.sh index 0f7d7451d..bfb5553c9 100755 --- a/test/cases/ipv6-cluster-dns-ip.sh +++ b/test/cases/ipv6-cluster-dns-ip.sh @@ -5,19 +5,19 @@ echo "-> Should return ipv6 DNS Cluster IP when given dns-cluster-ip" exit_code=0 expected_cluster_dns="fe80::2a" /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --ip-family ipv6 \ - 
--dns-cluster-ip "${expected_cluster_dns}" \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv6 \ + --dns-cluster-ip "${expected_cluster_dns}" \ + test || exit_code=$? if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi actual_cluster_dns=$(jq -r '.clusterDNS[0]' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then - echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" - exit 1 -fi \ No newline at end of file + echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" + exit 1 +fi diff --git a/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh b/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh index f503f01ad..ec84ee391 100755 --- a/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh +++ b/test/cases/ipv6-dns-cluster-ip-given-service-ipv6-cidr.sh @@ -5,20 +5,20 @@ echo "-> Should return IPv6 DNS cluster IP when given service-ipv6-cidr" exit_code=0 TEMP_DIR=$(mktemp -d) /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --ip-family ipv6 \ - --service-ipv6-cidr fe80::1 \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv6 \ + --service-ipv6-cidr fe80::1 \ + test || exit_code=$? 
if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi expected_cluster_dns="fe80::1a" actual_cluster_dns=$(jq -r '.clusterDNS[0]' < /etc/kubernetes/kubelet/kubelet-config.json) if [[ ${actual_cluster_dns} != "${expected_cluster_dns}" ]]; then - echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" - exit 1 -fi \ No newline at end of file + echo "❌ Test Failed: expected clusterDNS IP '${expected_cluster_dns}' but got '${actual_cluster_dns}'" + exit 1 +fi diff --git a/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh b/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh index e984a223e..d93561585 100755 --- a/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh +++ b/test/cases/ipv6-ip-family-and-service-ipv6-cidr.sh @@ -4,12 +4,12 @@ set -euo pipefail echo "-> Should fail w/ \"service-ipv6-cidr must be provided when ip-family is specified as ipv6\"" exit_code=0 /etc/eks/bootstrap.sh \ - --b64-cluster-ca dGVzdA== \ - --apiserver-endpoint http://my-api-endpoint \ - --ip-family ipv6 \ - test || exit_code=$? + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --ip-family ipv6 \ + test || exit_code=$? 
if [[ ${exit_code} -eq 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 -fi \ No newline at end of file + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi diff --git a/test/cases/max-pods-cni-1-11-2-delegation.sh b/test/cases/max-pods-cni-1-11-2-delegation.sh index dca43f5e6..5f64100fa 100755 --- a/test/cases/max-pods-cni-1-11-2-delegation.sh +++ b/test/cases/max-pods-cni-1-11-2-delegation.sh @@ -4,18 +4,18 @@ set -euo pipefail echo "-> Should calc max-pods successfully for VPC CNI 1.11.2" exit_code=0 out=$(/etc/eks/max-pods-calculator.sh \ - --instance-type m5.8xlarge \ - --cni-version 1.11.2 \ - --cni-prefix-delegation-enabled || exit_code=$?) + --instance-type m5.8xlarge \ + --cni-version 1.11.2 \ + --cni-prefix-delegation-enabled || exit_code=$?) echo $out if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi expected_max_pods="250" actual_max_pods=$(grep -o '[0-9]\+' <<< ${out}) if [[ ${actual_max_pods} -ne ${expected_max_pods} ]]; then - echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.11.2 to be '${expected_max_pods}', but got '${actual_max_pods}'" - exit 1 + echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.11.2 to be '${expected_max_pods}', but got '${actual_max_pods}'" + exit 1 fi diff --git a/test/cases/max-pods-cni-1-11-2.sh b/test/cases/max-pods-cni-1-11-2.sh index ec47c0730..c1311b707 100755 --- a/test/cases/max-pods-cni-1-11-2.sh +++ b/test/cases/max-pods-cni-1-11-2.sh @@ -4,17 +4,17 @@ set -euo pipefail echo "-> Should calc max-pods successfully for m5.8xlarge VPC CNI 1.11.2" exit_code=0 out=$(/etc/eks/max-pods-calculator.sh \ - --instance-type m5.8xlarge \ - --cni-version 1.11.2 || exit_code=$?) + --instance-type m5.8xlarge \ + --cni-version 1.11.2 || exit_code=$?) 
echo $out if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi expected_max_pods="234" actual_max_pods=$(grep -o '[0-9]\+' <<< ${out}) if [[ ${actual_max_pods} -ne ${expected_max_pods} ]]; then - echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.11.2 to be '${expected_max_pods}', but got '${actual_max_pods}'" - exit 1 + echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.11.2 to be '${expected_max_pods}', but got '${actual_max_pods}'" + exit 1 fi diff --git a/test/cases/max-pods-cni-1-7-5.sh b/test/cases/max-pods-cni-1-7-5.sh index 619767256..262ca81ed 100755 --- a/test/cases/max-pods-cni-1-7-5.sh +++ b/test/cases/max-pods-cni-1-7-5.sh @@ -5,17 +5,17 @@ echo "-> Should calc max-pods successfully for VPC CNI 1.7.5" exit_code=0 export IMDS_DEBUG=true out=$(/etc/eks/max-pods-calculator.sh \ - --instance-type-from-imds \ - --cni-version 1.7.5 || exit_code=$?) + --instance-type-from-imds \ + --cni-version 1.7.5 || exit_code=$?) 
echo $out if [[ ${exit_code} -ne 0 ]]; then - echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" - exit 1 + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 fi expected_max_pods="58" actual_max_pods=$(grep -o '[0-9]\+' <<< ${out}) if [[ ${actual_max_pods} -ne ${expected_max_pods} ]]; then - echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.7.5 to be '${expected_max_pods}', but got '${actual_max_pods}'" - exit 1 -fi \ No newline at end of file + echo "❌ Test Failed: expected max-pods for m4.xlarge w/ CNI 1.7.5 to be '${expected_max_pods}', but got '${actual_max_pods}'" + exit 1 +fi diff --git a/test/entrypoint.sh b/test/entrypoint.sh index fdd437768..9cf68701b 100755 --- a/test/entrypoint.sh +++ b/test/entrypoint.sh @@ -1,10 +1,13 @@ #!/usr/bin/env bash set -euo pipefail -SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" +SCRIPTPATH="$( + cd "$(dirname "$0")" + pwd -P +)" ## Start IMDS mock /sbin/ec2-metadata-mock --imdsv2 &> /var/log/ec2-metadata-mock.log & sleep 1 ## execute any other params -/test.sh \ No newline at end of file +/test.sh diff --git a/test/mocks/aws b/test/mocks/aws index 5d9d57079..b752fb6e3 100755 --- a/test/mocks/aws +++ b/test/mocks/aws @@ -1,16 +1,19 @@ #!/usr/bin/env bash set -euo pipefail -SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" +SCRIPTPATH="$( + cd "$(dirname "$0")" + pwd -P +)" if [[ $1 == "ec2" ]]; then - if [[ $2 == "describe-instance-types" ]]; then - instance_type=$(echo "${@}" | grep -o '[a-z]\+[0-9]\+[a-z]*\.[0-9a-z]\+' | tr '.' '-') - if [[ -f "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" ]]; then - cat "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" - exit 0 - fi - echo "instance type not found" - exit 1 + if [[ $2 == "describe-instance-types" ]]; then + instance_type=$(echo "${@}" | grep -o '[a-z]\+[0-9]\+[a-z]*\.[0-9a-z]\+' | tr '.' 
'-') + if [[ -f "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" ]]; then + cat "${SCRIPTPATH}/describe-instance-types/${instance_type}.json" + exit 0 fi -fi \ No newline at end of file + echo "instance type not found" + exit 1 + fi +fi diff --git a/test/mocks/iptables-save b/test/mocks/iptables-save index b2bd12826..b4d037268 100755 --- a/test/mocks/iptables-save +++ b/test/mocks/iptables-save @@ -1,4 +1,4 @@ #!/usr/bin/env bash set -euo pipefail -echo "mocking iptables-save with params $@" \ No newline at end of file +echo "mocking iptables-save with params $@" diff --git a/test/mocks/kubelet b/test/mocks/kubelet index c689a18dc..b3fed23c1 100755 --- a/test/mocks/kubelet +++ b/test/mocks/kubelet @@ -5,7 +5,7 @@ set -euo pipefail # so we'll set a default here to avoid test failures, and you can # override by setting the KUBELET_VERSION environment variable. if [ $# == 1 ] && [ $1 == "--version" ]; then - echo "Kubernetes ${KUBELET_VERSION:-v1.23.9-eks-ba74326}" + echo "Kubernetes ${KUBELET_VERSION:-v1.23.9-eks-ba74326}" else - echo "mocking kubelet with params $@" + echo "mocking kubelet with params $@" fi diff --git a/test/mocks/sudo b/test/mocks/sudo index f91c5ff33..e485cef2e 100755 --- a/test/mocks/sudo +++ b/test/mocks/sudo @@ -1,4 +1,4 @@ #!/usr/bin/env bash set -euo pipefail -echo "mocking sudo with params $@" \ No newline at end of file +echo "mocking sudo with params $@" diff --git a/test/mocks/systemctl b/test/mocks/systemctl index 47846145d..5efdd955e 100755 --- a/test/mocks/systemctl +++ b/test/mocks/systemctl @@ -1,4 +1,4 @@ #!/usr/bin/env bash set -euo pipefail -echo "mocking systemctl with $@" \ No newline at end of file +echo "mocking systemctl with $@" diff --git a/test/test-harness.sh b/test/test-harness.sh index f632194a4..a04558086 100755 --- a/test/test-harness.sh +++ b/test/test-harness.sh @@ -1,10 +1,14 @@ #!/usr/bin/env bash -export SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" +export SCRIPTPATH="$( + cd "$(dirname "$0")" + 
pwd -P +)" set -euo pipefail TEST_CASE_SCRIPT="" -USAGE=$(cat << 'EOM' +USAGE=$( + cat << 'EOM' Usage: test-harness.sh [-c ] Executes the test harness for the EKS Optimized AL2 AMI. By default the test harness executes all scripts in the cases directory. @@ -16,16 +20,16 @@ EOM while getopts "c:h" opt; do case ${opt} in - c ) # Case Script Path - TEST_CASE_SCRIPT="$OPTARG" + c) # Case Script Path + TEST_CASE_SCRIPT="$OPTARG" ;; - h ) # help - echo "$USAGE" 1>&2 - exit + h) # help + echo "$USAGE" 1>&2 + exit ;; - \? ) - echo "$USAGE" 1>&2 - exit + \?) + echo "$USAGE" 1>&2 + exit ;; esac done @@ -33,38 +37,38 @@ done docker build -t eks-optimized-ami -f "${SCRIPTPATH}/Dockerfile" "${SCRIPTPATH}/../" overall_status=0 -function run(){ - docker run -v ${SCRIPTPATH}/../files/:/etc/eks/ \ - -v "$(realpath $1):/test.sh" \ - --attach STDOUT \ - --attach STDERR \ - --rm \ - eks-optimized-ami +function run() { + docker run -v ${SCRIPTPATH}/../files/:/etc/eks/ \ + -v "$(realpath $1):/test.sh" \ + --attach STDOUT \ + --attach STDERR \ + --rm \ + eks-optimized-ami } if [[ ! -z ${TEST_CASE_SCRIPT} ]]; then - test_cases=${TEST_CASE_SCRIPT} + test_cases=${TEST_CASE_SCRIPT} else - test_cases=($(find ${SCRIPTPATH}/cases -name "*.sh" -type f)) + test_cases=($(find ${SCRIPTPATH}/cases -name "*.sh" -type f)) fi for case in "${test_cases[@]}"; do - status=0 - echo "=================================================================================================================" - echo "-> Executing Test Case: $(basename ${case})" - run ${case} || status=1 - if [[ ${status} -eq 0 ]]; then - echo "✅ ✅ $(basename ${case}) Tests Passed! ✅ ✅" - else - echo "❌ ❌ $(basename ${case}) Tests Failed! 
❌ ❌" - overall_status=1 - fi - echo "=================================================================================================================" + status=0 + echo "=================================================================================================================" + echo "-> Executing Test Case: $(basename ${case})" + run ${case} || status=1 + if [[ ${status} -eq 0 ]]; then + echo "✅ ✅ $(basename ${case}) Tests Passed! ✅ ✅" + else + echo "❌ ❌ $(basename ${case}) Tests Failed! ❌ ❌" + overall_status=1 + fi + echo "=================================================================================================================" done if [[ ${overall_status} -eq 0 ]]; then - echo "✅ ✅ All Tests Passed! ✅ ✅" + echo "✅ ✅ All Tests Passed! ✅ ✅" else - echo "❌ ❌ Some Tests Failed! ❌ ❌" + echo "❌ ❌ Some Tests Failed! ❌ ❌" fi exit $overall_status From 6cdf8390777f38c1fe531901cf8707394d888482 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 3 Nov 2022 15:41:55 -0700 Subject: [PATCH 100/232] Ignore shell formatting commit in git blame (#1083) --- .git-blame-ignore-revs | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 000000000..b78d5db21 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,3 @@ +# Applied code style rules to shell files +6014c4e6872a23f82ca295afa93b033207042876 + From ac6f2297a1e87c84c7e2f88b2cd428d04b621d65 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 3 Nov 2022 15:42:44 -0700 Subject: [PATCH 101/232] Add lint make target (#1068) --- .github/workflows/ci.yaml | 7 +++++++ Makefile | 11 +++++++++++ 2 files changed, 18 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 45b749ed9..dfc7f6804 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -10,6 +10,13 @@ on: - reopened - synchronize jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 
+ - run: echo "$(go env GOPATH)/bin" >> $GITHUB_PATH + - run: go install mvdan.cc/sh/v3/cmd/shfmt@latest + - run: make lint test: runs-on: ubuntu-latest steps: diff --git a/Makefile b/Makefile index 97d3374a6..30ea43e98 100644 --- a/Makefile +++ b/Makefile @@ -50,6 +50,17 @@ endif fmt: ## Format the source files $(SHFMT_COMMAND) $(SHFMT_FLAGS) --write $(MAKEFILE_DIR) +SHELLCHECK_COMMAND := $(shell which shellcheck) +ifeq (, $(SHELLCHECK_COMMAND)) +SHELLCHECK_COMMAND = docker run --rm -v $(MAKEFILE_DIR):$(MAKEFILE_DIR) koalaman/shellcheck:stable +endif +SHELL_FILES := $(shell find $(MAKEFILE_DIR) -type f -name '*.sh') + +.PHONY: lint +lint: ## Check the source files for syntax and format issues + $(SHFMT_COMMAND) $(SHFMT_FLAGS) --diff $(MAKEFILE_DIR) + $(SHELLCHECK_COMMAND) --format gcc --severity error $(SHELL_FILES) + .PHONY: test test: ## run the test-harness test/test-harness.sh From ee841f3eabd886bffbc1c98262b991ad4b9326c6 Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Fri, 4 Nov 2022 11:29:47 -0400 Subject: [PATCH 102/232] Updated CHANGELOG for v20221101 (#1085) --- CHANGELOG.md | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f268cc05..4eea99b25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,47 @@ # Changelog +### AMI Release v20221101 +* amazon-eks-gpu-node-1.23-v20221101 +* amazon-eks-gpu-node-1.22-v20221101 +* amazon-eks-gpu-node-1.21-v20221101 +* amazon-eks-gpu-node-1.20-v20221101 +* amazon-eks-arm64-node-1.23-v20221101 +* amazon-eks-arm64-node-1.22-v20221101 +* amazon-eks-arm64-node-1.21-v20221101 +* amazon-eks-arm64-node-1.20-v20221101 +* amazon-eks-node-1.23-v20221101 +* amazon-eks-node-1.22-v20221101 +* amazon-eks-node-1.21-v20221101 +* amazon-eks-node-1.20-v20221101 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.23.9-20221101` +* `1.22.12-20221101` +* `1.21.14-20221101` +* 
`1.20.15-20221101` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ + +AMI details: +* kernel: 5.4.209-116.367.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.3-1.amzn2.0.2 +* cuda: 470.141.03-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +* Pin Kernel 5.4 to 5.4.209-116.367 to prevent nodes from going into Unready [#1072](https://github.com/awslabs/amazon-eks-ami/pull/1072) +* Increase the kube-api-server QPS from 5/10 to 10/20 [#1030](https://github.com/awslabs/amazon-eks-ami/pull/1030) +* Update docker and containerd for [ALASDOCKER-2022-021](https://alas.aws.amazon.com/AL2/ALASDOCKER-2022-021.html) [#1056](https://github.com/awslabs/amazon-eks-ami/pull/1056) +* runc version is updated to 1.1.3-1.amzn2.0.2 to include ALAS2DOCKER-2022-020 [#1055](https://github.com/awslabs/amazon-eks-ami/pull/1055) +* Release AMI in me-central-1 with version 1.21, 1.22, 1.23. 1.20 is not supported in this region since it will be deprecated soon. + ### [Recalled] AMI Release v20221027 * amazon-eks-gpu-node-1.23-v20221027 * amazon-eks-gpu-node-1.22-v20221027 From 6f629e1ea90f9289a5ef503dfed54ee0105bceb9 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 4 Nov 2022 11:31:27 -0400 Subject: [PATCH 103/232] Revert "Pin Kernel 5.4 to 5.4.209-116.367 to prevent nodes from going into Unready (#1072)" (#1086) This reverts commit ff27e2440b6a02d51ebcc5fec2ae42d315b31310. 
--- scripts/upgrade_kernel.sh | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 232d59c22..5736d0f5c 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -13,12 +13,7 @@ fi if [[ $KERNEL_VERSION == "4.14" ]]; then sudo yum update -y kernel elif [[ $KERNEL_VERSION == "5.4" ]]; then - # Pinning Kernel to 5.4.209-116.367 since we're investigating issues with later Kernel versions which cause nodes to become Unready. - # sudo amazon-linux-extras install -y kernel-5.4 - sudo amazon-linux-extras enable kernel-5.4=latest - sudo yum -y install kernel-5.4.209-116.367.amzn2 - sudo yum install -y yum-plugin-versionlock - sudo yum versionlock kernel-5.4* + sudo amazon-linux-extras install -y kernel-5.4 elif [[ $KERNEL_VERSION == "5.10" ]]; then sudo amazon-linux-extras install -y kernel-5.10 else From 112e919c44397da2c1f5cbadcd0d521f4aac0218 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 4 Nov 2022 15:28:28 -0700 Subject: [PATCH 104/232] Add stale issue workflow (#1088) --- .github/workflows/stale-issues.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .github/workflows/stale-issues.yaml diff --git a/.github/workflows/stale-issues.yaml b/.github/workflows/stale-issues.yaml new file mode 100644 index 000000000..a56181160 --- /dev/null +++ b/.github/workflows/stale-issues.yaml @@ -0,0 +1,21 @@ +name: 'Close stale issues' +on: + schedule: + # once a day at noon + - cron: '0 12 * * *' +permissions: + issues: write +jobs: + stale: + runs-on: ubuntu-latest + steps: + - uses: actions/stale@v6 + with: + days-before-stale: 90 + days-before-close: 14 + stale-issue-message: 'Please update this issue if it applies to the latest AMI release; otherwise it will be closed soon.' 
+ stale-issue-label: 'stale' + exempt-issue-labels: 'never-stale' + # empty message will prevent PR's from being staled + stale-pr-message: '' + debug-only: true \ No newline at end of file From 832c4bb597c977d21382d1f384e56a930dbc7eb0 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 9 Nov 2022 10:28:18 -0800 Subject: [PATCH 105/232] AMI Release v20221104 (#1093) --- CHANGELOG.md | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4eea99b25..b5d9cef46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,49 @@ # Changelog +### AMI Release v20221104 +* amazon-eks-gpu-node-1.24-v20221104 +* amazon-eks-gpu-node-1.23-v20221104 +* amazon-eks-gpu-node-1.22-v20221104 +* amazon-eks-gpu-node-1.21-v20221104 +* amazon-eks-gpu-node-1.20-v20221104 +* amazon-eks-arm64-node-1.24-v20221104 +* amazon-eks-arm64-node-1.23-v20221104 +* amazon-eks-arm64-node-1.22-v20221104 +* amazon-eks-arm64-node-1.21-v20221104 +* amazon-eks-arm64-node-1.20-v20221104 +* amazon-eks-node-1.24-v20221104 +* amazon-eks-node-1.23-v20221104 +* amazon-eks-node-1.22-v20221104 +* amazon-eks-node-1.21-v20221104 +* amazon-eks-node-1.20-v20221104 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.6-20221104` +* `1.23.9-20221104` +* `1.22.12-20221104` +* `1.21.14-20221104` +* `1.20.15-20221104` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.24.6/2022-10-05/ +* s3://amazon-eks/1.23.9/2022-07-27/ +* s3://amazon-eks/1.22.12/2022-07-27/ +* s3://amazon-eks/1.21.14/2022-07-27/ +* s3://amazon-eks/1.20.15/2022-07-27/ + +AMI details: +* kernel: 5.4.219-126.411.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.3-1.amzn2.0.2 +* cuda: 470.141.03-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +* Adds support for 1.24 with version 1.24.6 +* Upgrades kernel at 
`5.4.219-126.411.amzn2` to address [known issues with the previous kernel version](https://github.com/awslabs/amazon-eks-ami/issues/1071) + ### AMI Release v20221101 * amazon-eks-gpu-node-1.23-v20221101 * amazon-eks-gpu-node-1.22-v20221101 From 36cbdeb3363e59009548f17e27df1bb9f2a46d57 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 9 Nov 2022 11:10:58 -0800 Subject: [PATCH 106/232] Add vercmp helper (#1094) --- files/bin/vercmp | 90 +++++++++++++++++ files/bootstrap.sh | 11 +-- scripts/install-worker.sh | 6 +- test/cases/vercmp.sh | 199 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 294 insertions(+), 12 deletions(-) create mode 100755 files/bin/vercmp create mode 100755 test/cases/vercmp.sh diff --git a/files/bin/vercmp b/files/bin/vercmp new file mode 100755 index 000000000..8edf7b920 --- /dev/null +++ b/files/bin/vercmp @@ -0,0 +1,90 @@ +#!/usr/bin/env bash + +# Comparison expressions for semantic versions. +# only supports semver standard MAJOR.MINOR.PATCH syntax; +# pre-release or build-metadata extensions have undefined behavior. + +set -o errexit +set -o pipefail + +function usage() { + echo "Comparison expressions for semantic versions." 
+ echo + echo "usage: vercmp VERSION_A OPERATOR VERSION_B" + echo + echo "OPERATORS" + echo + echo " lt - Less than" + echo " lteq - Less than or equal to" + echo " eq - Equal to" + echo " gteq - Grater than or equal to" + echo " gt - Greater than" + echo +} + +if [ "$#" -ne 3 ]; then + usage + exit 1 +fi + +LEFT="$1" +OPERATOR="$2" +RIGHT="$3" + +if [ "$LEFT" = "$RIGHT" ]; then + COMPARISON=0 +else + SORTED=($(for VER in "$LEFT" "$RIGHT"; do echo "$VER"; done | sort -V)) + if [ "${SORTED[0]}" = "$LEFT" ]; then + COMPARISON=-1 + else + COMPARISON=1 + fi +fi + +OUTCOME=false + +case $OPERATOR in + lt) + if [ "$COMPARISON" -eq -1 ]; then + OUTCOME=true + fi + ;; + + lteq) + if [ "$COMPARISON" -lt 1 ]; then + OUTCOME=true + fi + ;; + + eq) + if [ "$COMPARISON" -eq 0 ]; then + OUTCOME=true + fi + ;; + + gteq) + if [ "$COMPARISON" -gt -1 ]; then + OUTCOME=true + fi + ;; + + gt) + if [ "$COMPARISON" -eq 1 ]; then + OUTCOME=true + fi + ;; + + *) + usage + exit 1 + ;; +esac + +echo "$OUTCOME" + +if [ "$OUTCOME" = "true" ]; then + exit 0 +else + exit 1 +fi diff --git a/files/bootstrap.sh b/files/bootstrap.sh index c7aa93c22..18f69dd2d 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -138,18 +138,11 @@ set -u KUBELET_VERSION=$(kubelet --version | grep -Eo '[0-9]\.[0-9]+\.[0-9]+') echo "Using kubelet version $KUBELET_VERSION" -function is_greater_than_or_equal_to_version() { - local actual_version="$1" - local compared_version="$2" - - [ $actual_version = "$(echo -e "$actual_version\n$compared_version" | sort -V | tail -n1)" ] -} - # As of Kubernetes version 1.24, we will start defaulting the container runtime to containerd # and no longer support docker as a container runtime. 
IS_124_OR_GREATER=false DEFAULT_CONTAINER_RUNTIME=dockerd -if is_greater_than_or_equal_to_version $KUBELET_VERSION "1.24.0"; then +if vercmp "$KUBELET_VERSION" gteq "1.24.0"; then IS_124_OR_GREATER=true DEFAULT_CONTAINER_RUNTIME=containerd fi @@ -467,7 +460,7 @@ else fi INSTANCE_TYPE=$(imds 'latest/meta-data/instance-type') -if is_greater_than_or_equal_to_version $KUBELET_VERSION "1.22.0"; then +if vercmp "$KUBELET_VERSION" gteq "1.22.0"; then # for K8s versions that suport API Priority & Fairness, increase our API server QPS echo $(jq ".kubeAPIQPS=( .kubeAPIQPS // 10)|.kubeAPIBurst=( .kubeAPIBurst // 20)" $KUBELET_CONFIG) > $KUBELET_CONFIG fi diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index f329cd996..744fd659a 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -166,7 +166,7 @@ else sudo mv $TEMPLATE_DIR/containerd-config.toml /etc/eks/containerd/containerd-config.toml fi -if [[ ! $KUBERNETES_VERSION =~ "1.19"* && ! $KUBERNETES_VERSION =~ "1.20"* && ! $KUBERNETES_VERSION =~ "1.21"* ]]; then +if vercmp "$KUBERNETES_VERSION" gteq "1.22.0"; then # enable CredentialProviders features in kubelet-containerd service file IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet-containerd.service @@ -278,7 +278,7 @@ if [[ $KUBERNETES_VERSION == "1.20"* ]]; then echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $TEMPLATE_DIR/kubelet-config.json fi -if [[ ! $KUBERNETES_VERSION =~ "1.19"* && ! $KUBERNETES_VERSION =~ "1.20"* && ! 
$KUBERNETES_VERSION =~ "1.21"* ]]; then +if vercmp "$KUBERNETES_VERSION" gteq "1.22.0"; then # enable CredentialProviders feature flags in kubelet service file IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet.service @@ -316,7 +316,7 @@ fi ################################################################################ ### ECR CREDENTIAL PROVIDER #################################################### ################################################################################ -if [[ ! $KUBERNETES_VERSION =~ "1.19"* && ! $KUBERNETES_VERSION =~ "1.20"* && ! $KUBERNETES_VERSION =~ "1.21"* ]]; then +if vercmp "$KUBERNETES_VERSION" gteq "1.22.0"; then ECR_BINARY="ecr-credential-provider" if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then echo "AWS cli present - using it to copy ecr-credential-provider binaries from s3." diff --git a/test/cases/vercmp.sh b/test/cases/vercmp.sh new file mode 100755 index 000000000..d020c7d81 --- /dev/null +++ b/test/cases/vercmp.sh @@ -0,0 +1,199 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +echo "--> Should compare strictly less-than" +# should succeed +EXIT_CODE=0 +vercmp "1.0.0" lt "2.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" lt "1.0.1" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" lt "1.1.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +# should fail +EXIT_CODE=0 +vercmp "1.0.0" lt "1.0.0" || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.1" lt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.1.0" lt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "2.0.0" lt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi + +echo "--> Should compare less-than-or-equal-to" +# should succeed +EXIT_CODE=0 +vercmp "1.0.0" lteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" lteq "1.0.1" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" lteq "2.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +# should fail +EXIT_CODE=0 +vercmp "1.0.1" lteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.1.0" lteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "2.0.0" lteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi + +echo "--> Should compare strictly equal-to" +# should succeed +EXIT_CODE=0 +vercmp "1.0.0" eq "1.0.0" || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +# should fail +EXIT_CODE=0 +vercmp "1.0.1" eq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" eq "1.0.1" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi + +echo "--> Should compare greater-than-or-equal-to" +# should succeed +EXIT_CODE=0 +vercmp "1.0.0" gteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.1" gteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "2.0.0" gteq "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +# should fail +EXIT_CODE=0 +vercmp "1.0.0" gteq "1.0.1" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" gteq "1.1.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" gteq "2.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi + +echo "--> Should compare strictly greater-than" +# should succeed +EXIT_CODE=0 +vercmp "2.0.0" gt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.1" gt "1.0.0" || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.1.0" gt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +# should fail +EXIT_CODE=0 +vercmp "1.0.0" gt "1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" gt "1.0.1" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" gt "1.1.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +EXIT_CODE=0 +vercmp "1.0.0" gt "2.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi From 794ed5f10842b436e10c9bc89ee41491a6494ade Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 9 Nov 2022 11:21:36 -0800 Subject: [PATCH 107/232] Install awscli v2 bundle when possible (#1066) * Install awscli v2 bundle when possible * Correct awscli version parsing * Apply formatting --- scripts/generate-version-info.sh | 1 + scripts/install-worker.sh | 22 +++++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/scripts/generate-version-info.sh b/scripts/generate-version-info.sh index 22ef83a45..f35243936 100644 --- a/scripts/generate-version-info.sh +++ b/scripts/generate-version-info.sh @@ -17,3 +17,4 @@ sudo rpm --query --all --queryformat '\{"%{NAME}": "%{VERSION}-%{RELEASE}"\}\n' # binaries echo $(jq ".binaries.kubelet = \"$(kubelet --version | awk '{print $2}')\"" $OUTPUT_FILE) > $OUTPUT_FILE +echo $(jq ".binaries.awscli = \"$(aws --version | awk '{print $1}' | cut -d '/' -f 2)\"" $OUTPUT_FILE) > $OUTPUT_FILE diff 
--git a/scripts/install-worker.sh b/scripts/install-worker.sh index 744fd659a..0b1bc8964 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -62,7 +62,6 @@ sudo yum update -y # Install necessary packages sudo yum install -y \ aws-cfn-bootstrap \ - awscli \ chrony \ conntrack \ curl \ @@ -118,6 +117,27 @@ sudo systemctl restart sshd.service sudo mkdir -p /etc/eks sudo mv $TEMPLATE_DIR/iptables-restore.service /etc/eks/iptables-restore.service +################################################################################ +### awscli ##################################################### +################################################################################ + +if [[ "$BINARY_BUCKET_REGION" != "us-iso-east-1" && "$BINARY_BUCKET_REGION" != "us-isob-east-1" ]]; then + # https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html + echo "Installing awscli v2 bundle" + AWSCLI_DIR=$(mktemp -d) + curl \ + --silent \ + --show-error \ + --retry 10 \ + --retry-delay 1 \ + -L "https://awscli.amazonaws.com/awscli-exe-linux-${MACHINE}.zip" -o "${AWSCLI_DIR}/awscliv2.zip" + unzip -q "${AWSCLI_DIR}/awscliv2.zip" -d ${AWSCLI_DIR} + sudo "${AWSCLI_DIR}/aws/install" +else + echo "Installing awscli package" + sudo yum install -y awscli +fi + ################################################################################ ### Docker ##################################################################### ################################################################################ From 614d623c807b33d482f3830d1035ea66754d056c Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Thu, 10 Nov 2022 10:33:53 -0600 Subject: [PATCH 108/232] Log collector enhancements (#1092) * save region information This can be looked up from the instance ID, but it's convenient to have it easily parsable from the log archive. * dump containerd goroutines Send containerd a USR1 signal which triggers it to dump goroutines. 
These are saved to the log as well as dumped to a separate log that we archive. * bump log collector version --- .../linux/eks-log-collector.sh | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index b538dd97a..e7d45f9d1 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -20,7 +20,7 @@ export LANG="C" export LC_ALL="C" # Global options -readonly PROGRAM_VERSION="0.7.2" +readonly PROGRAM_VERSION="0.7.3" readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" @@ -195,7 +195,7 @@ get_instance_id() { cp ${INSTANCE_ID_FILE} "${COLLECT_DIR}"/system/instance-id.txt readonly INSTANCE_ID=$(cat "${COLLECT_DIR}"/system/instance-id.txt) else - readonly INSTANCE_ID=$(curl --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/instance-id) + readonly INSTANCE_ID=$(curl -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/instance-id) if [ 0 -eq $? ]; then # Check if previous command was successful. echo "${INSTANCE_ID}" > "${COLLECT_DIR}"/system/instance-id.txt else @@ -204,6 +204,20 @@ get_instance_id() { fi } +get_region() { + if REGION=$(curl -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/placement/region); then + echo "${REGION}" > "${COLLECT_DIR}"/system/region.txt + else + warning "Unable to find EC2 Region, skipping." + fi + + if AZ=$(curl -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/placement/availability-zone); then + echo "${AZ}" > "${COLLECT_DIR}"/system/availability-zone.txt + else + warning "Unable to find EC2 AZ, skipping." 
+ fi +} + is_diskfull() { local threshold local result @@ -242,6 +256,7 @@ collect() { init is_diskfull get_instance_id + get_region get_common_logs get_kernel_info get_mounts_info @@ -578,8 +593,11 @@ get_containerd_info() { try "Collect Containerd daemon information" if [[ "$(pgrep -o containerd)" -ne 0 ]]; then + # force containerd to dump goroutines + timeout 75 killall -sUSR1 containerd timeout 75 containerd config dump > "${COLLECT_DIR}"/containerd/containerd-config.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " timeout 75 journalctl -u containerd > "${COLLECT_DIR}"/containerd/containerd-log.txt 2>&1 || echo -e "\tTimed out, ignoring \"containerd info output \" " + timeout 75 cp -f /tmp/containerd.*.stacks.log "${COLLECT_DIR}"/containerd/ else warning "The Containerd daemon is not running." fi From a521047d1b097b9c3dbb562ca9bdab5a641f347f Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 10 Nov 2022 16:27:18 -0500 Subject: [PATCH 109/232] Update credential provider API to beta for 1.24+ (#1089) --- CONTRIBUTING.md | 15 +++++ files/bootstrap.sh | 8 +++ files/ecr-credential-provider-config | 6 +- test/Dockerfile | 3 + test/cases/ecr-credential-provider-config.sh | 70 ++++++++++++++++++++ test/cases/vercmp.sh | 60 +++++++++++++++++ test/test-harness.sh | 3 +- 7 files changed, 160 insertions(+), 5 deletions(-) create mode 100755 test/cases/ecr-credential-provider-config.sh diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0576691bc..2d6946816 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -115,6 +115,21 @@ Delete the cluster: eksctl delete cluster -f cluster.yaml ``` +## Troubleshooting + +**Tests fail with `realpath: command not found`** + +When running `make test`, you may see a message like below: + +``` +test/test-harness.sh: line 41: realpath: command not found +/entrypoint.sh: line 13: /test.sh: No such file or directory +``` + +The issue is discussed in [this StackExchange 
post](https://unix.stackexchange.com/questions/101080/realpath-command-not-found). + +On OSX, running `brew install coreutils` resolves the issue. + ## Finding contributions to work on Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels ((enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-samples/amazon-eks-ami/labels/help%20wanted) issues is a great place to start. diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 18f69dd2d..24af30233 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -145,6 +145,14 @@ DEFAULT_CONTAINER_RUNTIME=dockerd if vercmp "$KUBELET_VERSION" gteq "1.24.0"; then IS_124_OR_GREATER=true DEFAULT_CONTAINER_RUNTIME=containerd +elif vercmp "$KUBELET_VERSION" gteq "1.22.0"; then + # Ensure that these exist for testing purposes + mkdir -p /etc/eks/ecr-credential-provider + touch /etc/eks/ecr-credential-provider/ecr-credential-provider-config + # These APIs are only available in alpha pre-1.24. + # This can be removed when version 1.23 is no longer supported. 
+ sed -i s,kubelet.config.k8s.io/v1beta1,kubelet.config.k8s.io/v1alpha1,g /etc/eks/ecr-credential-provider/ecr-credential-provider-config + sed -i s,credentialprovider.kubelet.k8s.io/v1beta1,credentialprovider.kubelet.k8s.io/v1alpha1,g /etc/eks/ecr-credential-provider/ecr-credential-provider-config fi # Set container runtime related variables diff --git a/files/ecr-credential-provider-config b/files/ecr-credential-provider-config index d6117ceb4..ae1f9d7a5 100644 --- a/files/ecr-credential-provider-config +++ b/files/ecr-credential-provider-config @@ -1,4 +1,4 @@ -apiVersion: kubelet.config.k8s.io/v1alpha1 +apiVersion: kubelet.config.k8s.io/v1beta1 kind: CredentialProviderConfig providers: - name: ecr-credential-provider @@ -9,6 +9,6 @@ providers: - "*.dkr.ecr.us-iso-east-1.c2s.ic.gov" - "*.dkr.ecr.us-isob-east-1.sc2s.sgov.gov" defaultCacheDuration: "12h" - apiVersion: credentialprovider.kubelet.k8s.io/v1alpha1 + apiVersion: credentialprovider.kubelet.k8s.io/v1beta1 args: - - get-credentials \ No newline at end of file + - get-credentials diff --git a/test/Dockerfile b/test/Dockerfile index cf0d7c020..4114980b1 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -1,6 +1,9 @@ FROM public.ecr.aws/aws-ec2/amazon-ec2-metadata-mock:v1.11.2 as aemm FROM public.ecr.aws/amazonlinux/amazonlinux:2 RUN yum install -y jq +RUN yum install -y wget +RUN wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 +RUN chmod a+x /usr/local/bin/yq ENV IMDS_ENDPOINT=127.0.0.1:1338 COPY --from=aemm /ec2-metadata-mock /sbin/ec2-metadata-mock diff --git a/test/cases/ecr-credential-provider-config.sh b/test/cases/ecr-credential-provider-config.sh new file mode 100755 index 000000000..e8339540c --- /dev/null +++ b/test/cases/ecr-credential-provider-config.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +set -euo pipefail + +exit_code=0 +TEMP_DIR=$(mktemp -d) + +# Setup test cases +mkdir -p /etc/eks/ecr-credential-provider +export 
CRED_PROVIDER_FILE="/etc/eks/ecr-credential-provider/ecr-credential-provider-config" + +echo "--> Should default to credentialprovider.kubelet.k8s.io/v1alpha1 and kubelet.config.k8s.io/v1alpha1 when below k8s version 1.24" + +# Ensure the credential provider config is present and fresh +cp /etc/eks/ecr-credential-provider-config $CRED_PROVIDER_FILE +# This variable is used to override the default value in the kubelet mock +export KUBELET_VERSION=v1.22.15-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_cred_provider_api="credentialprovider.kubelet.k8s.io/v1alpha1" +actual=$(yq e '.providers[0].apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_cred_provider_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.22 credential provider file to contain $expected_cred_provider_api" + exit 1 +fi + +expected_kubelet_config_api="kubelet.config.k8s.io/v1alpha1" +actual=$(yq e '.apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_kubelet_config_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.22 credential provider file to contain $expected_kubelet_config_api" + exit 1 +fi + +echo "--> Should default to credentialprovider.kubelet.k8s.io/v1beta1 and kubelet.config.k8s.io/v1beta1 when at or above k8s version 1.24" + +# Ensure the credential provider config is present and fresh +cp /etc/eks/ecr-credential-provider-config $CRED_PROVIDER_FILE +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_cred_provider_api="credentialprovider.kubelet.k8s.io/v1beta1" +actual=$(yq e '.providers[0].apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_cred_provider_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.24 credential provider file to contain $expected_cred_provider_api" + exit 1 +fi + +expected_kubelet_config_api="kubelet.config.k8s.io/v1beta1" +actual=$(yq e '.apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_kubelet_config_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.24 credential provider file to contain $expected_kubelet_config_api" + exit 1 +fi + +exit_code=0 diff --git a/test/cases/vercmp.sh b/test/cases/vercmp.sh index d020c7d81..93c8b24df 100755 --- a/test/cases/vercmp.sh +++ b/test/cases/vercmp.sh @@ -24,6 +24,12 @@ if [[ ${EXIT_CODE} -ne 0 ]]; then echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v1.0.0" lt "v1.1.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi # should fail EXIT_CODE=0 vercmp "1.0.0" lt "1.0.0" || EXIT_CODE=$? @@ -49,6 +55,12 @@ if [[ ${EXIT_CODE} -eq 0 ]]; then echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v2.0.0" lt "v1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi echo "--> Should compare less-than-or-equal-to" # should succeed @@ -70,6 +82,12 @@ if [[ ${EXIT_CODE} -ne 0 ]]; then echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v1.0.0" lteq "v2.0.0" || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi # should fail EXIT_CODE=0 vercmp "1.0.1" lteq "1.0.0" || EXIT_CODE=$? @@ -89,6 +107,12 @@ if [[ ${EXIT_CODE} -eq 0 ]]; then echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v2.0.0" lteq "v1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi echo "--> Should compare strictly equal-to" # should succeed @@ -98,6 +122,12 @@ if [[ ${EXIT_CODE} -ne 0 ]]; then echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v1.0.0" eq "v1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi # should fail EXIT_CODE=0 vercmp "1.0.1" eq "1.0.0" || EXIT_CODE=$? @@ -111,6 +141,12 @@ if [[ ${EXIT_CODE} -eq 0 ]]; then echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v1.0.0" eq "v1.0.1" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi echo "--> Should compare greater-than-or-equal-to" # should succeed @@ -132,6 +168,12 @@ if [[ ${EXIT_CODE} -ne 0 ]]; then echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v2.0.0" gteq "v1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi # should fail EXIT_CODE=0 vercmp "1.0.0" gteq "1.0.1" || EXIT_CODE=$? @@ -151,6 +193,12 @@ if [[ ${EXIT_CODE} -eq 0 ]]; then echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v1.0.0" gteq "v2.0.0" || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi echo "--> Should compare strictly greater-than" # should succeed @@ -172,6 +220,12 @@ if [[ ${EXIT_CODE} -ne 0 ]]; then echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v1.1.0" gt "v1.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi # should fail EXIT_CODE=0 vercmp "1.0.0" gt "1.0.0" || EXIT_CODE=$? @@ -197,3 +251,9 @@ if [[ ${EXIT_CODE} -eq 0 ]]; then echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" exit 1 fi +EXIT_CODE=0 +vercmp "v1.0.0" gt "v2.0.0" || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${EXIT_CODE}'" + exit 1 +fi diff --git a/test/test-harness.sh b/test/test-harness.sh index a04558086..b0cc2180f 100755 --- a/test/test-harness.sh +++ b/test/test-harness.sh @@ -38,8 +38,7 @@ docker build -t eks-optimized-ami -f "${SCRIPTPATH}/Dockerfile" "${SCRIPTPATH}/. 
overall_status=0 function run() { - docker run -v ${SCRIPTPATH}/../files/:/etc/eks/ \ - -v "$(realpath $1):/test.sh" \ + docker run -v "$(realpath $1):/test.sh" \ --attach STDOUT \ --attach STDERR \ --rm \ From ce1c11f9db5bf5a730e978e74e13174d4b9f73a3 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 10 Nov 2022 18:00:52 -0500 Subject: [PATCH 110/232] Update client.authentication.k8s.io to v1beta1 (#1095) --- Makefile | 12 ++++++++---- files/bootstrap.sh | 3 --- files/kubelet-kubeconfig | 4 ++-- scripts/install-worker.sh | 10 ++++++++++ test/Dockerfile | 9 +++++---- test/cases/ecr-credential-provider-config.sh | 17 +++++++++++------ 6 files changed, 36 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index 30ea43e98..43dd76ca0 100644 --- a/Makefile +++ b/Makefile @@ -78,19 +78,23 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI .PHONY: 1.20 1.20: ## Build EKS Optimized AL2 AMI - K8s 1.20 - $(MAKE) k8s kubernetes_version=1.20.15 kubernetes_build_date=2022-07-27 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.20.15 kubernetes_build_date=2022-10-31 pull_cni_from_github=true .PHONY: 1.21 1.21: ## Build EKS Optimized AL2 AMI - K8s 1.21 - $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2022-07-27 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2022-10-31 pull_cni_from_github=true .PHONY: 1.22 1.22: ## Build EKS Optimized AL2 AMI - K8s 1.22 - $(MAKE) k8s kubernetes_version=1.22.12 kubernetes_build_date=2022-07-27 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.22.15 kubernetes_build_date=2022-10-31 pull_cni_from_github=true .PHONY: 1.23 1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 - $(MAKE) k8s kubernetes_version=1.23.9 kubernetes_build_date=2022-07-27 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.23.13 kubernetes_build_date=2022-10-31 pull_cni_from_github=true + +.PHONY: 1.24 +1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 + $(MAKE) k8s 
kubernetes_version=1.24.7 kubernetes_build_date=2022-10-31 pull_cni_from_github=true .PHONY: help help: ## Display help diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 24af30233..dddb136b7 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -146,9 +146,6 @@ if vercmp "$KUBELET_VERSION" gteq "1.24.0"; then IS_124_OR_GREATER=true DEFAULT_CONTAINER_RUNTIME=containerd elif vercmp "$KUBELET_VERSION" gteq "1.22.0"; then - # Ensure that these exist for testing purposes - mkdir -p /etc/eks/ecr-credential-provider - touch /etc/eks/ecr-credential-provider/ecr-credential-provider-config # These APIs are only available in alpha pre-1.24. # This can be removed when version 1.23 is no longer supported. sed -i s,kubelet.config.k8s.io/v1beta1,kubelet.config.k8s.io/v1alpha1,g /etc/eks/ecr-credential-provider/ecr-credential-provider-config diff --git a/files/kubelet-kubeconfig b/files/kubelet-kubeconfig index 91c25cf29..e8ad7efd4 100644 --- a/files/kubelet-kubeconfig +++ b/files/kubelet-kubeconfig @@ -15,11 +15,11 @@ users: - name: kubelet user: exec: - apiVersion: client.authentication.k8s.io/v1alpha1 + apiVersion: client.authentication.k8s.io/v1beta1 command: /usr/bin/aws-iam-authenticator args: - "token" - "-i" - "CLUSTER_NAME" - --region - - "AWS_REGION" \ No newline at end of file + - "AWS_REGION" diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 0b1bc8964..023c6cd84 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -260,6 +260,16 @@ for binary in ${BINARIES[*]}; do sudo mv $binary /usr/bin/ done +# Verify that the aws-iam-authenticator is at last v0.5.9 or greater. Otherwise, nodes will be +# unable to join clusters due to upgrading to client.authentication.k8s.io/v1beta1 +iam_auth_version=$(sudo /usr/bin/aws-iam-authenticator version | jq -r .Version) +if vercmp "$iam_auth_version" lt "v0.5.9"; then + # To resolve this issue, you need to update the aws-iam-authenticator binary. 
Using binaries distributed by EKS + # with kubernetes_build_date 2022-10-31 or later include v0.5.10 or greater. + echo "❌ The aws-iam-authenticator should be on version v0.5.9 or later. Found $iam_auth_version" + exit 1 +fi + # Since CNI 0.7.0, all releases are done in the plugins repo. CNI_PLUGIN_FILENAME="cni-plugins-linux-${ARCH}-${CNI_PLUGIN_VERSION}" diff --git a/test/Dockerfile b/test/Dockerfile index 4114980b1..f30bf14ef 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -1,14 +1,15 @@ FROM public.ecr.aws/aws-ec2/amazon-ec2-metadata-mock:v1.11.2 as aemm FROM public.ecr.aws/amazonlinux/amazonlinux:2 -RUN yum install -y jq -RUN yum install -y wget -RUN wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -RUN chmod a+x /usr/local/bin/yq +RUN yum install -y jq && \ + yum install -y wget && \ + wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 && \ + chmod a+x /usr/local/bin/yq ENV IMDS_ENDPOINT=127.0.0.1:1338 COPY --from=aemm /ec2-metadata-mock /sbin/ec2-metadata-mock COPY files/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json COPY files/kubelet-kubeconfig /var/lib/kubelet/kubeconfig +COPY files/ecr-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config COPY test/entrypoint.sh /entrypoint.sh COPY files /etc/eks COPY files/bin/* /usr/bin/ diff --git a/test/cases/ecr-credential-provider-config.sh b/test/cases/ecr-credential-provider-config.sh index e8339540c..5d4856ed1 100755 --- a/test/cases/ecr-credential-provider-config.sh +++ b/test/cases/ecr-credential-provider-config.sh @@ -4,14 +4,20 @@ set -euo pipefail exit_code=0 TEMP_DIR=$(mktemp -d) -# Setup test cases -mkdir -p /etc/eks/ecr-credential-provider export CRED_PROVIDER_FILE="/etc/eks/ecr-credential-provider/ecr-credential-provider-config" +export CRED_PROVIDER_RESET_FILE="./cred-provider-config" + +# Store the original version of the config +cp 
$CRED_PROVIDER_FILE $CRED_PROVIDER_RESET_FILE +# Reset the file that may have changed +function reset_scenario { + echo "Resetting test scenario" + cp $CRED_PROVIDER_RESET_FILE $CRED_PROVIDER_FILE +} echo "--> Should default to credentialprovider.kubelet.k8s.io/v1alpha1 and kubelet.config.k8s.io/v1alpha1 when below k8s version 1.24" +reset_scenario -# Ensure the credential provider config is present and fresh -cp /etc/eks/ecr-credential-provider-config $CRED_PROVIDER_FILE # This variable is used to override the default value in the kubelet mock export KUBELET_VERSION=v1.22.15-eks-ba74326 /etc/eks/bootstrap.sh \ @@ -39,9 +45,8 @@ if [[ "$expected_kubelet_config_api" != "$actual" ]]; then fi echo "--> Should default to credentialprovider.kubelet.k8s.io/v1beta1 and kubelet.config.k8s.io/v1beta1 when at or above k8s version 1.24" +reset_scenario -# Ensure the credential provider config is present and fresh -cp /etc/eks/ecr-credential-provider-config $CRED_PROVIDER_FILE export KUBELET_VERSION=v1.24.15-eks-ba74326 /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ From 4dc2e4377ea02bf67d337ddd6e763ffd2841e40e Mon Sep 17 00:00:00 2001 From: Steven Davidovitz Date: Fri, 11 Nov 2022 18:03:04 -0800 Subject: [PATCH 111/232] Configure containerd registry certificates by default (#1049) --- files/bootstrap.sh | 2 ++ files/containerd-config.toml | 3 ++ test/Dockerfile | 8 ++++-- test/cases/container-runtime-defaults.sh | 1 - test/cases/containerd-config.sh | 35 ++++++++++++++++++++++++ test/mocks/sudo | 2 +- 6 files changed, 46 insertions(+), 5 deletions(-) create mode 100755 test/cases/containerd-config.sh diff --git a/files/bootstrap.sh b/files/bootstrap.sh index dddb136b7..76228fa63 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -540,6 +540,8 @@ EOF sudo cp -v /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/sandbox-image.service + 
# Validate containerd config + sudo containerd config dump > /dev/null systemctl daemon-reload systemctl enable containerd systemctl restart containerd diff --git a/files/containerd-config.toml b/files/containerd-config.toml index 8a668ce84..1cddeb2f6 100644 --- a/files/containerd-config.toml +++ b/files/containerd-config.toml @@ -11,6 +11,9 @@ default_runtime_name = "runc" [plugins."io.containerd.grpc.v1.cri"] sandbox_image = "SANDBOX_IMAGE" +[plugins."io.containerd.grpc.v1.cri".registry] +config_path = "/etc/containerd/certs.d:/etc/docker/certs.d" + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] runtime_type = "io.containerd.runc.v2" diff --git a/test/Dockerfile b/test/Dockerfile index f30bf14ef..9aaa44905 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -1,17 +1,19 @@ FROM public.ecr.aws/aws-ec2/amazon-ec2-metadata-mock:v1.11.2 as aemm FROM public.ecr.aws/amazonlinux/amazonlinux:2 -RUN yum install -y jq && \ - yum install -y wget && \ +RUN amazon-linux-extras enable docker && \ + yum install -y jq containerd wget && \ wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 && \ chmod a+x /usr/local/bin/yq ENV IMDS_ENDPOINT=127.0.0.1:1338 COPY --from=aemm /ec2-metadata-mock /sbin/ec2-metadata-mock +RUN mkdir -p /etc/eks/containerd +COPY files/ /etc/eks/ +COPY files/containerd-config.toml files/kubelet-containerd.service files/pull-sandbox-image.sh files/sandbox-image.service /etc/eks/containerd/ COPY files/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json COPY files/kubelet-kubeconfig /var/lib/kubelet/kubeconfig COPY files/ecr-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config COPY test/entrypoint.sh /entrypoint.sh -COPY files /etc/eks COPY files/bin/* /usr/bin/ COPY test/mocks/ /sbin/ ENTRYPOINT ["/entrypoint.sh"] diff --git a/test/cases/container-runtime-defaults.sh b/test/cases/container-runtime-defaults.sh index d4aaf407e..8218867c5 100755 --- 
a/test/cases/container-runtime-defaults.sh +++ b/test/cases/container-runtime-defaults.sh @@ -2,7 +2,6 @@ set -euo pipefail exit_code=0 -TEMP_DIR=$(mktemp -d) echo "--> Should allow dockerd as container runtime when below k8s version 1.24" # This variable is used to override the default value in the kubelet mock diff --git a/test/cases/containerd-config.sh b/test/cases/containerd-config.sh new file mode 100755 index 000000000..048ea9dfe --- /dev/null +++ b/test/cases/containerd-config.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +set -euo pipefail + +exit_code=0 + +echo "--> Default containerd config file should be valid" +STDERR_FILE=$(mktemp) +containerd -c /etc/eks/containerd/containerd-config.toml config dump > /dev/null 2> "$STDERR_FILE" || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: default containerd config file is invalid! $(cat "$STDERR_FILE")" + exit 1 +fi + +echo "--> Should fail when given an invalid containerd config" +CONTAINERD_TOML=$(mktemp containerd-XXXXX.toml) +cat > "$CONTAINERD_TOML" << EOF +[cgroup] +path = "foo" +[cgroup] +path = "bar" +EOF + +export KUBELET_VERSION=v1.24.15-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + --container-runtime containerd \ + --containerd-config-file "$CONTAINERD_TOML" \ + test || exit_code=$? 
+ +if [[ ${exit_code} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi diff --git a/test/mocks/sudo b/test/mocks/sudo index e485cef2e..7bc3d2ffb 100755 --- a/test/mocks/sudo +++ b/test/mocks/sudo @@ -1,4 +1,4 @@ #!/usr/bin/env bash set -euo pipefail -echo "mocking sudo with params $@" +exec "$@" From 90c5e73fbda727d200dfd0d787337a3adfc55d13 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 14 Nov 2022 21:05:27 -0800 Subject: [PATCH 112/232] Update CHANGELOG.md for release v20221112 (#1101) --- CHANGELOG.md | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b5d9cef46..a305f1061 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,52 @@ # Changelog +### AMI Release v20221112 +* amazon-eks-gpu-node-1.24-v20221112 +* amazon-eks-gpu-node-1.23-v20221112 +* amazon-eks-gpu-node-1.22-v20221112 +* amazon-eks-gpu-node-1.21-v20221112 +* amazon-eks-gpu-node-1.20-v20221112 +* amazon-eks-arm64-node-1.24-v20221112 +* amazon-eks-arm64-node-1.23-v20221112 +* amazon-eks-arm64-node-1.22-v20221112 +* amazon-eks-arm64-node-1.21-v20221112 +* amazon-eks-arm64-node-1.20-v20221112 +* amazon-eks-node-1.24-v20221112 +* amazon-eks-node-1.23-v20221112 +* amazon-eks-node-1.22-v20221112 +* amazon-eks-node-1.21-v20221112 +* amazon-eks-node-1.20-v20221112 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.7-20221112` +* `1.23.13-20221112` +* `1.22.15-20221112` +* `1.21.14-20221112` +* `1.20.15-20221112` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.24.7/2022-10-31/ +* s3://amazon-eks/1.23.13/2022-10-31/ +* s3://amazon-eks/1.22.15/2022-10-31/ +* s3://amazon-eks/1.21.14/2022-10-31/ +* s3://amazon-eks/1.20.15/2022-10-31/ + +AMI details: +* kernel: 5.4.219-126.411.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: runc-1.1.4-1.amzn2 +* 
cuda: 470.141.03-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +* Upgrades `runc` to version `1.1.4` +* Updates [aws-iam-authenticator](https://github.com/kubernetes-sigs/aws-iam-authenticator) to version `0.5.10` and updates `kubelet` versions to `1.22.15`, `1.23.13` and `1.24.7` +* [Updates `client.authentication.k8s.io` to `v1beta1`](https://github.com/awslabs/amazon-eks-ami/commit/ce1c11f9db5bf5a730e978e74e13174d4b9f73a3) +* [Updates credential provider API to beta for Kubernetes versions `1.24+`](https://github.com/awslabs/amazon-eks-ami/commit/a521047d1b097b9c3dbb562ca9bdab5a641f347f) +* [Installs awscli v2 bundle when possible](https://github.com/awslabs/amazon-eks-ami/commit/794ed5f10842b436e10c9bc89ee41491a6494ade) + ### AMI Release v20221104 * amazon-eks-gpu-node-1.24-v20221104 * amazon-eks-gpu-node-1.23-v20221104 From 524312f6d5f9763a641cbc4a2414ac0fcca38fb6 Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Thu, 17 Nov 2022 10:22:11 -0600 Subject: [PATCH 113/232] sym-link awscli to /bin (#1102) --- scripts/install-worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 023c6cd84..6c3fa305a 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -132,7 +132,7 @@ if [[ "$BINARY_BUCKET_REGION" != "us-iso-east-1" && "$BINARY_BUCKET_REGION" != " --retry-delay 1 \ -L "https://awscli.amazonaws.com/awscli-exe-linux-${MACHINE}.zip" -o "${AWSCLI_DIR}/awscliv2.zip" unzip -q "${AWSCLI_DIR}/awscliv2.zip" -d ${AWSCLI_DIR} - sudo "${AWSCLI_DIR}/aws/install" + sudo "${AWSCLI_DIR}/aws/install" --bin-dir /bin/ else echo "Installing awscli package" sudo yum install -y awscli From 670b3f2a1a24fee9b44a962347b28ad7a8dd971d Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 18 Nov 2022 09:58:12 -0800 Subject: [PATCH 114/232] =?UTF-8?q?Add=201.24=20to=20=E2=80=98all=E2=80=99?= =?UTF-8?q?=20target=20(#1104)?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 43dd76ca0..e66a9988b 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.20 1.21 1.22 1.23 ## Build all versions of EKS Optimized AL2 AMI +all: 1.20 1.21 1.22 1.23 1.24 ## Build all versions of EKS Optimized AL2 AMI # ensure that these flags are equivalent to the rules in the .editorconfig SHFMT_FLAGS := --list \ From 057f3e47b6c26f26f46a40d380112ea8598ca537 Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Fri, 18 Nov 2022 13:07:53 -0600 Subject: [PATCH 115/232] Cache pause, vpc-cni, and kube-proxy images during build (#938) --- Makefile | 2 +- README.md | 98 +++++++++++++++++++++ eks-worker-al2.json | 9 +- files/bootstrap.sh | 74 +++------------- files/get-ecr-uri.sh | 54 ++++++++++++ files/pull-image.sh | 27 ++++++ files/pull-sandbox-image.sh | 28 +----- scripts/install-worker.sh | 171 ++++++++++++++++++++++++++++-------- 8 files changed, 338 insertions(+), 125 deletions(-) create mode 100755 files/get-ecr-uri.sh create mode 100755 files/pull-image.sh diff --git a/Makefile b/Makefile index e66a9988b..f3986ee9a 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date kernel_version docker_version containerd_version runc_version cni_plugin_version source_ami_id source_ami_owners source_ami_filter_name arch instance_type security_group_id additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry ami_regions volume_type +PACKER_VARIABLES := $(shell $(PACKER_BINARY) inspect -machine-readable eks-worker-al2.json | grep 'template-variable' | awk -F ',' '{print $$4}') K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 
2,${K8S_VERSION_PARTS}) diff --git a/README.md b/README.md index 21c4b6641..1158ef702 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,104 @@ Provisioner](https://www.packer.io/docs/provisioners/shell.html) runs the necessary configuration tasks. Then, Packer creates an AMI from the instance and terminates the instance after the AMI is created. +### Container Image Caching + +Optionally, some container images can be cached during the AMI build process in order to reduce the latency of the node getting to a `Ready` state when launched. + +To turn on container image caching: + +``` +cache_container_images=true make 1.23 +``` + +When container image caching is enabled, the following images are cached: + - 602401143452.dkr.ecr.<region>.amazonaws.com/eks/kube-proxy:<version>-eksbuild.<build> + - 602401143452.dkr.ecr.<region>.amazonaws.com/eks/kube-proxy:<version>-minimal-eksbuild.<build> + - 602401143452.dkr.ecr.<region>.amazonaws.com/eks/pause:3.5 + - 602401143452.dkr.ecr.<region>.amazonaws.com/amazon-k8s-cni-init:<version> + - 602401143452.dkr.ecr.<region>.amazonaws.com/amazon-k8s-cni:<version> + +The account ID can be different depending on the region and partition you are building the AMI in. See [here](https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html) for more details. + +Since the VPC CNI is not versioned with K8s itself, the latest version of the VPC CNI and the default version, based on the response from the EKS DescribeAddonVersions at the time of the AMI build, will be cached. + +The images listed above are also tagged with each region in the partition the AMI is built in, since images are often built in one region and copied to others within the same partition. Images that are available to pull from an ECR FIPS endpoint are also tagged as such (e.g. `602401143452.dkr.ecr-fips.us-east-1.amazonaws.com/eks/pause:3.5`). + +When listing images on a node, you'll notice a long list of images. However, most of these images are simply tagged in different ways with no storage overhead. Images cached in the AMI total around 1.0 GiB.
In general, a node with no images cached using the VPC CNI will use around 500 MiB of images when in a `Ready` state with no other pods running on the node. + +### IAM Permissions + +To build the EKS Optimized AMI, you will need the following permissions: + +``` +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ec2:AttachVolume", + "ec2:AuthorizeSecurityGroupIngress", + "ec2:CopyImage", + "ec2:CreateImage", + "ec2:CreateKeypair", + "ec2:CreateSecurityGroup", + "ec2:CreateSnapshot", + "ec2:CreateTags", + "ec2:CreateVolume", + "ec2:DeleteKeyPair", + "ec2:DeleteSecurityGroup", + "ec2:DeleteSnapshot", + "ec2:DeleteVolume", + "ec2:DeregisterImage", + "ec2:DescribeImageAttribute", + "ec2:DescribeImages", + "ec2:DescribeInstances", + "ec2:DescribeInstanceStatus", + "ec2:DescribeRegions", + "ec2:DescribeSecurityGroups", + "ec2:DescribeSnapshots", + "ec2:DescribeSubnets", + "ec2:DescribeTags", + "ec2:DescribeVolumes", + "ec2:DetachVolume", + "ec2:GetPasswordData", + "ec2:ModifyImageAttribute", + "ec2:ModifyInstanceAttribute", + "ec2:ModifySnapshotAttribute", + "ec2:RegisterImage", + "ec2:RunInstances", + "ec2:StopInstances", + "ec2:TerminateInstances", + "eks:DescribeAddonVersions", + "ecr:GetAuthorizationToken" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ecr:BatchGetImage", + "ecr:BatchCheckLayerAvailability", + "ecr:GetDownloadUrlForLayer" + ], + "Resource": "arn:aws:ecr:us-west-2:602401143452:repository/*" + }, + { + "Effect": "Allow", + "Action": [ + "s3:GetObject" + ], + "Resource": "arn:aws:s3:::amazon-eks/*" + } + ] +} +``` + +You will need to use the region you are building the AMI in to specify the ECR repository resource in the second IAM statement. You may also need to change the account if you are building the AMI in a different partition or special region. You can see a mapping of regions to account ID [here](https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html). 
+If you're using a custom s3 bucket to vend different K8s binaries, you will need to change the resource in the third IAM statement above to reference your custom bucket. +For more information about the permissions required by Packer with different configurations, see the [docs](https://www.packer.io/plugins/builders/amazon#iam-task-or-instance-role). + ## Using the AMI If you are just getting started with Amazon EKS, we recommend that you follow diff --git a/eks-worker-al2.json b/eks-worker-al2.json index dda34dd39..6d3278cb0 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -13,6 +13,7 @@ "aws_session_token": "{{env `AWS_SESSION_TOKEN`}}", "binary_bucket_name": "amazon-eks", "binary_bucket_region": "us-west-2", + "cache_container_images": "false", "cni_plugin_version": "v0.8.6", "containerd_version": "1.6.6-1.amzn2.0.2", "creator": "{{env `USER`}}", @@ -23,7 +24,8 @@ "kms_key_id": "", "kubernetes_build_date": null, "kubernetes_version": null, - "launch_block_device_mappings_volume_size": "4", + "launch_block_device_mappings_volume_size": "8", + "pause_container_version": "3.5", "pull_cni_from_github": "true", "remote_folder": "", "runc_version": "1.1.3-1.amzn2.0.2", @@ -161,7 +163,10 @@ "AWS_ACCESS_KEY_ID={{user `aws_access_key_id`}}", "AWS_SECRET_ACCESS_KEY={{user `aws_secret_access_key`}}", "AWS_SESSION_TOKEN={{user `aws_session_token`}}", - "SONOBUOY_E2E_REGISTRY={{user `sonobuoy_e2e_registry`}}" + "SONOBUOY_E2E_REGISTRY={{user `sonobuoy_e2e_registry`}}", + "PAUSE_CONTAINER_VERSION={{user `pause_container_version`}}", + "KUBE_PROXY_VERSION_SUFFIX={{user `kube_proxy_version_suffix`}}", + "CACHE_CONTAINER_IMAGES={{user `cache_container_images`}}" ] }, { diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 76228fa63..78e9d4fab 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -178,51 +178,6 @@ SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" CLUSTER_ID="${CLUSTER_ID:-}" -function 
get_pause_container_account_for_region() { - local region="$1" - case "${region}" in - ap-east-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-800184023465}" - ;; - me-south-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-558608220178}" - ;; - cn-north-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-918309763551}" - ;; - cn-northwest-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-961992271922}" - ;; - us-gov-west-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-013241004608}" - ;; - us-gov-east-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-151742754352}" - ;; - us-iso-east-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-725322719131}" - ;; - us-isob-east-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-187977181151}" - ;; - af-south-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-877085696533}" - ;; - eu-south-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-590381155156}" - ;; - ap-southeast-3) - echo "${PAUSE_CONTAINER_ACCOUNT:-296578399912}" - ;; - me-central-1) - echo "${PAUSE_CONTAINER_ACCOUNT:-759879836304}" - ;; - *) - echo "${PAUSE_CONTAINER_ACCOUNT:-602401143452}" - ;; - esac -} - # Helper function which calculates the amount of the given resource (either CPU or memory) # to reserve in a given resource range, specified by a start and end of the range and a percentage # of the resource to reserve. 
Note that we return zero if the start of the resource range is @@ -314,8 +269,8 @@ if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then exit 1 fi -PAUSE_CONTAINER_ACCOUNT=$(get_pause_container_account_for_region "${AWS_DEFAULT_REGION}") -PAUSE_CONTAINER_IMAGE=${PAUSE_CONTAINER_IMAGE:-$PAUSE_CONTAINER_ACCOUNT.dkr.ecr.$AWS_DEFAULT_REGION.$AWS_SERVICES_DOMAIN/eks/pause} +ECR_URI=$(/etc/eks/get-ecr-uri.sh "${AWS_DEFAULT_REGION}" "${AWS_SERVICES_DOMAIN}" "${PAUSE_CONTAINER_ACCOUNT:-}") +PAUSE_CONTAINER_IMAGE=${PAUSE_CONTAINER_IMAGE:-$ECR_URI/eks/pause} PAUSE_CONTAINER="$PAUSE_CONTAINER_IMAGE:$PAUSE_CONTAINER_VERSION" ### kubelet kubeconfig @@ -525,29 +480,26 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then sudo mkdir -p /etc/containerd sudo mkdir -p /etc/cni/net.d - mkdir -p /etc/systemd/system/containerd.service.d - cat << EOF > /etc/systemd/system/containerd.service.d/10-compat-symlink.conf -[Service] -ExecStartPre=/bin/ln -sf /run/containerd/containerd.sock /run/dockershim.sock -EOF if [[ -n "$CONTAINERD_CONFIG_FILE" ]]; then sudo cp -v $CONTAINERD_CONFIG_FILE /etc/eks/containerd/containerd-config.toml fi echo "$(jq '.cgroupDriver="systemd"' $KUBELET_CONFIG)" > $KUBELET_CONFIG sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml - sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml - sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service + + # Check if the containerd config file is the same as the one used in the image build. + # If different, then restart containerd w/ proper config + if ! 
cmp -s /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml; then + sudo cp -v /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml + sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service + sudo chown root:root /etc/systemd/system/sandbox-image.service + systemctl daemon-reload + systemctl enable containerd sandbox-image + systemctl restart sandbox-image containerd + fi sudo cp -v /etc/eks/containerd/kubelet-containerd.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service - sudo chown root:root /etc/systemd/system/sandbox-image.service # Validate containerd config sudo containerd config dump > /dev/null - systemctl daemon-reload - systemctl enable containerd - systemctl restart containerd - systemctl enable sandbox-image - systemctl start sandbox-image - elif [[ "$CONTAINER_RUNTIME" = "dockerd" ]]; then mkdir -p /etc/docker bash -c "/sbin/iptables-save > /etc/sysconfig/iptables" diff --git a/files/get-ecr-uri.sh b/files/get-ecr-uri.sh new file mode 100755 index 000000000..f5e87b932 --- /dev/null +++ b/files/get-ecr-uri.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +set -euo pipefail + +# More details about the mappings in this file can be found here https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html + +region=$1 +aws_domain=$2 +if [[ $# -eq 3 ]] && [[ ! 
-z $3 ]]; then + acct=$3 +else + case "${region}" in + ap-east-1) + acct="800184023465" + ;; + me-south-1) + acct="558608220178" + ;; + cn-north-1) + acct="918309763551" + ;; + cn-northwest-1) + acct="961992271922" + ;; + us-gov-west-1) + acct="013241004608" + ;; + us-gov-east-1) + acct="151742754352" + ;; + us-iso-east-1) + acct="725322719131" + ;; + us-isob-east-1) + acct="187977181151" + ;; + af-south-1) + acct="877085696533" + ;; + eu-south-1) + acct="590381155156" + ;; + ap-southeast-3) + acct="296578399912" + ;; + me-central-1) + acct="759879836304" + ;; + *) + acct="602401143452" + ;; + esac +fi + +echo "${acct}.dkr.ecr.${region}.${aws_domain}" diff --git a/files/pull-image.sh b/files/pull-image.sh new file mode 100755 index 000000000..2d37d88b9 --- /dev/null +++ b/files/pull-image.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +img=$1 +region=$(echo "${img}" | cut -f4 -d ".") +MAX_RETRIES=3 + +function retry() { + local rc=0 + for attempt in $(seq 0 $MAX_RETRIES); do + rc=0 + [[ $attempt -gt 0 ]] && echo "Attempt $attempt of $MAX_RETRIES" 1>&2 + "$@" + rc=$? + [[ $rc -eq 0 ]] && break + [[ $attempt -eq $MAX_RETRIES ]] && exit $rc + local jitter=$((1 + RANDOM % 10)) + local sleep_sec="$(($((5 << $((1 + $attempt)))) + $jitter))" + sleep $sleep_sec + done +} + +ecr_password=$(retry aws ecr get-login-password --region $region) +if [[ -z ${ecr_password} ]]; then + echo >&2 "Unable to retrieve the ECR password." 
+ exit 1 +fi +retry sudo ctr --namespace k8s.io image pull "${img}" --user AWS:${ecr_password} diff --git a/files/pull-sandbox-image.sh b/files/pull-sandbox-image.sh index 270be7d32..523e7ed4c 100644 --- a/files/pull-sandbox-image.sh +++ b/files/pull-sandbox-image.sh @@ -1,27 +1,5 @@ #!/usr/bin/env bash +set -euo pipefail -### fetching sandbox image from /etc/containerd/config.toml -sandbox_image=$(awk -F'[ ="]+' '$1 == "sandbox_image" { print $2 }' /etc/containerd/config.toml) -region=$(echo "$sandbox_image" | cut -f4 -d ".") -ecr_password=$(aws ecr get-login-password --region $region) -API_RETRY_ATTEMPTS=5 - -for attempt in $(seq 0 $API_RETRY_ATTEMPTS); do - rc=0 - if [[ $attempt -gt 0 ]]; then - echo "Attempt $attempt of $API_RETRY_ATTEMPTS" - fi - ### pull sandbox image from ecr - ### username will always be constant i.e; AWS - sudo ctr --namespace k8s.io image pull $sandbox_image --user AWS:$ecr_password - rc=$? - if [[ $rc -eq 0 ]]; then - break - fi - if [[ $attempt -eq $API_RETRY_ATTEMPTS ]]; then - exit $rc - fi - jitter=$((1 + RANDOM % 10)) - sleep_sec="$(($((5 << $((1 + $attempt)))) + $jitter))" - sleep $sleep_sec -done +sandbox_image="$(awk -F'[ ="]+' '$1 == "sandbox_image" { print $2 }' /etc/containerd/config.toml)" +/etc/eks/containerd/pull-image.sh "${sandbox_image}" diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 6c3fa305a..154e6dfa3 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -4,6 +4,7 @@ set -o pipefail set -o nounset set -o errexit IFS=$'\n\t' +export AWS_DEFAULT_OUTPUT="json" TEMPLATE_DIR=${TEMPLATE_DIR:-/tmp/worker} @@ -30,6 +31,8 @@ validate_env_set CNI_PLUGIN_VERSION validate_env_set KUBERNETES_VERSION validate_env_set KUBERNETES_BUILD_DATE validate_env_set PULL_CNI_FROM_GITHUB +validate_env_set PAUSE_CONTAINER_VERSION +validate_env_set CACHE_CONTAINER_IMAGES ################################################################################ ### Machine Architecture 
####################################################### @@ -138,46 +141,18 @@ else sudo yum install -y awscli fi -################################################################################ -### Docker ##################################################################### -################################################################################ - -sudo yum install -y device-mapper-persistent-data lvm2 - -INSTALL_DOCKER="${INSTALL_DOCKER:-true}" -if [[ "$INSTALL_DOCKER" == "true" ]]; then - sudo amazon-linux-extras enable docker - sudo groupadd -og 1950 docker - sudo useradd --gid $(getent group docker | cut -d: -f3) docker - - # install runc and lock version - sudo yum install -y runc-${RUNC_VERSION} - sudo yum versionlock runc-* - - # install containerd and lock version - sudo yum install -y containerd-${CONTAINERD_VERSION} - sudo yum versionlock containerd-* - - # install docker and lock version - sudo yum install -y docker-${DOCKER_VERSION}* - sudo yum versionlock docker-* - sudo usermod -aG docker $USER - - # Remove all options from sysconfig docker. - sudo sed -i '/OPTIONS/d' /etc/sysconfig/docker - - sudo mkdir -p /etc/docker - sudo mv $TEMPLATE_DIR/docker-daemon.json /etc/docker/daemon.json - sudo chown root:root /etc/docker/daemon.json - - # Enable docker daemon to start on boot. 
- sudo systemctl daemon-reload -fi - ############################################################################### ### Containerd setup ########################################################## ############################################################################### +# install runc and lock version +sudo yum install -y runc-${RUNC_VERSION} +sudo yum versionlock runc-* + +# install containerd and lock version +sudo yum install -y containerd-${CONTAINERD_VERSION} +sudo yum versionlock containerd-* + sudo mkdir -p /etc/eks/containerd if [ -f "/etc/eks/containerd/containerd-config.toml" ]; then ## this means we are building a gpu ami and have already placed a containerd configuration file in /etc/eks @@ -195,7 +170,15 @@ fi sudo mv $TEMPLATE_DIR/kubelet-containerd.service /etc/eks/containerd/kubelet-containerd.service sudo mv $TEMPLATE_DIR/sandbox-image.service /etc/eks/containerd/sandbox-image.service sudo mv $TEMPLATE_DIR/pull-sandbox-image.sh /etc/eks/containerd/pull-sandbox-image.sh +sudo mv $TEMPLATE_DIR/pull-image.sh /etc/eks/containerd/pull-image.sh sudo chmod +x /etc/eks/containerd/pull-sandbox-image.sh +sudo chmod +x /etc/eks/containerd/pull-image.sh + +sudo mkdir -p /etc/systemd/system/containerd.service.d +cat << EOF | sudo tee /etc/systemd/system/containerd.service.d/10-compat-symlink.conf +[Service] +ExecStartPre=/bin/ln -sf /run/containerd/containerd.sock /run/dockershim.sock +EOF cat << EOF | sudo tee -a /etc/modules-load.d/containerd.conf overlay @@ -208,6 +191,34 @@ net.bridge.bridge-nf-call-iptables = 1 net.ipv4.ip_forward = 1 EOF +################################################################################ +### Docker ##################################################################### +################################################################################ + +sudo yum install -y device-mapper-persistent-data lvm2 + +INSTALL_DOCKER="${INSTALL_DOCKER:-true}" +if [[ "$INSTALL_DOCKER" == "true" ]]; then + sudo amazon-linux-extras 
enable docker + sudo groupadd -og 1950 docker + sudo useradd --gid $(getent group docker | cut -d: -f3) docker + + # install docker and lock version + sudo yum install -y docker-${DOCKER_VERSION}* + sudo yum versionlock docker-* + sudo usermod -aG docker $USER + + # Remove all options from sysconfig docker. + sudo sed -i '/OPTIONS/d' /etc/sysconfig/docker + + sudo mkdir -p /etc/docker + sudo mv $TEMPLATE_DIR/docker-daemon.json /etc/docker/daemon.json + sudo chown root:root /etc/docker/daemon.json + + # Enable docker daemon to start on boot. + sudo systemctl daemon-reload +fi + ################################################################################ ### Logrotate ################################################################## ################################################################################ @@ -331,6 +342,8 @@ sudo systemctl disable kubelet ################################################################################ sudo mkdir -p /etc/eks +sudo mv $TEMPLATE_DIR/get-ecr-uri.sh /etc/eks/get-ecr-uri.sh +sudo chmod +x /etc/eks/get-ecr-uri.sh sudo mv $TEMPLATE_DIR/eni-max-pods.txt /etc/eks/eni-max-pods.txt sudo mv $TEMPLATE_DIR/bootstrap.sh /etc/eks/bootstrap.sh sudo chmod +x /etc/eks/bootstrap.sh @@ -363,6 +376,92 @@ if vercmp "$KUBERNETES_VERSION" gteq "1.22.0"; then sudo mv $TEMPLATE_DIR/ecr-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config fi +################################################################################ +### Cache Images ############################################################### +################################################################################ +if [[ "$CACHE_CONTAINER_IMAGES" == "true" && "$BINARY_BUCKET_REGION" != "us-iso-east-1" && "$BINARY_BUCKET_REGION" != "us-isob-east-1" ]]; then + AWS_DOMAIN=$(imds 'latest/meta-data/services/domain') + ECR_URI=$(/etc/eks/get-ecr-uri.sh "${BINARY_BUCKET_REGION}" "${AWS_DOMAIN}") + + 
PAUSE_CONTAINER="${ECR_URI}/eks/pause:${PAUSE_CONTAINER_VERSION}" + cat /etc/eks/containerd/containerd-config.toml | sed s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g | sudo tee /etc/eks/containerd/containerd-cached-pause-config.toml + sudo cp -v /etc/eks/containerd/containerd-cached-pause-config.toml /etc/containerd/config.toml + sudo cp -v /etc/eks/containerd/sandbox-image.service /etc/systemd/system/sandbox-image.service + sudo chown root:root /etc/systemd/system/sandbox-image.service + sudo systemctl daemon-reload + sudo systemctl start containerd + sudo systemctl enable containerd sandbox-image + + K8S_MINOR_VERSION=$(echo "${KUBERNETES_VERSION}" | cut -d'.' -f1-2) + KUBE_PROXY_ADDON_VERSIONS=$(aws eks describe-addon-versions --addon-name kube-proxy --kubernetes-version=${K8S_MINOR_VERSION}) + + DEFAULT_KUBE_PROXY_FULL_VERSION=$(echo "${KUBE_PROXY_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] .defaultVersion==true).addonVersion') + DEFAULT_KUBE_PROXY_VERSION=$(echo "${DEFAULT_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f1) + DEFAULT_KUBE_PROXY_PLATFORM_VERSION=$(echo "${DEFAULT_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f2) + + LATEST_KUBE_PROXY_FULL_VERSION=$(echo "${KUBE_PROXY_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) + LATEST_KUBE_PROXY_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f1) + LATEST_KUBE_PROXY_PLATFORM_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f2) + + KUBE_PROXY_IMGS=( + ## Default kube-proxy images + "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" + "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-minimal-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" + + ## Latest kube-proxy images + "${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" + "${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-minimal-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" + ) + + #### 
Cache VPC CNI images starting with the addon default version and the latest version + VPC_CNI_ADDON_VERSIONS=$(aws eks describe-addon-versions --addon-name vpc-cni --kubernetes-version=${K8S_MINOR_VERSION}) + DEFAULT_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] .defaultVersion==true).addonVersion') + LATEST_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) + CNI_IMG="${ECR_URI}/amazon-k8s-cni" + CNI_INIT_IMG="${CNI_IMG}-init" + CNI_IMGS=( + ## Default VPC CNI Images + "${CNI_IMG}:${DEFAULT_VPC_CNI_VERSION}" + "${CNI_INIT_IMG}:${DEFAULT_VPC_CNI_VERSION}" + + ## Latest VPC CNI Images + "${CNI_IMG}:${LATEST_VPC_CNI_VERSION}" + "${CNI_INIT_IMG}:${LATEST_VPC_CNI_VERSION}" + ) + + CACHED_IMGS=( + "${PAUSE_CONTAINER}" + ${KUBE_PROXY_IMGS[@]} + ${CNI_IMGS[@]} + ) + + for img in "${CACHED_IMGS[@]}"; do + ## only kube-proxy-minimal is vended for K8s 1.24+ + if [[ "${img}" == *"kube-proxy:"* ]] && [[ "${img}" != *"-minimal-"* ]] && vercmp "${K8S_MINOR_VERSION}" gteq "1.24"; then + continue + fi + /etc/eks/containerd/pull-image.sh "${img}" + done + + #### Tag the pulled down image for all other regions in the partition + for region in $(aws ec2 describe-regions --all-regions | jq -r '.Regions[] .RegionName'); do + for img in "${CACHED_IMGS[@]}"; do + regional_img="${img/$BINARY_BUCKET_REGION/$region}" + sudo ctr -n k8s.io image tag "${img}" "${regional_img}" || : + ## Tag ECR fips endpoint for supported regions + if [[ "${region}" =~ (us-east-1|us-east-2|us-west-1|us-west-2|us-gov-east-1|us-gov-east-2) ]]; then + regional_fips_img="${regional_img/.ecr./.ecr-fips.}" + sudo ctr -n k8s.io image tag "${img}" "${regional_fips_img}" || : + sudo ctr -n k8s.io image tag "${img}" "${regional_fips_img/-eksbuild.1/}" || : + fi + ## Cache the non-addon VPC CNI images since "v*.*.*-eksbuild.1" is equivalent to leaving off the eksbuild suffix + if 
[[ "${img}" == *"-cni"*"-eksbuild.1" ]]; then + sudo ctr -n k8s.io image tag "${img}" "${regional_img/-eksbuild.1/}" || : + fi + done + done +fi + ################################################################################ ### SSM Agent ################################################################## ################################################################################ From 21870b9f3be2c25e3a95b3adc6fef831e65bda40 Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Fri, 18 Nov 2022 13:09:41 -0600 Subject: [PATCH 116/232] Disable yum updates in cloud-init (#1074) --- scripts/install-worker.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 154e6dfa3..669eb4330 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -501,7 +501,14 @@ echo fs.inotify.max_user_instances=8192 | sudo tee -a /etc/sysctl.conf echo vm.max_map_count=524288 | sudo tee -a /etc/sysctl.conf ################################################################################ -### adding log-collector-script ############################################### +### adding log-collector-script ################################################ ################################################################################ sudo mkdir -p /etc/eks/log-collector-script/ sudo cp $TEMPLATE_DIR/log-collector-script/eks-log-collector.sh /etc/eks/log-collector-script/ + +################################################################################ +### Remove Yum Update from cloud-init config ################################### +################################################################################ +sudo sed -i \ + 's/ - package-update-upgrade-install/# Removed so that nodes do not have version skew based on when the node was started.\n# - package-update-upgrade-install/' \ + /etc/cloud/cloud.cfg From 5fcd34e7d5bf5666e5b86120421ee3111ac5439b Mon Sep 17 00:00:00 2001 From: Carter Date: 
Mon, 21 Nov 2022 10:30:04 -0800 Subject: [PATCH 117/232] Move variable defaults to -var-file (#1079) --- Makefile | 12 ++++-- README.md | 12 ++++++ eks-worker-al2-variables.json | 36 ++++++++++++++++++ eks-worker-al2.json | 71 ++++++++++++++++++----------------- 4 files changed, 93 insertions(+), 38 deletions(-) create mode 100644 eks-worker-al2-variables.json diff --git a/Makefile b/Makefile index f3986ee9a..6eb4e4b67 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PACKER_BINARY ?= packer -PACKER_VARIABLES := $(shell $(PACKER_BINARY) inspect -machine-readable eks-worker-al2.json | grep 'template-variable' | awk -F ',' '{print $$4}') +AVAILABLE_PACKER_VARIABLES := $(shell $(PACKER_BINARY) inspect -machine-readable eks-worker-al2.json | grep 'template-variable' | awk -F ',' '{print $$4}') K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) @@ -65,14 +65,20 @@ lint: ## Check the source files for syntax and format issues test: ## run the test-harness test/test-harness.sh +# include only variables which have a defined value +PACKER_VARIABLES := $(foreach packerVar,$(AVAILABLE_PACKER_VARIABLES),$(if $($(packerVar)),$(packerVar))) +PACKER_VAR_FLAGS := -var-file eks-worker-al2-variables.json \ +$(if $(PACKER_VARIABLE_FILE),--var-file=$(PACKER_VARIABLE_FILE),) \ +$(foreach packerVar,$(PACKER_VARIABLES),-var $(packerVar)='$($(packerVar))') + .PHONY: validate validate: ## Validate packer config - $(PACKER_BINARY) validate $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json + $(PACKER_BINARY) validate $(PACKER_VAR_FLAGS) eks-worker-al2.json .PHONY: k8s k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI @echo "$(T_GREEN)Building AMI for version $(T_YELLOW)$(kubernetes_version)$(T_GREEN) on $(T_YELLOW)$(arch)$(T_RESET)" - $(PACKER_BINARY) build -timestamp-ui -color=false $(foreach 
packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json + $(PACKER_BINARY) build -timestamp-ui -color=false $(PACKER_VAR_FLAGS) eks-worker-al2.json # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html diff --git a/README.md b/README.md index 1158ef702..04f1bca1d 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,18 @@ To build an Amazon EKS Worker AMI for a particular Kubernetes version run the fo ```bash make 1.23 ## Build a Amazon EKS Worker AMI for k8s 1.23 ``` + +### AMI template variables + +Default values for most variables are defined in [a default variable file](eks-worker-al2-variables.json). + +Users have the following options for specifying their own values: + +1. Provide a variable file with the `PACKER_VARIABLE_FILE` argument to `make`. Values in this file will override values in the default variable file. Your variable file does not need to include all possible variables, as it will be merged with the default variable file. +2. Pass a key-value pair for any template variable to `make`. These values will override any values specified using the first method. + +**Note** that some variables (such as `arch` and `kubernetes_version`) do not have a sensible, static default, and are satisfied by the Makefile. Such variables do not appear in the default variable file, and must be overridden (if necessary) by the second method described above. 
+ ### Building against other versions of Kubernetes binaries To build an Amazon EKS Worker AMI with other versions of Kubernetes that are not listed above run the following AWS Command Line Interface (AWS CLI) commands to obtain values for KUBERNETES_VERSION, KUBERNETES_BUILD_DATE, PLATFORM, ARCH from S3 diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json new file mode 100644 index 000000000..5f1caaa0b --- /dev/null +++ b/eks-worker-al2-variables.json @@ -0,0 +1,36 @@ +{ + "additional_yum_repos": "", + "ami_description": "EKS Kubernetes Worker AMI with AmazonLinux2 image", + "ami_regions": "", + "ami_users": "", + "associate_public_ip_address": "", + "aws_access_key_id": "{{env `AWS_ACCESS_KEY_ID`}}", + "aws_region": "us-west-2", + "aws_secret_access_key": "{{env `AWS_SECRET_ACCESS_KEY`}}", + "aws_session_token": "{{env `AWS_SESSION_TOKEN`}}", + "binary_bucket_name": "amazon-eks", + "binary_bucket_region": "us-west-2", + "cache_container_images": "false", + "cni_plugin_version": "v0.8.6", + "containerd_version": "1.6.6-1.amzn2.0.2", + "creator": "{{env `USER`}}", + "docker_version": "20.10.17-1.amzn2.0.1", + "encrypted": "false", + "kernel_version": "", + "kms_key_id": "", + "launch_block_device_mappings_volume_size": "8", + "pause_container_version": "3.5", + "pull_cni_from_github": "true", + "remote_folder": "", + "runc_version": "1.1.3-1.amzn2.0.2", + "security_group_id": "", + "sonobuoy_e2e_registry": "", + "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", + "source_ami_id": "", + "source_ami_owners": "137112412989", + "ssh_interface": "", + "ssh_username": "ec2-user", + "subnet_id": "", + "temporary_security_group_source_cidrs": "", + "volume_type": "gp2" +} \ No newline at end of file diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 6d3278cb0..96394457c 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -1,44 +1,45 @@ { + "_comment": "All template variables are enumerated here; and most variables have a default 
value defined in eks-worker-al2-variables.json", "variables": { - "additional_yum_repos": "", - "ami_description": "EKS Kubernetes Worker AMI with AmazonLinux2 image", + "additional_yum_repos": null, + "ami_description": null, "ami_name": null, - "ami_regions": "", - "ami_users": "", + "ami_regions": null, + "ami_users": null, "arch": null, - "associate_public_ip_address": "", - "aws_access_key_id": "{{env `AWS_ACCESS_KEY_ID`}}", - "aws_region": "us-west-2", - "aws_secret_access_key": "{{env `AWS_SECRET_ACCESS_KEY`}}", - "aws_session_token": "{{env `AWS_SESSION_TOKEN`}}", - "binary_bucket_name": "amazon-eks", - "binary_bucket_region": "us-west-2", - "cache_container_images": "false", - "cni_plugin_version": "v0.8.6", - "containerd_version": "1.6.6-1.amzn2.0.2", - "creator": "{{env `USER`}}", - "docker_version": "20.10.17-1.amzn2.0.1", - "encrypted": "false", + "associate_public_ip_address": null, + "aws_access_key_id": null, + "aws_region": null, + "aws_secret_access_key": null, + "aws_session_token": null, + "binary_bucket_name": null, + "binary_bucket_region": null, + "cache_container_images": null, + "cni_plugin_version": null, + "containerd_version": null, + "creator": null, + "docker_version": null, + "encrypted": null, "instance_type": null, - "kernel_version": "", - "kms_key_id": "", + "kernel_version": null, + "kms_key_id": null, "kubernetes_build_date": null, "kubernetes_version": null, - "launch_block_device_mappings_volume_size": "8", - "pause_container_version": "3.5", - "pull_cni_from_github": "true", - "remote_folder": "", - "runc_version": "1.1.3-1.amzn2.0.2", - "security_group_id": "", - "sonobuoy_e2e_registry": "", - "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", - "source_ami_id": "", - "source_ami_owners": "137112412989", - "ssh_interface": "", - "ssh_username": "ec2-user", - "subnet_id": "", - "temporary_security_group_source_cidrs": "", - "volume_type": "gp2" + "launch_block_device_mappings_volume_size": null, + "pause_container_version": 
null, + "pull_cni_from_github": null, + "remote_folder": null, + "runc_version": null, + "security_group_id": null, + "sonobuoy_e2e_registry": null, + "source_ami_filter_name": null, + "source_ami_id": null, + "source_ami_owners": null, + "ssh_interface": null, + "ssh_username": null, + "subnet_id": null, + "temporary_security_group_source_cidrs": null, + "volume_type": null }, "builders": [ { @@ -223,4 +224,4 @@ } } ] -} +} \ No newline at end of file From 5e18bbbdb987ba384a21ad1a8c08e0abf7544332 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 21 Nov 2022 16:53:39 -0800 Subject: [PATCH 118/232] Reorganize documentation (#1105) --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- PLAN.md | 30 -- README.md | 252 +---------------- USER_GUIDE.md | 25 -- CHANGELOG.md => doc/CHANGELOG.md | 0 CODE_OF_CONDUCT.md => doc/CODE_OF_CONDUCT.md | 0 CONTRIBUTING.md => doc/CONTRIBUTING.md | 0 doc/USER_GUIDE.md | 276 +++++++++++++++++++ 8 files changed, 290 insertions(+), 295 deletions(-) delete mode 100644 PLAN.md delete mode 100644 USER_GUIDE.md rename CHANGELOG.md => doc/CHANGELOG.md (100%) rename CODE_OF_CONDUCT.md => doc/CODE_OF_CONDUCT.md (100%) rename CONTRIBUTING.md => doc/CONTRIBUTING.md (100%) create mode 100644 doc/USER_GUIDE.md diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 09b7a68ad..360447696 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -11,4 +11,4 @@ By submitting this pull request, I confirm that you can use, modify, copy, and r -*[See this guide for recommended testing for PRs.](https://github.com/awslabs/amazon-eks-ami/blob/master/CONTRIBUTING.md#testing-changes) Some tests may not apply. Completing tests and providing additional validation steps are not required, but it is recommended and may reduce review time and time to merge.* +*[See this guide for recommended testing for PRs.](../doc/CONTRIBUTING.md#testing-changes) Some tests may not apply. 
Completing tests and providing additional validation steps are not required, but it is recommended and may reduce review time and time to merge.* diff --git a/PLAN.md b/PLAN.md deleted file mode 100644 index c60bb07d7..000000000 --- a/PLAN.md +++ /dev/null @@ -1,30 +0,0 @@ -### Goal - -The goal of this plan is to move the `amazon-eks-ami` package from Amazon Web Services - Labs to Amazon Web Services. EKS and EKS customers depend on this package to build and vend AMIs used in production, and while EKS does test the AMIs before releasing, we'd like to enable more rigorous testing and provide customers more visibility into the process AMIs go through before releasing. - -To achieve the higher level goal of moving the project to an AWS project, here are the following goals: - -1. As much as possible, move all scripts, processes, etc. to the open by including maintaining all related scripts in a GitHub repo and using common tools for testing and releasing OSS software. -1. Implement processes that enable timely support for issues and PRs -1. Improve the safety and reliablity of releases by improving testing - -### Stage 1: Improve GitHub Repo Hygiene - -1. Create GitHub project board for tracking progress on current stage -1. Create GitHub project roadmap, similar to [this one](https://github.com/aws/aws-controllers-k8s/projects/1) -1. Triage 100% of current GitHub issues and set SLA to 3 days going forward -1. Review 100% of current PRs and set SLA to 3 days going forward for initial review -1. Update README.md so that customers are comfortable building AMIs, understand how it works and know how to test custom AMIs manually - -### Stage 2: Improve Safety and Reliability - -1. Build AMIs as part of PR process -1. Enable running Kubernetes conformance tests (or similar) with built AMIs -1. Enable adding additional tests to validate built AMIs -1. Run end-to-end tests are part of the PR process -1. 
All EKS Linux AMIs can be built from GitHub repo, including ARM, GPU, Bottlerocket, etc. - -### Stage 3: Productionalize Release Process - -1. Customers have some visibility into releases and the release process -1. New AMIs are built and released from the GitHub repo automatically, either on a schedule or after PRs are merged diff --git a/README.md b/README.md index 04f1bca1d..97baeed2f 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,16 @@ custom Amazon EKS AMI with [HashiCorp Packer](https://www.packer.io/). This is the same configuration that Amazon EKS uses to create the official Amazon EKS-optimized AMI. -## Setup +## Getting started + +If you are just getting started with Amazon EKS, we recommend that you follow +our [Getting Started](https://docs.aws.amazon.com/eks/latest/userguide/getting-started.html) +chapter in the Amazon EKS User Guide. If you already have a cluster, and you +want to launch a node group with your new AMI, see [Launching Amazon EKS Worker +Nodes](https://docs.aws.amazon.com/eks/latest/userguide/launch-workers.html) +in the Amazon EKS User Guide. + +## Pre-requisites You must have [Packer](https://www.packer.io/) version 1.8.0 or later installed on your local system. For more information, see [Installing Packer](https://www.packer.io/docs/install/index.html) @@ -14,10 +23,6 @@ configured so that Packer can make calls to AWS API operations on your behalf. For more information, see [Authentication](https://www.packer.io/docs/builders/amazon.html#specifying-amazon-credentials) in the Packer documentation. -**Note** -The default instance type to build this AMI does not qualify for the AWS free tier. You are charged for any instances created -when building this AMI. 
- ## Building the AMI A Makefile is provided to build the Amazon EKS Worker AMI, but it is just a small wrapper around @@ -33,240 +38,9 @@ To build an Amazon EKS Worker AMI for a particular Kubernetes version run the fo make 1.23 ## Build a Amazon EKS Worker AMI for k8s 1.23 ``` -### AMI template variables - -Default values for most variables are defined in [a default variable file](eks-worker-al2-variables.json). - -Users have the following options for specifying their own values: - -1. Provide a variable file with the `PACKER_VARIABLE_FILE` argument to `make`. Values in this file will override values in the default variable file. Your variable file does not need to include all possible variables, as it will be merged with the default variable file. -2. Pass a key-value pair for any template variable to `make`. These values will override any values specified using the first method. - -**Note** that some variables (such as `arch` and `kubernetes_version`) do not have a sensible, static default, and are satisfied by the Makefile. Such variables do not appear in the default variable file, and must be overridden (if necessary) by the second method described above. 
- -### Building against other versions of Kubernetes binaries -To build an Amazon EKS Worker AMI with other versions of Kubernetes that are not listed above run the following AWS Command -Line Interface (AWS CLI) commands to obtain values for KUBERNETES_VERSION, KUBERNETES_BUILD_DATE, PLATFORM, ARCH from S3 -```bash -#List of all avalable Kuberenets Versions: -aws s3 ls s3://amazon-eks -KUBERNETES_VERSION=1.23.9 # Chose a version and set the variable - -#List of all builds for the specified Kubernetes Version: -aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/ -KUBERNETES_BUILD_DATE=2022-07-27 # Chose a date and set the variable - -#List of all platforms available for the selected Kubernetes Version and build date -aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/ -PLATFORM=linux # Chose a platform and set the variable - -#List of all architectures for the selected Kubernetes Version, build date and platform -aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/ -ARCH=x86_64 #Chose an architecture and set the variable -``` -Run the following command to build an Amazon EKS Worker AMI based on the chosen parameters in the previous step -```bash -make k8s \ - kubernetes_version=$KUBERNETES_VERSION \ - kubernetes_build_date=$KUBERNETES_BUILD_DATE \ - arch=$ARCH -``` - -### Providing your own Kubernetes Binaries - -By default, binaries are downloaded from the Amazon EKS public Amazon Simple Storage Service (Amazon S3) -bucket amazon-eks in us-west-2. You can instead choose to provide your own version of Kubernetes binaries to be used. To use your own binaries - -1. Copy the binaries to your own S3 bucket using the AWS CLI. Here is an example that uses Kubelet binary -```bash - aws s3 cp kubelet s3://my-custom-bucket/kubernetes_version/kubernetes_build_date/bin/linux/arch/kubelet -``` -**Note**: Replace my-custom-bucket, amazon-eks, kubernetes_version, kubernetes_build_date, and arch with your values. 
- -**Important**: You must provide all the binaries listed in the default amazon-eks bucket for a specific kubernetes_version, kubernetes_build_date, and arch combination. These binaries must be accessible through AWS Identity and Access Management (IAM) credentials configured in the Install and configure HashiCorp Packer section. - -2. Run the following command to start the build process to use your own Kubernetes binaries -```bash -make k8s \ - binary_bucket_name=my-custom-bucket \ - binary_bucket_region=eu-west-1 \ - kubernetes_version=1.14.9 \ - kubernetes_build_date=2020-01-22 -``` -**Note**: Confirm that the binary_bucket_name, binary_bucket_region, kubernetes_version, and kubernetes_build_date parameters match the path to your binaries in Amazon S3. - -The Makefile runs Packer with the `eks-worker-al2.json` build specification -template and the [amazon-ebs](https://www.packer.io/docs/builders/amazon-ebs.html) -builder. An instance is launched and the Packer [Shell -Provisioner](https://www.packer.io/docs/provisioners/shell.html) runs the -`install-worker.sh` script on the instance to install software and perform other -necessary configuration tasks. Then, Packer creates an AMI from the instance -and terminates the instance after the AMI is created. - -### Container Image Caching - -Optionally, some container images can be cached during the AMI build process in order to reduce the latency of the node getting to a `Ready` state when launched. - -To turn on container image caching: - -``` -cache_container_images=true make 1.23 -``` - -When container image caching is enabled, the following images are cached: - - 602401143452.dkr.ecr..amazonaws.com/eks/kube-proxy:-eksbuild. - - 602401143452.dkr.ecr..amazonaws.com/eks/kube-proxy:-minimal-eksbuild. 
- - 602401143452.dkr.ecr..amazonaws.com/eks/pause:3.5 - - 602401143452.dkr.ecr..amazonaws.com/amazon-k8s-cni-init: - - 602401143452.dkr.ecr..amazonaws.com/amazon-k8s-cni: - -The account ID can be different depending on the region and partition you are building the AMI in. See [here](https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html) for more details. - -Since the VPC CNI is not versioned with K8s itself, the latest version of the VPC CNI and the default version, based on the response from the EKS DescribeAddonVersions at the time of the AMI build, will be cached. - -The images listed above are also tagged with each region in the partition the AMI is built in, since images are often built in one region and copied to others within the same partition. Images that are available to pull from an ECR FIPS endpoint are also tagged as such (i.e. `602401143452.dkr.ecr-fips.us-east-1.amazonaws.com/eks/pause:3.5`). - -When listing images on a node, you'll notice a long list of images. However, most of these images are simply tagged in different ways with no storage overhead. Images cached in the AMI total around 1.0 GiB. In general, a node with no images cached using the VPC CNI will use around 500 MiB of images when in a `Ready` state with no other pods running on the node. 
- -### IAM Permissions - -To build the EKS Optimized AMI, you will need the following permissions: - -``` -{ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": [ - "ec2:AttachVolume", - "ec2:AuthorizeSecurityGroupIngress", - "ec2:CopyImage", - "ec2:CreateImage", - "ec2:CreateKeypair", - "ec2:CreateSecurityGroup", - "ec2:CreateSnapshot", - "ec2:CreateTags", - "ec2:CreateVolume", - "ec2:DeleteKeyPair", - "ec2:DeleteSecurityGroup", - "ec2:DeleteSnapshot", - "ec2:DeleteVolume", - "ec2:DeregisterImage", - "ec2:DescribeImageAttribute", - "ec2:DescribeImages", - "ec2:DescribeInstances", - "ec2:DescribeInstanceStatus", - "ec2:DescribeRegions", - "ec2:DescribeSecurityGroups", - "ec2:DescribeSnapshots", - "ec2:DescribeSubnets", - "ec2:DescribeTags", - "ec2:DescribeVolumes", - "ec2:DetachVolume", - "ec2:GetPasswordData", - "ec2:ModifyImageAttribute", - "ec2:ModifyInstanceAttribute", - "ec2:ModifySnapshotAttribute", - "ec2:RegisterImage", - "ec2:RunInstances", - "ec2:StopInstances", - "ec2:TerminateInstances", - "eks:DescribeAddonVersions", - "ecr:GetAuthorizationToken" - ], - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": [ - "ecr:BatchGetImage", - "ecr:BatchCheckLayerAvailability", - "ecr:GetDownloadUrlForLayer" - ], - "Resource": "arn:aws:ecr:us-west-2:602401143452:repository/*" - }, - { - "Effect": "Allow", - "Action": [ - "s3:GetObject" - ], - "Resource": "arn:aws:s3:::amazon-eks/*" - } - ] -} -``` - -You will need to use the region you are building the AMI in to specify the ECR repository resource in the second IAM statement. You may also need to change the account if you are building the AMI in a different partition or special region. You can see a mapping of regions to account ID [here](https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html). -If you're using a custom s3 bucket to vend different K8s binaries, you will need to change the resource in the third IAM statement above to reference your custom bucket. 
-For more information about the permissions required by Packer with different configurations, see the [docs](https://www.packer.io/plugins/builders/amazon#iam-task-or-instance-role). - -## Using the AMI - -If you are just getting started with Amazon EKS, we recommend that you follow -our [Getting Started](https://docs.aws.amazon.com/eks/latest/userguide/getting-started.html) -chapter in the Amazon EKS User Guide. If you already have a cluster, and you -want to launch a node group with your new AMI, see [Launching Amazon EKS Worker -Nodes](https://docs.aws.amazon.com/eks/latest/userguide/launch-workers.html) -in the Amazon EKS User Guide. - -## AL2 / Linux Kernel Information - -By default, the `amazon-eks-ami` uses a [source_ami_filter](https://github.com/awslabs/amazon-eks-ami/blob/e3f1b910f83ad1f27e68312e50474ea6059f052d/eks-worker-al2.json#L46) that selects the latest [hvm](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/virtualization_types.html) AL2 AMI for the given architecture as the base AMI. For more information on what kernel versions are running on published Amazon EKS optimized Linux AMIs, see [the public documentation](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html). - -When building an AMI, you can set the `kernel_version` to `4.14` or `5.4` to customize the kernel version. The [upgrade_kernel.sh script](https://github.com/awslabs/amazon-eks-ami/blob/master/scripts/upgrade_kernel.sh#L26) contains the logic for updating and upgrading the kernel. For Kubernetes versions 1.18 and below, it uses the `4.14` kernel if not set, and it will install the latest patches. For Kubernetes version 1.19 and above, it uses the `5.4` kernel if not set. 
- -## Customizing Kubelet Config - -In some cases, customers may want to customize the [kubelet configuration](https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/#kubelet-config-k8s-io-v1beta1-KubeletConfiguration) on their nodes, and there are two mechanisms to do that with the EKS Optimized AMI. - -**Set the "--kubelet-extra-args" flag when invoking bootstrap.sh** - -`bootstrap.sh`, the script that bootstraps nodes when using the EKS Optimized AMI, supports a flag called `--kubelet-extra-args` that allows you to pass in additional `kubelet` configuration. If you invoke the bootstrap script yourself (self-managed nodegroups or EKS managed nodegroups with custom AMIs), you can use that to customize your configuration. For example, you can use something like the following in your userdata: - -``` -/etc/eks/bootstrap.sh my-cluster --kubelet-extra-args '--registry-qps=20 --registry-burst=40' -``` - -In this case, it will set `registryPullQPS` to 20 and `registryBurst` to 40 in `kubelet`. Some of the flags, like the ones above, are marked as deprecated and you're encouraged to set them in the `kubelet` config file (described below), but they continue to work as of 1.23. - -**Update the kubelet config file** - -You can update the `kubelet` config file directly with new configuration. On EKS Optimized AMIs, the file is stored at `/etc/kubernetes/kubelet/kubelet-config.json`. It must be valid JSON. You can use a utility like `jq` (or your tool of choice) to edit the config in your user data: - -``` -echo "$(jq ".registryPullQPS=20 | .registryBurst=40" /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json -``` - -There are a couple of important caveats here: - -1. If you update the `kubelet` config file after `kubelet` has already started (i.e. `bootstrap.sh` already ran), you'll need to restart `kubelet` to pick up the latest configuration. -2. 
[bootstrap.sh](https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh) does modify a few fields, like `kubeReserved` and `evictionHard`, so you'd need to modify the config after the bootstrap script is run and restart `kubelet` to overwrite those properties. - -**View active kubelet config** - -When `kubelet` starts up, it logs all possible flags, including unset flags. The unset flags get logged with default values. *These logs do not necessarily reflect the actual active configuration.* This has caused confusion in the past when customers have configured the `kubelet` config file with one value and notice the default value is logged. Here is an example of the referenced log: - -``` -Aug 16 21:53:49 ip-192-168-92-220.us-east-2.compute.internal kubelet[3935]: I0816 21:53:49.202824 3935 flags.go:59] FLAG: --registry-burst="10" -Aug 16 21:53:49 ip-192-168-92-220.us-east-2.compute.internal kubelet[3935]: I0816 21:53:49.202829 3935 flags.go:59] FLAG: --registry-qps="5" -``` - -To view the actual `kubelet` config on your node, you can use the Kubernetes API to confirm that your configuration has applied. - -``` -$ kubectl proxy -$ curl -sSL "http://localhost:8001/api/v1/nodes/ip-192-168-92-220.us-east-2.compute.internal/proxy/configz" | jq - -{ - "kubeletconfig": { - ... - "registryPullQPS": 20, - "registryBurst": 40, - ... - } -} -``` +**Note** +The default instance type to build this AMI does not qualify for the AWS free tier. You are charged for any instances created +when building this AMI. ## Security diff --git a/USER_GUIDE.md b/USER_GUIDE.md deleted file mode 100644 index 03f8c445d..000000000 --- a/USER_GUIDE.md +++ /dev/null @@ -1,25 +0,0 @@ -## User Guide - -This guide will provide more detailed usage information on this repo. - -## Updating known instance types - -`files/bootstrap.sh` configures the maximum number of pods on a node based off of the number of ENIs available, which is determined by the instance type. 
Larger instances generally have more ENIs. The number of ENIs limits how many IPV4 addresses are available on an instance, and we need one IP address per pod. You can [see this file](https://github.com/aws/amazon-vpc-cni-k8s/blob/master/scripts/gen_vpc_ip_limits.go) for the code that calculates the max pods for more information. - -To add support for new instance types, at a minimum, we need to update `files/eni-max-pods.txt` using the [amazon-vpc-cni-k8s package.](https://github.com/aws/amazon-vpc-cni-k8s) to set the number of max pods available for those instance types. If the instance type is not on the list, `bootstrap.sh` will fail when the node is started. - -``` -$ git clone git@github.com:aws/amazon-vpc-cni-k8s.git - -# AWS credentials required at this point -$ make generate-limits -# misc/eni-max-pods.txt should be generated - -# Copy the generated file to this repo, something like this: -$ cp misc/eni-max-pods.txt ../amazon-eks-ami/files/ - -# Verify that expected types were added -$ git diff -``` - -At this point, you can build an AMI and it will include the updated list of instance types. diff --git a/CHANGELOG.md b/doc/CHANGELOG.md similarity index 100% rename from CHANGELOG.md rename to doc/CHANGELOG.md diff --git a/CODE_OF_CONDUCT.md b/doc/CODE_OF_CONDUCT.md similarity index 100% rename from CODE_OF_CONDUCT.md rename to doc/CODE_OF_CONDUCT.md diff --git a/CONTRIBUTING.md b/doc/CONTRIBUTING.md similarity index 100% rename from CONTRIBUTING.md rename to doc/CONTRIBUTING.md diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md new file mode 100644 index 000000000..0ed73532b --- /dev/null +++ b/doc/USER_GUIDE.md @@ -0,0 +1,276 @@ +# User Guide + +This guide will provide more detailed usage information on this repo. + +1. [AMI template variables](#ami-template-variables) +1. [Building against other versions of Kubernetes binaries](#building-against-other-versions-of-kubernetes-binaries) +1. 
[Providing your own Kubernetes binaries](#providing-your-own-kubernetes-binaries) +1. [Container image caching](#container-image-caching) +1. [IAM permissions](#iam-permissions) +1. [Customizing kubelet config](#customizing-kubelet-config) +1. [AL2 and Linux kernel information](#al2-and-linux-kernel-information) +1. [Updating known instance types](#updating-known-instance-types) + +--- + +## AMI template variables + +Default values for most variables are defined in [a default variable file](../eks-worker-al2-variables.json). + +Users have the following options for specifying their own values: + +1. Provide a variable file with the `PACKER_VARIABLE_FILE` argument to `make`. Values in this file will override values in the default variable file. Your variable file does not need to include all possible variables, as it will be merged with the default variable file. +2. Pass a key-value pair for any template variable to `make`. These values will override any values that were specified with the first method. + +**Note** that some variables (such as `arch` and `kubernetes_version`) do not have a sensible, static default, and are satisfied by the Makefile. Such variables do not appear in the default variable file, and must be overridden (if necessary) by the second method described above.
+ +--- + +## Building against other versions of Kubernetes binaries +To build an Amazon EKS Worker AMI with other versions of Kubernetes that are not listed above run the following AWS Command +Line Interface (AWS CLI) commands to obtain values for KUBERNETES_VERSION, KUBERNETES_BUILD_DATE, PLATFORM, ARCH from S3 +```bash +#List of all available Kubernetes Versions: +aws s3 ls s3://amazon-eks +KUBERNETES_VERSION=1.23.9 # Choose a version and set the variable + +#List of all builds for the specified Kubernetes Version: +aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/ +KUBERNETES_BUILD_DATE=2022-07-27 # Choose a date and set the variable + +#List of all platforms available for the selected Kubernetes Version and build date +aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/ +PLATFORM=linux # Choose a platform and set the variable + +#List of all architectures for the selected Kubernetes Version, build date and platform +aws s3 ls s3://amazon-eks/$KUBERNETES_VERSION/$KUBERNETES_BUILD_DATE/bin/linux/ +ARCH=x86_64 #Choose an architecture and set the variable +``` +Run the following command to build an Amazon EKS Worker AMI based on the chosen parameters in the previous step +```bash +make k8s \ + kubernetes_version=$KUBERNETES_VERSION \ + kubernetes_build_date=$KUBERNETES_BUILD_DATE \ + arch=$ARCH +``` + +--- + +## Providing your own Kubernetes Binaries + +By default, binaries are downloaded from the Amazon EKS public Amazon Simple Storage Service (Amazon S3) +bucket amazon-eks in us-west-2. You can instead choose to provide your own version of Kubernetes binaries to be used. To use your own binaries + +1. Copy the binaries to your own S3 bucket using the AWS CLI. Here is an example that uses the kubelet binary +```bash + aws s3 cp kubelet s3://my-custom-bucket/kubernetes_version/kubernetes_build_date/bin/linux/arch/kubelet +``` +**Note**: Replace my-custom-bucket, amazon-eks, kubernetes_version, kubernetes_build_date, and arch with your values.
+ +**Important**: You must provide all the binaries listed in the default amazon-eks bucket for a specific kubernetes_version, kubernetes_build_date, and arch combination. These binaries must be accessible through AWS Identity and Access Management (IAM) credentials configured in the Install and configure HashiCorp Packer section. + +2. Run the following command to start the build process to use your own Kubernetes binaries +```bash +make k8s \ + binary_bucket_name=my-custom-bucket \ + binary_bucket_region=eu-west-1 \ + kubernetes_version=1.14.9 \ + kubernetes_build_date=2020-01-22 +``` +**Note**: Confirm that the binary_bucket_name, binary_bucket_region, kubernetes_version, and kubernetes_build_date parameters match the path to your binaries in Amazon S3. + +The Makefile runs Packer with the `eks-worker-al2.json` build specification +template and the [amazon-ebs](https://www.packer.io/docs/builders/amazon-ebs.html) +builder. An instance is launched and the Packer [Shell +Provisioner](https://www.packer.io/docs/provisioners/shell.html) runs the +`install-worker.sh` script on the instance to install software and perform other +necessary configuration tasks. Then, Packer creates an AMI from the instance +and terminates the instance after the AMI is created. + +--- + +## Container Image Caching + +Optionally, some container images can be cached during the AMI build process in order to reduce the latency of the node getting to a `Ready` state when launched. + +To turn on container image caching: + +``` +cache_container_images=true make 1.23 +``` + +When container image caching is enabled, the following images are cached: + - 602401143452.dkr.ecr.<region>.amazonaws.com/eks/kube-proxy:<default and latest>-eksbuild.<build version> + - 602401143452.dkr.ecr.<region>.amazonaws.com/eks/kube-proxy:<default and latest>-minimal-eksbuild.<build version>
+ - 602401143452.dkr.ecr.<region>.amazonaws.com/eks/pause:3.5 + - 602401143452.dkr.ecr.<region>.amazonaws.com/amazon-k8s-cni-init:<default and latest> + - 602401143452.dkr.ecr.<region>.amazonaws.com/amazon-k8s-cni:<default and latest> + +The account ID can be different depending on the region and partition you are building the AMI in. See [here](https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html) for more details. + +Since the VPC CNI is not versioned with K8s itself, the latest version of the VPC CNI and the default version, based on the response from the EKS DescribeAddonVersions API at the time of the AMI build, will be cached. + +The images listed above are also tagged with each region in the partition the AMI is built in, since images are often built in one region and copied to others within the same partition. Images that are available to pull from an ECR FIPS endpoint are also tagged as such (e.g. `602401143452.dkr.ecr-fips.us-east-1.amazonaws.com/eks/pause:3.5`). + +When listing images on a node, you'll notice a long list of images. However, most of these images are simply tagged in different ways with no storage overhead. Images cached in the AMI total around 1.0 GiB. In general, a node with no images cached using the VPC CNI will use around 500 MiB of images when in a `Ready` state with no other pods running on the node.
+ +--- + +## IAM Permissions + +To build the EKS Optimized AMI, you will need the following permissions: + +``` +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ec2:AttachVolume", + "ec2:AuthorizeSecurityGroupIngress", + "ec2:CopyImage", + "ec2:CreateImage", + "ec2:CreateKeypair", + "ec2:CreateSecurityGroup", + "ec2:CreateSnapshot", + "ec2:CreateTags", + "ec2:CreateVolume", + "ec2:DeleteKeyPair", + "ec2:DeleteSecurityGroup", + "ec2:DeleteSnapshot", + "ec2:DeleteVolume", + "ec2:DeregisterImage", + "ec2:DescribeImageAttribute", + "ec2:DescribeImages", + "ec2:DescribeInstances", + "ec2:DescribeInstanceStatus", + "ec2:DescribeRegions", + "ec2:DescribeSecurityGroups", + "ec2:DescribeSnapshots", + "ec2:DescribeSubnets", + "ec2:DescribeTags", + "ec2:DescribeVolumes", + "ec2:DetachVolume", + "ec2:GetPasswordData", + "ec2:ModifyImageAttribute", + "ec2:ModifyInstanceAttribute", + "ec2:ModifySnapshotAttribute", + "ec2:RegisterImage", + "ec2:RunInstances", + "ec2:StopInstances", + "ec2:TerminateInstances", + "eks:DescribeAddonVersions", + "ecr:GetAuthorizationToken" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ecr:BatchGetImage", + "ecr:BatchCheckLayerAvailability", + "ecr:GetDownloadUrlForLayer" + ], + "Resource": "arn:aws:ecr:us-west-2:602401143452:repository/*" + }, + { + "Effect": "Allow", + "Action": [ + "s3:GetObject" + ], + "Resource": "arn:aws:s3:::amazon-eks/*" + } + ] +} +``` + +You will need to use the region you are building the AMI in to specify the ECR repository resource in the second IAM statement. You may also need to change the account if you are building the AMI in a different partition or special region. You can see a mapping of regions to account ID [here](https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html). 
+If you're using a custom s3 bucket to vend different K8s binaries, you will need to change the resource in the third IAM statement above to reference your custom bucket. +For more information about the permissions required by Packer with different configurations, see the [docs](https://www.packer.io/plugins/builders/amazon#iam-task-or-instance-role). + +--- + +## Customizing Kubelet Config + +In some cases, customers may want to customize the [kubelet configuration](https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/#kubelet-config-k8s-io-v1beta1-KubeletConfiguration) on their nodes, and there are two mechanisms to do that with the EKS Optimized AMI. + +**Set the "--kubelet-extra-args" flag when invoking bootstrap.sh** + +`bootstrap.sh`, the script that bootstraps nodes when using the EKS Optimized AMI, supports a flag called `--kubelet-extra-args` that allows you to pass in additional `kubelet` configuration. If you invoke the bootstrap script yourself (self-managed nodegroups or EKS managed nodegroups with custom AMIs), you can use that to customize your configuration. For example, you can use something like the following in your userdata: + +``` +/etc/eks/bootstrap.sh my-cluster --kubelet-extra-args '--registry-qps=20 --registry-burst=40' +``` + +In this case, it will set `registryPullQPS` to 20 and `registryBurst` to 40 in `kubelet`. Some of the flags, like the ones above, are marked as deprecated and you're encouraged to set them in the `kubelet` config file (described below), but they continue to work as of 1.23. + +**Update the kubelet config file** + +You can update the `kubelet` config file directly with new configuration. On EKS Optimized AMIs, the file is stored at `/etc/kubernetes/kubelet/kubelet-config.json`. It must be valid JSON. 
You can use a utility like `jq` (or your tool of choice) to edit the config in your user data: + +``` +echo "$(jq ".registryPullQPS=20 | .registryBurst=40" /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json +``` + +There are a couple of important caveats here: + +1. If you update the `kubelet` config file after `kubelet` has already started (i.e. `bootstrap.sh` already ran), you'll need to restart `kubelet` to pick up the latest configuration. +2. [bootstrap.sh](https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh) does modify a few fields, like `kubeReserved` and `evictionHard`, so you'd need to modify the config after the bootstrap script is run and restart `kubelet` to overwrite those properties. + +**View active kubelet config** + +When `kubelet` starts up, it logs all possible flags, including unset flags. The unset flags get logged with default values. *These logs do not necessarily reflect the actual active configuration.* This has caused confusion in the past when customers have configured the `kubelet` config file with one value and notice the default value is logged. Here is an example of the referenced log: + +``` +Aug 16 21:53:49 ip-192-168-92-220.us-east-2.compute.internal kubelet[3935]: I0816 21:53:49.202824 3935 flags.go:59] FLAG: --registry-burst="10" +Aug 16 21:53:49 ip-192-168-92-220.us-east-2.compute.internal kubelet[3935]: I0816 21:53:49.202829 3935 flags.go:59] FLAG: --registry-qps="5" +``` + +To view the actual `kubelet` config on your node, you can use the Kubernetes API to confirm that your configuration has applied. + +``` +$ kubectl proxy +$ curl -sSL "http://localhost:8001/api/v1/nodes/ip-192-168-92-220.us-east-2.compute.internal/proxy/configz" | jq + +{ + "kubeletconfig": { + ... + "registryPullQPS": 20, + "registryBurst": 40, + ... 
+ } +} +``` + +--- + +## AL2 and Linux Kernel Information + +By default, the `amazon-eks-ami` uses a [source_ami_filter](https://github.com/awslabs/amazon-eks-ami/blob/e3f1b910f83ad1f27e68312e50474ea6059f052d/eks-worker-al2.json#L46) that selects the latest [hvm](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/virtualization_types.html) AL2 AMI for the given architecture as the base AMI. For more information on what kernel versions are running on published Amazon EKS optimized Linux AMIs, see [the public documentation](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html). + +When building an AMI, you can set the `kernel_version` to `4.14` or `5.4` to customize the kernel version. The [upgrade_kernel.sh script](https://github.com/awslabs/amazon-eks-ami/blob/master/scripts/upgrade_kernel.sh#L26) contains the logic for updating and upgrading the kernel. For Kubernetes versions 1.18 and below, it uses the `4.14` kernel if not set, and it will install the latest patches. For Kubernetes version 1.19 and above, it uses the `5.4` kernel if not set. + +--- + +## Updating known instance types + +`files/bootstrap.sh` configures the maximum number of pods on a node based off of the number of ENIs available, which is determined by the instance type. Larger instances generally have more ENIs. The number of ENIs limits how many IPV4 addresses are available on an instance, and we need one IP address per pod. You can [see this file](https://github.com/aws/amazon-vpc-cni-k8s/blob/master/scripts/gen_vpc_ip_limits.go) for the code that calculates the max pods for more information. + +To add support for new instance types, at a minimum, we need to update `files/eni-max-pods.txt` using the [amazon-vpc-cni-k8s package.](https://github.com/aws/amazon-vpc-cni-k8s) to set the number of max pods available for those instance types. If the instance type is not on the list, `bootstrap.sh` will fail when the node is started. 
+ +``` +$ git clone git@github.com:aws/amazon-vpc-cni-k8s.git + +# AWS credentials required at this point +$ make generate-limits +# misc/eni-max-pods.txt should be generated + +# Copy the generated file to this repo, something like this: +$ cp misc/eni-max-pods.txt ../amazon-eks-ami/files/ + +# Verify that expected types were added +$ git diff +``` + +At this point, you can build an AMI and it will include the updated list of instance types. From 82ba970d30f0376ad9c2e176c4b27fdcdd0ef8aa Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 21 Nov 2022 18:29:52 -0800 Subject: [PATCH 119/232] Remove unused variable (#1107) --- eks-worker-al2.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 96394457c..020758d36 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -166,7 +166,6 @@ "AWS_SESSION_TOKEN={{user `aws_session_token`}}", "SONOBUOY_E2E_REGISTRY={{user `sonobuoy_e2e_registry`}}", "PAUSE_CONTAINER_VERSION={{user `pause_container_version`}}", - "KUBE_PROXY_VERSION_SUFFIX={{user `kube_proxy_version_suffix`}}", "CACHE_CONTAINER_IMAGES={{user `cache_container_images`}}" ] }, @@ -224,4 +223,4 @@ } } ] -} \ No newline at end of file +} From aa02e745a9f767b8f4b5fb0bc79ffa8ddca57f36 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 22 Nov 2022 08:53:34 -0800 Subject: [PATCH 120/232] Move CHANGELOG back to root dir (#1108) --- doc/CHANGELOG.md => CHANGELOG.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename doc/CHANGELOG.md => CHANGELOG.md (100%) diff --git a/doc/CHANGELOG.md b/CHANGELOG.md similarity index 100% rename from doc/CHANGELOG.md rename to CHANGELOG.md From 06fb2fcc0d5697b874ab165cd1495f9e67184880 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 22 Nov 2022 11:27:59 -0800 Subject: [PATCH 121/232] Add eks-worker-al2-variables.json to archive (#1109) --- ArchiveBuildConfig.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ArchiveBuildConfig.yaml b/ArchiveBuildConfig.yaml 
index 2c5bf850c..ba146715d 100644 --- a/ArchiveBuildConfig.yaml +++ b/ArchiveBuildConfig.yaml @@ -13,6 +13,7 @@ dependencies: files: - src: Makefile - src: eks-worker-al2.json + - src: eks-worker-al2-variables.json archive: name: amazon-eks-ami.tar.gz type: tgz From 667600536b93b089c805195bf00aa275b6bb76c4 Mon Sep 17 00:00:00 2001 From: Victor Pineda Gonzalez Date: Tue, 22 Nov 2022 14:48:10 -0800 Subject: [PATCH 122/232] Skip sandbox image pull if already present (#1090) --- files/pull-sandbox-image.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/files/pull-sandbox-image.sh b/files/pull-sandbox-image.sh index 523e7ed4c..e1c1a6eb8 100644 --- a/files/pull-sandbox-image.sh +++ b/files/pull-sandbox-image.sh @@ -2,4 +2,10 @@ set -euo pipefail sandbox_image="$(awk -F'[ ="]+' '$1 == "sandbox_image" { print $2 }' /etc/containerd/config.toml)" + +### Short-circuit fetching sandbox image if its already present +if [[ "$(sudo ctr --namespace k8s.io image ls | grep $sandbox_image)" != "" ]]; then + exit 0 +fi + /etc/eks/containerd/pull-image.sh "${sandbox_image}" From f6c96de6f588e44b2d4d254831d902dcf233a14c Mon Sep 17 00:00:00 2001 From: Victor Pineda Gonzalez Date: Wed, 23 Nov 2022 15:26:49 -0800 Subject: [PATCH 123/232] Fix typo in comment (#1110) --- files/bootstrap.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 78e9d4fab..51a12405b 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -355,7 +355,7 @@ fi ### - append entries to /etc/hosts with the mappings of control plane host IP address and API server ### domain name. So that the domain name can be resolved to IP addresses locally. ### - use aws-iam-authenticator as bootstrap auth for kubelet TLS bootstrapping which downloads client -### X.509 certificate and generate kubelet kubeconfig file which uses the cleint cert. So that the +### X.509 certificate and generate kubelet kubeconfig file which uses the client cert. 
So that the ### worker node can be authentiacated through X.509 certificate which works for both connected and #### disconnected state. if [[ "${ENABLE_LOCAL_OUTPOST}" == "true" ]]; then From 4305812a0ccf0f5f4fd778614b0880ae9bdaed0b Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 1 Dec 2022 15:08:08 -0800 Subject: [PATCH 124/232] Add link to doc/USER_GUIDE.md (#1114) --- README.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 97baeed2f..cbb911f78 100644 --- a/README.md +++ b/README.md @@ -5,16 +5,17 @@ custom Amazon EKS AMI with [HashiCorp Packer](https://www.packer.io/). This is the same configuration that Amazon EKS uses to create the official Amazon EKS-optimized AMI. -## Getting started +**Check out the AMI's [user guide](doc/USER_GUIDE.md) for more information.** -If you are just getting started with Amazon EKS, we recommend that you follow +## 🚀 Getting started + +If you are new to Amazon EKS, we recommend that you follow our [Getting Started](https://docs.aws.amazon.com/eks/latest/userguide/getting-started.html) chapter in the Amazon EKS User Guide. If you already have a cluster, and you want to launch a node group with your new AMI, see [Launching Amazon EKS Worker -Nodes](https://docs.aws.amazon.com/eks/latest/userguide/launch-workers.html) -in the Amazon EKS User Guide. +Nodes](https://docs.aws.amazon.com/eks/latest/userguide/launch-workers.html). -## Pre-requisites +## 🔢 Pre-requisites You must have [Packer](https://www.packer.io/) version 1.8.0 or later installed on your local system. For more information, see [Installing Packer](https://www.packer.io/docs/install/index.html) @@ -23,7 +24,7 @@ configured so that Packer can make calls to AWS API operations on your behalf. For more information, see [Authentication](https://www.packer.io/docs/builders/amazon.html#specifying-amazon-credentials) in the Packer documentation. 
-## Building the AMI +## 👷 Building the AMI A Makefile is provided to build the Amazon EKS Worker AMI, but it is just a small wrapper around invoking Packer directly. You can initiate the build process by running the @@ -42,10 +43,10 @@ make 1.23 ## Build a Amazon EKS Worker AMI for k8s 1.23 The default instance type to build this AMI does not qualify for the AWS free tier. You are charged for any instances created when building this AMI. -## Security +## 🔒 Security For security issues or concerns, please do not open an issue or pull request on GitHub. Please report any suspected or confirmed security issues to AWS Security https://aws.amazon.com/security/vulnerability-reporting/ -## License Summary +## ⚖️ License Summary This sample code is made available under a modified MIT license. See the LICENSE file. From acf0d78478346de4dfd3b18aef396a483a3443a3 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 5 Dec 2022 10:36:41 -0800 Subject: [PATCH 125/232] Remove aws_region and binary_bucket_region overrides (#1115) --- Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Makefile b/Makefile index 6eb4e4b67..71c410343 100644 --- a/Makefile +++ b/Makefile @@ -6,8 +6,6 @@ K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS MAKEFILE_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) -aws_region ?= $(AWS_DEFAULT_REGION) -binary_bucket_region ?= $(AWS_DEFAULT_REGION) arch ?= x86_64 ifeq ($(arch), arm64) instance_type ?= m6g.large From cb9c8a8640cd1baba29cd6dd89d0ab6f105c3fe2 Mon Sep 17 00:00:00 2001 From: Gerald Barker Date: Fri, 9 Dec 2022 17:28:52 +0000 Subject: [PATCH 126/232] Lookup instanceId using IMDSv2 (#1116) --- log-collector-script/windows/eks-log-collector.ps1 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/log-collector-script/windows/eks-log-collector.ps1 b/log-collector-script/windows/eks-log-collector.ps1 index d3e03660b..f96916e7e 100644 --- 
a/log-collector-script/windows/eks-log-collector.ps1 +++ b/log-collector-script/windows/eks-log-collector.ps1 @@ -26,7 +26,8 @@ param( # Common options $basedir="C:\log-collector" -$instanceid = Invoke-RestMethod -uri http://169.254.169.254/latest/meta-data/instance-id +$token = Invoke-RestMethod -Headers @{"X-aws-ec2-metadata-token-ttl-seconds" = "5"} -Method PUT -Uri http://169.254.169.254/latest/api/token +$instanceId = Invoke-RestMethod -Headers @{"X-aws-ec2-metadata-token" = $token} -Method GET -Uri http://169.254.169.254/latest/meta-data/instance-id $curtime = Get-Date -Format FileDateTimeUniversal $outfilename = "eks_" + $instanceid + "_" + $curtime + ".zip" $infodir="$basedir\collect" From f1bb10b4b161fc43eddad04359b1ab03530dcb2e Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 9 Dec 2022 16:13:55 -0800 Subject: [PATCH 127/232] Handle indentation when parsing `sandbox_image` (#1119) --- files/pull-sandbox-image.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/pull-sandbox-image.sh b/files/pull-sandbox-image.sh index e1c1a6eb8..e6484a962 100644 --- a/files/pull-sandbox-image.sh +++ b/files/pull-sandbox-image.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -sandbox_image="$(awk -F'[ ="]+' '$1 == "sandbox_image" { print $2 }' /etc/containerd/config.toml)" +source <(grep "sandbox_image" /etc/containerd/config.toml | tr -d ' ') ### Short-circuit fetching sandbox image if its already present if [[ "$(sudo ctr --namespace k8s.io image ls | grep $sandbox_image)" != "" ]]; then From c5a09beba2c4bdb8ac18a3eaa319685716368637 Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Fri, 9 Dec 2022 18:40:48 -0600 Subject: [PATCH 128/232] Discover latest eksbuild version when caching container images (#1120) --- scripts/install-worker.sh | 46 ++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 669eb4330..ca40e2a7b 100644 --- 
a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -403,7 +403,16 @@ if [[ "$CACHE_CONTAINER_IMAGES" == "true" && "$BINARY_BUCKET_REGION" != "us-iso- LATEST_KUBE_PROXY_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f1) LATEST_KUBE_PROXY_PLATFORM_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f2) - KUBE_PROXY_IMGS=( + #### Cache VPC CNI images starting with the addon default version and the latest version + VPC_CNI_ADDON_VERSIONS=$(aws eks describe-addon-versions --addon-name vpc-cni --kubernetes-version=${K8S_MINOR_VERSION}) + DEFAULT_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] .defaultVersion==true).addonVersion') + LATEST_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) + CNI_IMG="${ECR_URI}/amazon-k8s-cni" + CNI_INIT_IMG="${CNI_IMG}-init" + + CACHE_IMGS=( + "${PAUSE_CONTAINER}" + ## Default kube-proxy images "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-minimal-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" @@ -411,15 +420,7 @@ if [[ "$CACHE_CONTAINER_IMAGES" == "true" && "$BINARY_BUCKET_REGION" != "us-iso- ## Latest kube-proxy images "${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" "${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-minimal-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" - ) - #### Cache VPC CNI images starting with the addon default version and the latest version - VPC_CNI_ADDON_VERSIONS=$(aws eks describe-addon-versions --addon-name vpc-cni --kubernetes-version=${K8S_MINOR_VERSION}) - DEFAULT_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] .defaultVersion==true).addonVersion') - LATEST_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r 
'.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) - CNI_IMG="${ECR_URI}/amazon-k8s-cni" - CNI_INIT_IMG="${CNI_IMG}-init" - CNI_IMGS=( ## Default VPC CNI Images "${CNI_IMG}:${DEFAULT_VPC_CNI_VERSION}" "${CNI_INIT_IMG}:${DEFAULT_VPC_CNI_VERSION}" @@ -428,24 +429,33 @@ if [[ "$CACHE_CONTAINER_IMAGES" == "true" && "$BINARY_BUCKET_REGION" != "us-iso- "${CNI_IMG}:${LATEST_VPC_CNI_VERSION}" "${CNI_INIT_IMG}:${LATEST_VPC_CNI_VERSION}" ) + PULLED_IMGS=() - CACHED_IMGS=( - "${PAUSE_CONTAINER}" - ${KUBE_PROXY_IMGS[@]} - ${CNI_IMGS[@]} - ) - - for img in "${CACHED_IMGS[@]}"; do + for img in "${CACHE_IMGS[@]}"; do ## only kube-proxy-minimal is vended for K8s 1.24+ if [[ "${img}" == *"kube-proxy:"* ]] && [[ "${img}" != *"-minimal-"* ]] && vercmp "${K8S_MINOR_VERSION}" gteq "1.24"; then continue fi - /etc/eks/containerd/pull-image.sh "${img}" + ## Since eksbuild.x version may not match the image tag, we need to decrement the eksbuild version until we find the latest image tag within the app semver + eksbuild_version="1" + if [[ ${img} == *'eksbuild.'* ]]; then + eksbuild_version=$(echo "${img}" | grep -o 'eksbuild\.[0-9]\+' | cut -d'.' 
-f2) + fi + ## iterate through decrementing the build version each time + for build_version in $(seq "${eksbuild_version}" -1 1); do + img=$(echo "${img}" | sed -E "s/eksbuild.[0-9]+/eksbuild.${build_version}/") + if /etc/eks/containerd/pull-image.sh "${img}"; then + PULLED_IMGS+=("${img}") + break + elif [[ "${build_version}" -eq 1 ]]; then + exit 1 + fi + done done #### Tag the pulled down image for all other regions in the partition for region in $(aws ec2 describe-regions --all-regions | jq -r '.Regions[] .RegionName'); do - for img in "${CACHED_IMGS[@]}"; do + for img in "${PULLED_IMGS[@]}"; do regional_img="${img/$BINARY_BUCKET_REGION/$region}" sudo ctr -n k8s.io image tag "${img}" "${regional_img}" || : ## Tag ECR fips endpoint for supported regions From d022ac29f5c21f9b2d5f9bc35dad85d92efae829 Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Mon, 12 Dec 2022 12:48:07 -0600 Subject: [PATCH 129/232] Add cached images to version-info (#1122) --- scripts/generate-version-info.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/generate-version-info.sh b/scripts/generate-version-info.sh index f35243936..9a52f42ce 100644 --- a/scripts/generate-version-info.sh +++ b/scripts/generate-version-info.sh @@ -18,3 +18,6 @@ sudo rpm --query --all --queryformat '\{"%{NAME}": "%{VERSION}-%{RELEASE}"\}\n' # binaries echo $(jq ".binaries.kubelet = \"$(kubelet --version | awk '{print $2}')\"" $OUTPUT_FILE) > $OUTPUT_FILE echo $(jq ".binaries.awscli = \"$(aws --version | awk '{print $1}' | cut -d '/' -f 2)\"" $OUTPUT_FILE) > $OUTPUT_FILE + +# cached images +echo $(jq ".images = [ $(sudo ctr -n k8s.io image ls -q | cut -d'/' -f2- | sort | uniq | grep -v 'sha256' | xargs -r printf "\"%s\"," | sed 's/,$//') ]" $OUTPUT_FILE) > $OUTPUT_FILE From 9c0da3482c8f9693cea225f149b07fb61e33133c Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 12 Dec 2022 15:36:25 -0800 Subject: [PATCH 130/232] Document daemon.json change in GPU AMI (#1123) --- CHANGELOG.md | 2 ++ 1 file 
changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a305f1061..3e6d5c0d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -132,6 +132,8 @@ Notable changes: * Update docker and containerd for [ALASDOCKER-2022-021](https://alas.aws.amazon.com/AL2/ALASDOCKER-2022-021.html) [#1056](https://github.com/awslabs/amazon-eks-ami/pull/1056) * runc version is updated to 1.1.3-1.amzn2.0.2 to include ALAS2DOCKER-2022-020 [#1055](https://github.com/awslabs/amazon-eks-ami/pull/1055) * Release AMI in me-central-1 with version 1.21, 1.22, 1.23. 1.20 is not supported in this region since it will be deprecated soon. +* Fixes an issue with Docker daemon configuration on the GPU AMI (#351). + * **Note** that if you have a workaround in place for this issue, you'll likely need to revert it. ### [Recalled] AMI Release v20221027 * amazon-eks-gpu-node-1.23-v20221027 From d4e0921b4bd28188ccbbaf6701bb9aab28947c28 Mon Sep 17 00:00:00 2001 From: Zaid Ahmed Farooq <38226823+zaf6862@users.noreply.github.com> Date: Tue, 13 Dec 2022 09:17:56 -0800 Subject: [PATCH 131/232] Add ECR accounts for eu-south-2, eu-central-2, ap-south-2 (#1125) Co-authored-by: Zaid Farooq --- files/get-ecr-uri.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/files/get-ecr-uri.sh b/files/get-ecr-uri.sh index f5e87b932..e865ab24d 100755 --- a/files/get-ecr-uri.sh +++ b/files/get-ecr-uri.sh @@ -45,6 +45,15 @@ else me-central-1) acct="759879836304" ;; + eu-south-2) + acct="455263428931" + ;; + eu-central-2) + acct="900612956339" + ;; + ap-south-2) + acct="900889452093" + ;; *) acct="602401143452" ;; From 5da3eb794a1439e7515dc569cf6de5803a680a31 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 13 Dec 2022 11:18:27 -0800 Subject: [PATCH 132/232] Update to runc-1.1.4-1.amzn2 (#1124) --- eks-worker-al2-variables.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 5f1caaa0b..7346acccb 100644 --- 
a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -22,7 +22,7 @@ "pause_container_version": "3.5", "pull_cni_from_github": "true", "remote_folder": "", - "runc_version": "1.1.3-1.amzn2.0.2", + "runc_version": "runc-1.1.4-1.amzn2", "security_group_id": "", "sonobuoy_e2e_registry": "", "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", @@ -33,4 +33,4 @@ "subnet_id": "", "temporary_security_group_source_cidrs": "", "volume_type": "gp2" -} \ No newline at end of file +} From 671ed37c4328e8b8909cbdae325d520cd5f632f2 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 13 Dec 2022 11:42:15 -0800 Subject: [PATCH 133/232] Fix runc_version typo (#1127) --- eks-worker-al2-variables.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 7346acccb..59d69ba28 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -22,7 +22,7 @@ "pause_container_version": "3.5", "pull_cni_from_github": "true", "remote_folder": "", - "runc_version": "runc-1.1.4-1.amzn2", + "runc_version": "1.1.4-1.amzn2", "security_group_id": "", "sonobuoy_e2e_registry": "", "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", From 3e1c6c2c421be1e3b0b46f5bce7ac28c9b6123fb Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Mon, 19 Dec 2022 08:26:34 -0600 Subject: [PATCH 134/232] Do not cache addons images if none are available (#1133) --- scripts/install-worker.sh | 69 +++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index ca40e2a7b..cf1008410 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -393,41 +393,54 @@ if [[ "$CACHE_CONTAINER_IMAGES" == "true" && "$BINARY_BUCKET_REGION" != "us-iso- sudo systemctl enable containerd sandbox-image K8S_MINOR_VERSION=$(echo "${KUBERNETES_VERSION}" | cut -d'.' 
-f1-2) - KUBE_PROXY_ADDON_VERSIONS=$(aws eks describe-addon-versions --addon-name kube-proxy --kubernetes-version=${K8S_MINOR_VERSION}) - - DEFAULT_KUBE_PROXY_FULL_VERSION=$(echo "${KUBE_PROXY_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] .defaultVersion==true).addonVersion') - DEFAULT_KUBE_PROXY_VERSION=$(echo "${DEFAULT_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f1) - DEFAULT_KUBE_PROXY_PLATFORM_VERSION=$(echo "${DEFAULT_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f2) - LATEST_KUBE_PROXY_FULL_VERSION=$(echo "${KUBE_PROXY_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) - LATEST_KUBE_PROXY_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f1) - LATEST_KUBE_PROXY_PLATFORM_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f2) + #### Cache kube-proxy images starting with the addon default version and the latest version + KUBE_PROXY_ADDON_VERSIONS=$(aws eks describe-addon-versions --addon-name kube-proxy --kubernetes-version=${K8S_MINOR_VERSION}) + KUBE_PROXY_IMGS=() + if [[ $(jq '.addons | length' <<< $KUBE_PROXY_ADDON_VERSIONS) -gt 0 ]]; then + DEFAULT_KUBE_PROXY_FULL_VERSION=$(echo "${KUBE_PROXY_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] .defaultVersion==true).addonVersion') + DEFAULT_KUBE_PROXY_VERSION=$(echo "${DEFAULT_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f1) + DEFAULT_KUBE_PROXY_PLATFORM_VERSION=$(echo "${DEFAULT_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f2) + + LATEST_KUBE_PROXY_FULL_VERSION=$(echo "${KUBE_PROXY_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) + LATEST_KUBE_PROXY_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f1) + LATEST_KUBE_PROXY_PLATFORM_VERSION=$(echo "${LATEST_KUBE_PROXY_FULL_VERSION}" | cut -d"-" -f2) + + KUBE_PROXY_IMGS=( + ## Default kube-proxy images + "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" 
+ "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-minimal-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" + + ## Latest kube-proxy images + "${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" + "${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-minimal-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" + ) + fi #### Cache VPC CNI images starting with the addon default version and the latest version VPC_CNI_ADDON_VERSIONS=$(aws eks describe-addon-versions --addon-name vpc-cni --kubernetes-version=${K8S_MINOR_VERSION}) - DEFAULT_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] .defaultVersion==true).addonVersion') - LATEST_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) - CNI_IMG="${ECR_URI}/amazon-k8s-cni" - CNI_INIT_IMG="${CNI_IMG}-init" + VPC_CNI_IMGS=() + if [[ $(jq '.addons | length' <<< $VPC_CNI_ADDON_VERSIONS) -gt 0 ]]; then + DEFAULT_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] | select(.compatibilities[] .defaultVersion==true).addonVersion') + LATEST_VPC_CNI_VERSION=$(echo "${VPC_CNI_ADDON_VERSIONS}" | jq -r '.addons[] .addonVersions[] .addonVersion' | sort -V | tail -n1) + CNI_IMG="${ECR_URI}/amazon-k8s-cni" + CNI_INIT_IMG="${CNI_IMG}-init" + + VPC_CNI_IMGS=( + ## Default VPC CNI Images + "${CNI_IMG}:${DEFAULT_VPC_CNI_VERSION}" + "${CNI_INIT_IMG}:${DEFAULT_VPC_CNI_VERSION}" + + ## Latest VPC CNI Images + "${CNI_IMG}:${LATEST_VPC_CNI_VERSION}" + "${CNI_INIT_IMG}:${LATEST_VPC_CNI_VERSION}" + ) + fi CACHE_IMGS=( "${PAUSE_CONTAINER}" - - ## Default kube-proxy images - "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" - "${ECR_URI}/eks/kube-proxy:${DEFAULT_KUBE_PROXY_VERSION}-minimal-${DEFAULT_KUBE_PROXY_PLATFORM_VERSION}" - - ## Latest kube-proxy images - 
"${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" - "${ECR_URI}/eks/kube-proxy:${LATEST_KUBE_PROXY_VERSION}-minimal-${LATEST_KUBE_PROXY_PLATFORM_VERSION}" - - ## Default VPC CNI Images - "${CNI_IMG}:${DEFAULT_VPC_CNI_VERSION}" - "${CNI_INIT_IMG}:${DEFAULT_VPC_CNI_VERSION}" - - ## Latest VPC CNI Images - "${CNI_IMG}:${LATEST_VPC_CNI_VERSION}" - "${CNI_INIT_IMG}:${LATEST_VPC_CNI_VERSION}" + ${KUBE_PROXY_IMGS[@]+"${KUBE_PROXY_IMGS[@]}"} + ${VPC_CNI_IMGS[@]+"${VPC_CNI_IMGS[@]}"} ) PULLED_IMGS=() From 6b1df1107d6e7ec2f46612ebb509c16101ded22d Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 29 Dec 2022 18:54:56 -0600 Subject: [PATCH 135/232] Update CHANGELOG for v20221222 (#1140) --- CHANGELOG.md | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e6d5c0d3..0a8d94770 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,64 @@ # Changelog +### AMI Release v20221222 +* amazon-eks-gpu-node-1.24-v20221222 +* amazon-eks-gpu-node-1.23-v20221222 +* amazon-eks-gpu-node-1.22-v20221222 +* amazon-eks-gpu-node-1.21-v20221222 +* amazon-eks-gpu-node-1.20-v20221222 +* amazon-eks-arm64-node-1.24-v20221222 +* amazon-eks-arm64-node-1.23-v20221222 +* amazon-eks-arm64-node-1.22-v20221222 +* amazon-eks-arm64-node-1.21-v20221222 +* amazon-eks-arm64-node-1.20-v20221222 +* amazon-eks-node-1.24-v20221222 +* amazon-eks-node-1.23-v20221222 +* amazon-eks-node-1.22-v20221222 +* amazon-eks-node-1.21-v20221222 +* amazon-eks-node-1.20-v20221222 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.7-20221222` +* `1.23.13-20221222` +* `1.22.15-20221222` +* `1.21.14-20221222` +* `1.20.15-20221222` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.24.7/2022-10-31/ +* s3://amazon-eks/1.23.13/2022-10-31/ +* s3://amazon-eks/1.22.15/2022-10-31/ +* s3://amazon-eks/1.21.14/2022-10-31/ +* 
s3://amazon-eks/1.20.15/2022-10-31/ + +AMI details: +* kernel: 5.4.226-129.415.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.4-1.amzn2 +* cuda: 11.4.0-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +- Kernel updated to `5.4.226-129.415.amzn2` for: + - [ALASKERNEL-5.4-2022-040](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2022-040.html) + - [ALASKERNEL-5.4-2022-039](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2022-039.html) +- NVIDIA driver updated to `470.161.03-1` to address security issues. More information is available in [NVIDIA security bulletin #5415](https://nvidia.custhelp.com/app/answers/detail/a_id/5415). +- Cache pause, vpc-cni, and kube-proxy images during build ([#938](https://github.com/awslabs/amazon-eks-ami/pull/938)) + - *Note* that this has only been enabled for 1.24 AMIs at this time. +- Disable yum updates in cloud-init ([#1074](https://github.com/awslabs/amazon-eks-ami/pull/1074)) +- Skip sandbox image pull if already present ([#1090](https://github.com/awslabs/amazon-eks-ami/pull/1090)) +- Move variable defaults to `--var-file` ([#1079](https://github.com/awslabs/amazon-eks-ami/pull/1079)) + +Minor changes: +- Add ECR accounts for `eu-south-2`, `eu-central-2`, `ap-south-2` ([#1125](https://github.com/awslabs/amazon-eks-ami/pull/1125)) +- Handle indentation when parsing `sandbox_image` from `containerd` config ([#1119](https://github.com/awslabs/amazon-eks-ami/pull/1119)) +- Lookup instanceId using IMDSv2 in Windows log collector script ([#1116](https://github.com/awslabs/amazon-eks-ami/pull/1116)) +- Remove `aws_region` and `binary_bucket_region` overrides from Makefile ([#1115](https://github.com/awslabs/amazon-eks-ami/pull/1115)) +- Sym-link awscli to /bin ([#1102](https://github.com/awslabs/amazon-eks-ami/pull/1102)) +- Configure containerd registry certificates by default ([#1049](https://github.com/awslabs/amazon-eks-ami/pull/1049)) + ### 
AMI Release v20221112 * amazon-eks-gpu-node-1.24-v20221112 * amazon-eks-gpu-node-1.23-v20221112 From eab112a19877122e46a706d3a91d42b85218f268 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 4 Jan 2023 10:31:59 -0800 Subject: [PATCH 136/232] Decrease launch_block_device_mappings_volume_size to 4 (#1143) --- eks-worker-al2-variables.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 59d69ba28..2685e5aa1 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -18,7 +18,7 @@ "encrypted": "false", "kernel_version": "", "kms_key_id": "", - "launch_block_device_mappings_volume_size": "8", + "launch_block_device_mappings_volume_size": "4", "pause_container_version": "3.5", "pull_cni_from_github": "true", "remote_folder": "", From 6fefb5000896b42e6fb1d916bb88b9efec1bf37e Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 9 Jan 2023 18:35:50 -0800 Subject: [PATCH 137/232] AMI Release v20230105 (#1146) --- CHANGELOG.md | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a8d94770..0c2d77b6f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,54 @@ # Changelog +### AMI Release v20230105 +* amazon-eks-gpu-node-1.24-v20230105 +* amazon-eks-gpu-node-1.23-v20230105 +* amazon-eks-gpu-node-1.22-v20230105 +* amazon-eks-gpu-node-1.21-v20230105 +* amazon-eks-gpu-node-1.20-v20230105 +* amazon-eks-arm64-node-1.24-v20230105 +* amazon-eks-arm64-node-1.23-v20230105 +* amazon-eks-arm64-node-1.22-v20230105 +* amazon-eks-arm64-node-1.21-v20230105 +* amazon-eks-arm64-node-1.20-v20230105 +* amazon-eks-node-1.24-v20230105 +* amazon-eks-node-1.23-v20230105 +* amazon-eks-node-1.22-v20230105 +* amazon-eks-node-1.21-v20230105 +* amazon-eks-node-1.20-v20230105 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.7-20230105` +* 
`1.23.13-20230105` +* `1.22.15-20230105` +* `1.21.14-20230105` +* `1.20.15-20230105` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.24.7/2022-10-31/ +* s3://amazon-eks/1.23.13/2022-10-31/ +* s3://amazon-eks/1.22.15/2022-10-31/ +* s3://amazon-eks/1.21.14/2022-10-31/ +* s3://amazon-eks/1.20.15/2022-10-31/ + +AMI details: +* kernel: 5.4.226-129.415.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.4-1.amzn2 +* cuda: 11.4.0-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +- This will be the last release for 1.20 AMI's. +- Decrease `launch_block_device_mappings_volume_size` to 4 ([#1143](https://github.com/awslabs/amazon-eks-ami/pull/1143)). + - This fixes an issue with 4GiB launch block devices. More information is available in [#1142](https://github.com/awslabs/amazon-eks-ami/issues/1142). +- Container image caching has been disabled while we work to optimize the disk usage of this feature. This feature was only enabled for 1.24 AMI's in the previous release, [v20221222](https://github.com/awslabs/amazon-eks-ami/releases/tag/v20221222). + +Minor changes: +- Update AWS CLI to `2.9.12` + ### AMI Release v20221222 * amazon-eks-gpu-node-1.24-v20221222 * amazon-eks-gpu-node-1.23-v20221222 From e2ef710139a671b2a49b459c0b4654cdc7bab9ba Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 9 Jan 2023 18:44:02 -0800 Subject: [PATCH 138/232] Cache image content without unpacking/snapshotting (#1144) --- files/pull-image.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/pull-image.sh b/files/pull-image.sh index 2d37d88b9..ca89e2550 100755 --- a/files/pull-image.sh +++ b/files/pull-image.sh @@ -24,4 +24,4 @@ if [[ -z ${ecr_password} ]]; then echo >&2 "Unable to retrieve the ECR password." 
exit 1 fi -retry sudo ctr --namespace k8s.io image pull "${img}" --user AWS:${ecr_password} +retry sudo ctr --namespace k8s.io content fetch "${img}" --user AWS:${ecr_password} From b95c3e671fdbc82a4651968335e69205af70b549 Mon Sep 17 00:00:00 2001 From: Victor Pineda Gonzalez Date: Wed, 11 Jan 2023 11:09:06 -0800 Subject: [PATCH 139/232] Use external cloud provider for EKS Local deployments (#1111) --- files/bootstrap.sh | 10 ++++++++++ files/kubelet-containerd.service | 2 +- files/kubelet.service | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 51a12405b..52bf5847d 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -376,8 +376,13 @@ if [[ "${ENABLE_LOCAL_OUTPOST}" == "true" ]]; then mv /var/lib/kubelet/kubeconfig /var/lib/kubelet/bootstrap-kubeconfig KUBELET_EXTRA_ARGS="--bootstrap-kubeconfig /var/lib/kubelet/bootstrap-kubeconfig $KUBELET_EXTRA_ARGS" fi + ### For Local Outpost deployments, we will use the the external cloud provider + KUBELET_CLOUD_PROVIDER="external" else sed -i s,CLUSTER_NAME,$CLUSTER_NAME,g /var/lib/kubelet/kubeconfig + + ### For any other type of deployment we will use the aws cloud provider for backwards compatibility + KUBELET_CLOUD_PROVIDER="aws" fi ### kubelet.service configuration @@ -462,6 +467,11 @@ cat << EOF > /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf Environment='KUBELET_ARGS=--node-ip=$INTERNAL_IP --pod-infra-container-image=$PAUSE_CONTAINER --v=2' EOF +cat << EOF > /etc/systemd/system/kubelet.service.d/20-kubelet-cloud-provider.conf +[Service] +Environment='KUBELET_CLOUD_PROVIDER=$KUBELET_CLOUD_PROVIDER' +EOF + if [[ -n "$KUBELET_EXTRA_ARGS" ]]; then cat << EOF > /etc/systemd/system/kubelet.service.d/30-kubelet-extra-args.conf [Service] diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index b01a5bf15..74b30f70a 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -6,7 +6,7 
@@ Requires=containerd.service sandbox-image.service [Service] ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 -ExecStart=/usr/bin/kubelet --cloud-provider aws \ +ExecStart=/usr/bin/kubelet --cloud-provider $KUBELET_CLOUD_PROVIDER \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime remote \ diff --git a/files/kubelet.service b/files/kubelet.service index 387470da1..c650d491a 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -6,7 +6,7 @@ Requires=docker.service [Service] ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 -ExecStart=/usr/bin/kubelet --cloud-provider aws \ +ExecStart=/usr/bin/kubelet --cloud-provider $KUBELET_CLOUD_PROVIDER \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime docker \ From d0baa7af2561f18777cb5ecc484cb11220f59e05 Mon Sep 17 00:00:00 2001 From: Sichaow Date: Wed, 11 Jan 2023 11:10:55 -0800 Subject: [PATCH 140/232] Remove 1.20 (#1147) --- Makefile | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 71c410343..1f74fd28d 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.20 1.21 1.22 1.23 1.24 ## Build all versions of EKS Optimized AL2 AMI +all: 1.21 1.22 1.23 1.24 ## Build all versions of EKS Optimized AL2 AMI # ensure that these flags are equivalent to the rules in the .editorconfig SHFMT_FLAGS := --list \ @@ -80,10 +80,6 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html -.PHONY: 1.20 -1.20: ## Build EKS Optimized AL2 AMI - K8s 1.20 - $(MAKE) k8s kubernetes_version=1.20.15 kubernetes_build_date=2022-10-31 pull_cni_from_github=true - .PHONY: 1.21 1.21: ## Build EKS Optimized AL2 AMI - K8s 1.21 $(MAKE) k8s kubernetes_version=1.21.14 
kubernetes_build_date=2022-10-31 pull_cni_from_github=true From b798b4fe508dc359bca9e8ffb375b00d6723cc03 Mon Sep 17 00:00:00 2001 From: guessi Date: Thu, 12 Jan 2023 10:32:42 +0800 Subject: [PATCH 141/232] Clean up log collector SSM documents, README's (#1135) --- log-collector-script/README.md | 5 +- log-collector-script/linux/README.md | 137 ++++++++++-------- .../linux/eks-ssm-content.json | 118 +++++---------- log-collector-script/windows/README.md | 60 +++++--- .../windows/eks-ssm-content.json | 122 ++++++---------- 5 files changed, 205 insertions(+), 237 deletions(-) diff --git a/log-collector-script/README.md b/log-collector-script/README.md index 79951fa11..eda832408 100644 --- a/log-collector-script/README.md +++ b/log-collector-script/README.md @@ -1,2 +1,3 @@ -### EKS Logs Collector -This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. \ No newline at end of file +### EKS Logs Collector + +This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. diff --git a/log-collector-script/linux/README.md b/log-collector-script/linux/README.md index f634e271c..efa100fa5 100644 --- a/log-collector-script/linux/README.md +++ b/log-collector-script/linux/README.md @@ -1,6 +1,6 @@ -### EKS Logs Collector +### EKS Logs Collector (Linux) -This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. +This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. 
#### Usage @@ -8,7 +8,8 @@ At a high level, you run this script on your Kubernetes node, and it will collec * Collect EKS logs using SSM agent, jump to below [section](#collect-eks-logs-using-ssm-agent) _(or)_ -* Run this project as the root user: +* Run this project as the root user + ``` curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-log-collector.sh sudo bash eks-log-collector.sh @@ -17,93 +18,113 @@ sudo bash eks-log-collector.sh Confirm if the tarball file was successfully created (it can be .tgz or .tar.gz) #### Retrieving the logs -Download the tarball using your favourite Secure Copy tool. + +Download the tarball using your favorite Secure Copy tool. #### Example output -The project can be used in normal or enable_debug(**Caution: enable_debug will prompt to confirm if we can restart Docker daemon which would kill running containers**). + +The project can be used in normal or enable_debug (**Caution: enable_debug will prompt to confirm if we can restart Docker daemon which would kill running containers**). ``` -# sudo bash eks-log-collector.sh --help +$ sudo bash eks-log-collector.sh --help + USAGE: eks-log-collector --help [ --ignore_introspection=true|false --ignore_metrics=true|false ] OPTIONS: - --ignore_introspection To ignore introspection of IPAMD; Pass this flag if DISABLE_INTROSPECTION is enabled on CNI - - --ignore_metrics To ignore prometheus metrics collection; Pass this flag if DISABLE_METRICS enabled on CNI - --help Show this help message. 
+ --ignore_introspection To ignore introspection of IPAMD; Pass this flag if DISABLE_INTROSPECTION is enabled on CNI -Example to Ignore IPAMD introspection: -sudo bash eks-log-collector.sh --ignore_introspection=true + --ignore_metrics Variable To ignore prometheus metrics collection; Pass this flag if DISABLE_METRICS enabled on CNI -Example to Ignore IPAMD Prometheus metrics collection: -sudo bash eks-log-collector.sh --ignore_metrics=true - -Example to Ignore IPAMD introspection and Prometheus metrics collection: -sudo bash eks-log-collector.sh --ignore_introspection=true --ignore_metrics=true + --help Show this help message. ``` + #### Example output in normal mode + The following output shows this project running in normal mode. ``` -sudo bash eks-log-collector.sh - - This is version 0.6.1. New versions can be found at https://github.com/awslabs/amazon-eks-ami - -Trying to collect common operating system logs... -Trying to collect kernel logs... -Trying to collect mount points and volume information... -Trying to collect SELinux status... -Trying to collect iptables information... -Trying to collect installed packages... -Trying to collect active system services... -Trying to collect Docker daemon information... -Trying to collect kubelet information... -Trying to collect L-IPAMD information... -Trying to collect sysctls information... -Trying to collect networking infomation... -Trying to collect CNI configuration information... -Trying to collect running Docker containers and gather container data... -Trying to collect Docker daemon logs... -Trying to archive gathered information... - - Done... your bundled logs are located in /var/log/eks_i-0717c9d54b6cfaa19_2020-03-24_0103-UTC_0.6.1.tar.gz +$ sudo bash eks-log-collector.sh + + This is version 0.7.3. New versions can be found at https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/ + +Trying to collect common operating system logs... +Trying to collect kernel logs... 
+Trying to collect mount points and volume information... +Trying to collect SELinux status... +Trying to collect iptables information... +Trying to collect installed packages... +Trying to collect active system services... +Trying to Collect Containerd daemon information... +Trying to Collect Containerd running information... +Trying to Collect Docker daemon information... + + Warning: The Docker daemon is not running. + +Trying to collect kubelet information... +Trying to collect L-IPAMD introspection information... Trying to collect L-IPAMD prometheus metrics... Trying to collect L-IPAMD checkpoint... +Trying to collect Multus logs if they exist... +Trying to collect sysctls information... +Trying to collect networking infomation... conntrack v1.4.4 (conntrack-tools): 165 flow entries have been shown. + +Trying to collect CNI configuration information... +Trying to collect Docker daemon logs... +Trying to Collect sandbox-image daemon information... +Trying to Collect CPU Throttled Process Information... +Trying to Collect IO Throttled Process Information... +Trying to archive gathered information... + + Done... your bundled logs are located in /var/log/eks_i-XXXXXXXXXXXXXXXXX_2022-12-19_1639-UTC_0.7.3.tar.gz ``` +### Collect EKS logs using SSM agent + +#### To run EKS log collector script on Worker Node(s) and upload the bundle(tar) to a S3 Bucket using SSM agent, please follow below steps -### Collect EKS logs using SSM agent -#### To run EKS log collector script on Worker Node(s) and upload the bundle(tar) to a S3 Bucket using SSM agent, please follow below steps +##### Prerequisites -##### *Prerequisites*: +* Configure AWS CLI on the system where you will run the below commands. The IAM entity (User/Role) should have permissions to run/invoke `aws ssm create-document`, `aws ssm send-command` and `aws ssm get-command-invocation` commands. -* Configure AWS CLI on the system where you will run the below commands. 
The IAM entity (User/Role) should have permissions to run/invoke `aws ssm send-command` and `get-command-invocation` commands. + * `ssm:CreateDocument` + * `ssm:GetCommandInvocation` + * `ssm:SendCommand` * SSM agent should be installed and running on Worker Node(s). [How to Install SSM Agent link](https://docs.aws.amazon.com/systems-manager/latest/userguide/sysman-manual-agent-install.html) -* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonEC2RoleforSSM` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonEC2RoleforSSM` has `S3:PutObject` permission to all S3 resources. +* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonEC2RoleforSSM` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonEC2RoleforSSM` has `S3:PutObject` permission to all S3 resources. -        *Note:* For more granular control of the IAM permission check [AWS Systems Manager Permissions link ](https://docs.aws.amazon.com/systems-manager/latest/userguide/auth-and-access-control-permissions-reference.html) +*Note:* For more granular control of the IAM permission check [Actions defined by AWS Systems Manager](https://docs.aws.amazon.com/IAM/latest/UserGuide/list_awssystemsmanager.html%23awssystemsmanager-actions-as-permissions) * A S3 bucket location is required which is taken as an input parameter to `aws ssm send-command` command, to which the logs should be pushed. +#### To invoke SSM agent to run EKS log collector script and push bundle to S3 from Worker Node(s) -#### *To invoke SSM agent to run EKS log collector script and push bundle to S3 from Worker Node(s):* +1. Create the SSM document named "EKSLogCollector" using the following commands: -1. Create the SSM document named "EKSLogCollector" using the following commands:
``` -curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-ssm-content.json -aws ssm create-document --name "EKSLogCollector" --document-type "Command" --content file://eks-ssm-content.json +aws ssm create-document \ + --name "EKSLogCollectorLinux" \ + --document-type "Command" \ + --content https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-ssm-content.json ``` -2. To execute the bash script in the SSM document and to collect the logs from worker, run the following command:
-``` -aws ssm send-command --instance-ids --document-name "EKSLogCollector" --parameters "bucketName=" --output json + +2. To execute the bash script in the SSM document and to collect the logs from the worker node, run the following command: + ``` -3. To check the status of SSM command submitted in previous step use the command
-``` -aws ssm get-command-invocation --command-id "" --instance-id "" --output text +aws ssm send-command \ + --instance-ids <EC2 Instance ID> \ + --document-name "EKSLogCollectorLinux" \ + --parameters "bucketName=<S3 bucket name>" \ + --output json ``` -    `SSM command ID`One of the response parameters after running `aws ssm send-command` in step2
-    `EC2 Instance ID`The EC2 Instance ID provided in the `aws ssm send-command` in step2 -4. Once the above command is executed successfully, the logs should be present in the S3 bucket specified in the previous step. +3. To check the status of SSM command submitted in previous step use the command + +``` +aws ssm get-command-invocation \ + --command-id "" \ + --instance-id "" \ + --output text +``` +4. Once the above command is executed successfully, the logs should be present in the S3 bucket specified in the previous step. diff --git a/log-collector-script/linux/eks-ssm-content.json b/log-collector-script/linux/eks-ssm-content.json index a830f5068..42caf4503 100644 --- a/log-collector-script/linux/eks-ssm-content.json +++ b/log-collector-script/linux/eks-ssm-content.json @@ -1,83 +1,43 @@ { - "schemaVersion": "2.2", - "description": "EKS Log Collector", - "parameters": { - "bucketName": { - "type": "String", - "default": "Enabled" - } - }, - "mainSteps": [ - { - "action": "aws:runShellScript", - "name": "PatchLinux", - "precondition": { - "StringEquals": [ - "platformType", - "Linux" - ] - }, - "inputs": { - "runCommand": [ - "curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-log-collector.sh", - "echo \"Cleaning old eks-log-collector files\"", - "rm /var/log/eks_i*", - "bash ./eks-log-collector.sh >/dev/null 2>&1", - "echo \"EKS logs collected\"", - "if [ -f /usr/local/bin/aws ]; then", - "echo \"AWS_already_installed\"", - "else", - "echo \"Installing AWSCLI\"", - "curl \"https://s3.amazonaws.com/aws-cli/awscli-bundle.zip\" -o \"awscli-bundle.zip\" >/dev/null 2>&1", - "yum install unzip -y >/dev/null 2>&1", - "unzip awscli-bundle.zip >/dev/null 2>&1", - "./awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws", - "echo \"AWSCLI version is\"", - "/usr/local/bin/aws --version", - "fi", - "echo \"Pushing to S3\"", - "/usr/local/bin/aws s3 cp /var/log/eks_i* s3://{{bucketName}}", - "echo \"Logs uploaded 
to S3\"" - ] - } + "schemaVersion": "2.2", + "description": "EKS Log Collector", + "parameters": { + "bucketName": { + "type": "String", + "default": "Enabled" + } + }, + "mainSteps": [ + { + "action": "aws:runShellScript", + "name": "PatchLinux", + "precondition": { + "StringEquals": ["platformType", "Linux"] }, - { - "precondition": { - "StringEquals": [ - "platformType", - "Windows" - ] - }, - "action": "aws:runPowerShellScript", - "name": "PatchWindows", - "inputs": { - "runCommand": [ - "if (!(Get-Module 'AWSPowerShell')) { ", - " Write-Host 'AWSPowerShell does not exist' ", - " Install-Module -Name AWSPowerShell -Force ", - "} ", - "try { ", - " Write-Host 'Downloading EKS Log collector script' ", - " Invoke-WebRequest -UseBasicParsing 'https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-log-collector.ps1' -OutFile eks-log-collector.ps1 ", - "} ", - "catch { ", - " Write-Host 'Uploaded to S3 failed' ", - " break ", - "} ", - "powershell .\\eks-log-collector.ps1", - "try { ", - " Write-Host 'Pushing to S3' ", - " Write-S3Object -BucketName {{bucketName}} -Folder C:\\log-collector -KeyPrefix eks-log-collector\\ -SearchPattern *.zip ", - " Write-Host 'Logs uploaded to S3' ", - "} ", - "catch { ", - " Write-Host 'Uploaded to S3 failed' ", - " break ", - "} ", - "", - "Remove-Item -Force .\\eks-log-collector.ps1 -ErrorAction Ignore " - ] - } + "inputs": { + "runCommand": [ + "curl -s -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-log-collector.sh", + "echo \"* Cleaning old eks-log-collector files\"", + "rm -vf /var/log/eks_i*", + "bash ./eks-log-collector.sh >/dev/null 2>&1", + "echo \"* EKS logs collected\"", + "if [ -f /bin/aws ]; then", + "echo \"* AWS CLI v2 already installed\"", + "else", + "echo \"* Installing AWS CLI v2\"", + "AWSCLI_DIR=$(mktemp -d)", + "curl \"https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip\" -o \"${AWSCLI_DIR}/awscliv2.zip\" 
>/dev/null 2>&1", + "yum install unzip -y >/dev/null 2>&1", + "unzip -q ${AWSCLI_DIR}/awscliv2.zip -d ${AWSCLI_DIR} >/dev/null 2>&1", + "${AWSCLI_DIR}/aws/install --bin-dir /bin --update", + "/bin/aws --version", + "fi", + "echo \"* Bucket name: s3://{{bucketName}}\"", + "echo \"* Pushing to S3\"", + "/bin/aws s3 cp /var/log/eks_i* s3://{{bucketName}}", + "echo \"* Logs uploaded to S3\"" + ] } - ] + } + ] } diff --git a/log-collector-script/windows/README.md b/log-collector-script/windows/README.md index b9a1612b8..374a4053b 100644 --- a/log-collector-script/windows/README.md +++ b/log-collector-script/windows/README.md @@ -1,23 +1,28 @@ -### EKS Logs Collector +### EKS Logs Collector (Windows) -This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. +This project was created to collect Amazon EKS log files and OS logs for troubleshooting Amazon EKS customer support cases. #### Usage + * Collect EKS logs using SSM agent, jump to below [section](#collect-eks-logs-using-ssm-agent) _(or)_ * Run this project as the Administrator user: + ``` Invoke-WebRequest -OutFile eks-log-collector.ps1 https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-log-collector.ps1 .\eks-log-collector.ps1 ``` #### Example output + The project can be used in normal or Enable/Disable Debug(**Caution: Enable/Disable Debug will restart Docker daemon which would kill running containers**). ``` USAGE: .\eks-log-collector.ps1 ``` + #### Example output in normal mode + The following output shows this project running in normal mode. ``` @@ -65,39 +70,54 @@ Archiving gathered data Done... 
your bundled logs are located in C:\log-collector\eks_i-0b318f704c74b6ab2_20200101T0620179658Z.zip ``` +### Collect EKS logs using SSM agent + +#### To run EKS log collector script on Worker Node(s) and upload the bundle(tar) to a S3 Bucket using SSM agent, please follow below steps -### Collect EKS logs using SSM agent -#### To run EKS log collector script on Worker Node(s) and upload the bundle(tar) to a S3 Bucket using SSM agent, please follow below steps +##### Prerequisites -##### *Prerequisites*: +* Configure AWS CLI on the system where you will run the below commands. The IAM entity (User/Role) should have permissions to run/invoke `aws ssm create-document`, `aws ssm send-command` and `aws ssm get-command-invocation` commands. -* Configure AWS CLI on the system where you will run the below commands. The IAM entity (User/Role) should have permissions to run/invoke `aws ssm send-command` and `get-command-invocation` commands. + * `ssm:CreateDocument` + * `ssm:GetCommandInvocation` + * `ssm:SendCommand` * SSM agent should be installed and running on Worker Node(s). [How to Install SSM Agent link](https://docs.aws.amazon.com/systems-manager/latest/userguide/sysman-manual-agent-install.html) -* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonEC2RoleforSSM` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonEC2RoleforSSM` has `S3:PutObject` permission to all S3 resources. +* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonEC2RoleforSSM` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonEC2RoleforSSM` has `S3:PutObject` permission to all S3 resources. 
-        *Note:* For more granular control of the IAM permission check [AWS Systems Manager Permissions link ](https://docs.aws.amazon.com/systems-manager/latest/userguide/auth-and-access-control-permissions-reference.html) +*Note:* For more granular control of the IAM permission check [Actions defined by AWS Systems Manager](https://docs.aws.amazon.com/IAM/latest/UserGuide/list_awssystemsmanager.html%23awssystemsmanager-actions-as-permissions) * A S3 bucket location is required which is taken as an input parameter to `aws ssm send-command` command, to which the logs should be pushed. +#### To invoke SSM agent to run EKS log collector script and push bundle to S3 from Worker Node(s) -#### *To invoke SSM agent to run EKS log collector script and push bundle to S3 from Worker Node(s):* +1. Create the SSM document named "EKSLogCollector" using the following commands: -1. Create the SSM document named "EKSLogCollector" using the following command:
``` -aws ssm create-document --name "EKSLogCollector" --document-type "Command" --content https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-ssm-content.json +aws ssm create-document \ + --name "EKSLogCollectorWindows" \ + --document-type "Command" \ + --content https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-ssm-content.json ``` -2. To execute the bash script in the SSM document and to collect the logs from worker, run the following command:
-``` -aws ssm send-command --instance-ids --document-name "EKSLogCollector" --parameters "bucketName=" --output json + +2. To execute the PowerShell script in the SSM document and to collect the logs from the worker node, run the following command: + ``` -3. To check the status of SSM command submitted in previous step use the command
-``` -aws ssm get-command-invocation --command-id "" --instance-id "" --output text +aws ssm send-command \ + --instance-ids <EC2 Instance ID> \ + --document-name "EKSLogCollectorWindows" \ + --parameters "bucketName=<S3 bucket name>" \ + --output json ``` -    `SSM command ID`One of the response parameters after running `aws ssm send-command` in step2
-    `EC2 Instance ID`The EC2 Instance ID provided in the `aws ssm send-command` in step2 -4. Once the above command is executed successfully, the logs should be present in the S3 bucket specified in the previous step. +3. To check the status of SSM command submitted in previous step use the command + +``` +aws ssm get-command-invocation \ + --command-id "" \ + --instance-id "" \ + --output text +``` +4. Once the above command is executed successfully, the logs should be present in the S3 bucket specified in the previous step. diff --git a/log-collector-script/windows/eks-ssm-content.json b/log-collector-script/windows/eks-ssm-content.json index c2f4f4ed5..a3d6360a0 100644 --- a/log-collector-script/windows/eks-ssm-content.json +++ b/log-collector-script/windows/eks-ssm-content.json @@ -1,81 +1,47 @@ { - "schemaVersion":"2.2", - "description":"EKS Log Collector", - "parameters":{ - "bucketName":{ - "type": "String", - "default": "Enabled" - } - }, - "mainSteps":[ - { - "action": "aws:runShellScript", - "name": "PatchLinux", - "precondition": { - "StringEquals": [ - "platformType", - "Linux" - ] - }, - "inputs": { - "runCommand": [ - "curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-log-collector.sh", - "bash ./eks-log-collector.sh >/dev/null 2>&1", - "echo \"EKS logs collected\"", - "if [ -f /usr/local/bin/aws ]; then", - "echo \"AWS_already_installed\"", - "else", - "echo \"Installing AWSCLI\"", - "curl \"https://s3.amazonaws.com/aws-cli/awscli-bundle.zip\" -o \"awscli-bundle.zip\" >/dev/null 2>&1", - "yum install unzip -y >/dev/null 2>&1", - "unzip awscli-bundle.zip >/dev/null 2>&1", - "./awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws", - "echo \"AWSCLI version is\"", - "/usr/local/bin/aws --version", - "fi", - "echo \"Pushing to S3\"", - "/usr/local/bin/aws s3 cp --recursive /opt/log-collector/ s3://{{bucketName}}", - "echo \"Logs uploaded to S3\"" - ] - } + "schemaVersion": "2.2", + 
"description": "EKS Log Collector", + "parameters": { + "bucketName": { + "type": "String", + "default": "Enabled" + } + }, + "mainSteps": [ + { + "precondition": { + "StringEquals": ["platformType", "Windows"] }, - { - "precondition": { - "StringEquals": [ - "platformType", - "Windows" - ] - }, - "action": "aws:runPowerShellScript", - "name": "PatchWindows", - "inputs": { - "runCommand": [ - "if (!(Get-Module 'AWSPowerShell')) { ", - " Write-Host 'AWSPowerShell does not exist' ", - " Install-Module -Name AWSPowerShell -Force ", - "} ", - "try { ", - " Write-Host 'Downloading EKS Log collector script' ", - " Invoke-WebRequest -UseBasicParsing 'https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-log-collector.ps1' -OutFile eks-log-collector.ps1 ", - "} ", - "catch { ", - " Write-Host 'Uploaded to S3 failed' ", - " break ", - "} ", - "powershell .\\eks-log-collector.ps1", - "try { ", - " Write-Host 'Pushing to S3' ", - " Write-S3Object -BucketName {{bucketName}} -Folder C:\\log-collector -KeyPrefix eks-log-collector\\ -SearchPattern *.zip ", - " Write-Host 'Logs uploaded to S3' ", - "} ", - "catch { ", - " Write-Host 'Uploaded to S3 failed' ", - " break ", - "} ", - "", - "Remove-Item -Force .\\eks-log-collector.ps1 -ErrorAction Ignore " - ] - } + "action": "aws:runPowerShellScript", + "name": "PatchWindows", + "inputs": { + "runCommand": [ + "if (!(Get-Module 'AWSPowerShell')) { ", + " Write-Host 'AWSPowerShell does not exist' ", + " Install-Module -Name AWSPowerShell -Force ", + "} ", + "try { ", + " Write-Host 'Downloading EKS Log collector script' ", + " Invoke-WebRequest -UseBasicParsing 'https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/windows/eks-log-collector.ps1' -OutFile eks-log-collector.ps1 ", + "} ", + "catch { ", + " Write-Host 'Uploaded to S3 failed' ", + " break ", + "} ", + "powershell .\\eks-log-collector.ps1", + "try { ", + " Write-Host 'Pushing to S3' ", + " 
Write-S3Object -BucketName {{bucketName}} -Folder C:\\log-collector -KeyPrefix eks-log-collector\\ -SearchPattern *.zip ", + " Write-Host 'Logs uploaded to S3' ", + "} ", + "catch { ", + " Write-Host 'Uploaded to S3 failed' ", + " break ", + "} ", + "", + "Remove-Item -Force .\\eks-log-collector.ps1 -ErrorAction Ignore " + ] } - ] -} \ No newline at end of file + } + ] +} From c0318dae76b5cef29e506b2a6b1cd9a1ee2afb13 Mon Sep 17 00:00:00 2001 From: Steve Hipwell Date: Thu, 26 Jan 2023 19:09:15 +0000 Subject: [PATCH 142/232] Use runtime.slice for containerd runtime (#1051) --- files/bootstrap.sh | 14 +++++++++++--- files/kubelet-containerd.service | 3 +++ files/runtime.slice | 4 ++++ scripts/install-worker.sh | 6 ++++++ 4 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 files/runtime.slice diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 52bf5847d..545ddd1e3 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -490,12 +490,20 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then sudo mkdir -p /etc/containerd sudo mkdir -p /etc/cni/net.d - if [[ -n "$CONTAINERD_CONFIG_FILE" ]]; then - sudo cp -v $CONTAINERD_CONFIG_FILE /etc/eks/containerd/containerd-config.toml + + sudo mkdir -p /etc/systemd/system/containerd.service.d + printf '[Service]\nSlice=runtime.slice\n' | sudo tee /etc/systemd/system/containerd.service.d/00-runtime-slice.conf + + if [[ -n "${CONTAINERD_CONFIG_FILE}" ]]; then + sudo cp -v "${CONTAINERD_CONFIG_FILE}" /etc/eks/containerd/containerd-config.toml fi - echo "$(jq '.cgroupDriver="systemd"' $KUBELET_CONFIG)" > $KUBELET_CONFIG + sudo sed -i s,SANDBOX_IMAGE,$PAUSE_CONTAINER,g /etc/eks/containerd/containerd-config.toml + echo "$(jq '.cgroupDriver="systemd"' "${KUBELET_CONFIG}")" > "${KUBELET_CONFIG}" + echo "$(jq '.systemReservedCgroup="/system"' "${KUBELET_CONFIG}")" > "${KUBELET_CONFIG}" + echo "$(jq '.kubeReservedCgroup="/runtime"' "${KUBELET_CONFIG}")" > "${KUBELET_CONFIG}" + # Check if the containerd config file is 
the same as the one used in the image build. # If different, then restart containerd w/ proper config if ! cmp -s /etc/eks/containerd/containerd-config.toml /etc/containerd/config.toml; then diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index 74b30f70a..72b58f8b8 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -5,6 +5,7 @@ After=containerd.service sandbox-image.service Requires=containerd.service sandbox-image.service [Service] +Slice=runtime.slice ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 ExecStart=/usr/bin/kubelet --cloud-provider $KUBELET_CLOUD_PROVIDER \ --config /etc/kubernetes/kubelet/kubelet-config.json \ @@ -17,6 +18,8 @@ Restart=on-failure RestartForceExitStatus=SIGPIPE RestartSec=5 KillMode=process +CPUAccounting=true +MemoryAccounting=true [Install] WantedBy=multi-user.target diff --git a/files/runtime.slice b/files/runtime.slice new file mode 100644 index 000000000..5e189639a --- /dev/null +++ b/files/runtime.slice @@ -0,0 +1,4 @@ +[Unit] +Description=Kubernetes and container runtime slice +Documentation=man:systemd.special(7) +Before=slices.target diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index cf1008410..ee59a90e4 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -141,6 +141,12 @@ else sudo yum install -y awscli fi +################################################################################ +### systemd #################################################################### +################################################################################ + +sudo mv "${TEMPLATE_DIR}/runtime.slice" /etc/systemd/system/runtime.slice + ############################################################################### ### Containerd setup ########################################################## ############################################################################### From d498b68df8ae61361782595275bb9f121b2d67b3 Mon 
Sep 17 00:00:00 2001 From: Jeffrey Nelson Date: Thu, 26 Jan 2023 17:02:28 -0600 Subject: [PATCH 143/232] Update max pods values (#1153) --- files/eni-max-pods.txt | 52 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index a8de14260..889e6b339 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. # -# This file was generated at 2022-09-21T13:34:09-07:00 +# This file was generated at 2023-01-25T20:06:57Z # # The regions queried were: # - ap-northeast-1 @@ -158,6 +158,15 @@ c6id.8xlarge 234 c6id.large 29 c6id.metal 737 c6id.xlarge 58 +c6in.12xlarge 234 +c6in.16xlarge 737 +c6in.24xlarge 737 +c6in.2xlarge 58 +c6in.32xlarge 345 +c6in.4xlarge 234 +c6in.8xlarge 234 +c6in.large 29 +c6in.xlarge 58 c7g.12xlarge 234 c7g.16xlarge 737 c7g.2xlarge 58 @@ -209,7 +218,7 @@ g5.12xlarge 737 g5.16xlarge 234 g5.24xlarge 737 g5.2xlarge 58 -g5.48xlarge 737 +g5.48xlarge 345 g5.4xlarge 234 g5.8xlarge 234 g5.xlarge 58 @@ -224,6 +233,7 @@ h1.2xlarge 58 h1.4xlarge 234 h1.8xlarge 234 hpc6a.48xlarge 100 +hpc6id.32xlarge 51 hs1.8xlarge 234 i2.2xlarge 58 i2.4xlarge 234 @@ -393,6 +403,24 @@ m6id.8xlarge 234 m6id.large 29 m6id.metal 737 m6id.xlarge 58 +m6idn.12xlarge 234 +m6idn.16xlarge 737 +m6idn.24xlarge 737 +m6idn.2xlarge 58 +m6idn.32xlarge 345 +m6idn.4xlarge 234 +m6idn.8xlarge 234 +m6idn.large 29 +m6idn.xlarge 58 +m6in.12xlarge 234 +m6in.16xlarge 737 +m6in.24xlarge 737 +m6in.2xlarge 58 +m6in.32xlarge 345 +m6in.4xlarge 234 +m6in.8xlarge 234 +m6in.large 29 +m6in.xlarge 58 mac1.metal 234 mac2.metal 234 p2.16xlarge 234 @@ -525,6 +553,24 @@ r6id.8xlarge 234 r6id.large 29 r6id.metal 737 r6id.xlarge 58 +r6idn.12xlarge 234 +r6idn.16xlarge 737 +r6idn.24xlarge 737 +r6idn.2xlarge 58 +r6idn.32xlarge 345 +r6idn.4xlarge 234 +r6idn.8xlarge 234 +r6idn.large 
29 +r6idn.xlarge 58 +r6in.12xlarge 234 +r6in.16xlarge 737 +r6in.24xlarge 737 +r6in.2xlarge 58 +r6in.32xlarge 345 +r6in.4xlarge 234 +r6in.8xlarge 234 +r6in.large 29 +r6in.xlarge 58 t1.micro 4 t2.2xlarge 44 t2.large 35 @@ -558,7 +604,9 @@ trn1.2xlarge 58 trn1.32xlarge 247 u-12tb1.112xlarge 737 u-12tb1.metal 147 +u-18tb1.112xlarge 737 u-18tb1.metal 737 +u-24tb1.112xlarge 737 u-24tb1.metal 737 u-3tb1.56xlarge 234 u-6tb1.112xlarge 737 From 292239bb94297daa539f3413f54eb9433f4db3d4 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 31 Jan 2023 10:50:27 -0800 Subject: [PATCH 144/232] Skip docker installation for Kubernetes 1.25+ (#1157) --- scripts/install-worker.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index ee59a90e4..c68af36cf 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -203,7 +203,12 @@ EOF sudo yum install -y device-mapper-persistent-data lvm2 -INSTALL_DOCKER="${INSTALL_DOCKER:-true}" +if [[ ! -v "INSTALL_DOCKER" ]]; then + INSTALL_DOCKER=$(vercmp "$KUBERNETES_VERSION" lt "1.25.0" || true) +else + echo "WARNING: using override INSTALL_DOCKER=${INSTALL_DOCKER}. This option is deprecated and will be removed in a future release." 
+fi + if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo amazon-linux-extras enable docker sudo groupadd -og 1950 docker From 7f022ffd60d841c2123103b65831cc2af7b63616 Mon Sep 17 00:00:00 2001 From: Sichaow Date: Tue, 31 Jan 2023 13:47:30 -0800 Subject: [PATCH 145/232] Update kubelet version and build date to 2023-01-11 (#1160) --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 1f74fd28d..3d25473cd 100644 --- a/Makefile +++ b/Makefile @@ -82,19 +82,19 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI .PHONY: 1.21 1.21: ## Build EKS Optimized AL2 AMI - K8s 1.21 - $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2022-10-31 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2023-01-11 pull_cni_from_github=true .PHONY: 1.22 1.22: ## Build EKS Optimized AL2 AMI - K8s 1.22 - $(MAKE) k8s kubernetes_version=1.22.15 kubernetes_build_date=2022-10-31 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-01-11 pull_cni_from_github=true .PHONY: 1.23 1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 - $(MAKE) k8s kubernetes_version=1.23.13 kubernetes_build_date=2022-10-31 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.23.15 kubernetes_build_date=2023-01-11 pull_cni_from_github=true .PHONY: 1.24 1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 - $(MAKE) k8s kubernetes_version=1.24.7 kubernetes_build_date=2022-10-31 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.24.9 kubernetes_build_date=2023-01-11 pull_cni_from_github=true .PHONY: help help: ## Display help From 1eed59e5b02dccc2796255fe2414f9d68bcd29bb Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 31 Jan 2023 15:11:58 -0800 Subject: [PATCH 146/232] Uses IMDSv2 in log collector script (#1163) --- log-collector-script/linux/eks-log-collector.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git 
a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index e7d45f9d1..50f759461 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -177,6 +177,9 @@ systemd_check() { fi } +# Get token for IMDSv2 calls +IMDS_TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 360") + create_directories() { # Make sure the directory the script lives in is there. Not an issue if # the EKS AMI is used, as it will have it. @@ -195,7 +198,7 @@ get_instance_id() { cp ${INSTANCE_ID_FILE} "${COLLECT_DIR}"/system/instance-id.txt readonly INSTANCE_ID=$(cat "${COLLECT_DIR}"/system/instance-id.txt) else - readonly INSTANCE_ID=$(curl -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/instance-id) + readonly INSTANCE_ID=$(curl -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/instance-id) if [ 0 -eq $? ]; then # Check if previous command was successful. echo "${INSTANCE_ID}" > "${COLLECT_DIR}"/system/instance-id.txt else @@ -205,13 +208,13 @@ get_instance_id() { } get_region() { - if REGION=$(curl -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/placement/region); then + if REGION=$(curl -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/placement/region); then echo "${REGION}" > "${COLLECT_DIR}"/system/region.txt else warning "Unable to find EC2 Region, skipping." fi - if AZ=$(curl -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/placement/availability-zone); then + if AZ=$(curl -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" -f -s --max-time 10 --retry 5 http://169.254.169.254/latest/meta-data/placement/availability-zone); then echo "${AZ}" > "${COLLECT_DIR}"/system/availability-zone.txt else warning "Unable to find EC2 AZ, skipping." 
From 343e830dc4cbf0b2646026fced4cea7202a345a5 Mon Sep 17 00:00:00 2001 From: Sichaow Date: Wed, 1 Feb 2023 18:01:09 -0800 Subject: [PATCH 147/232] update CHANGELOG for AMI Release v20230127 (#1165) * Update CHANGELOG for AMI Release v20230127 * Update CHANGELOG for AMI Release v20230127 --- CHANGELOG.md | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c2d77b6f..760784636 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,52 @@ # Changelog +### AMI Release v20230127 +* amazon-eks-gpu-node-1.24-v20230127 +* amazon-eks-gpu-node-1.23-v20230127 +* amazon-eks-gpu-node-1.22-v20230127 +* amazon-eks-gpu-node-1.21-v20230127 +* amazon-eks-arm64-node-1.24-v20230127 +* amazon-eks-arm64-node-1.23-v20230127 +* amazon-eks-arm64-node-1.22-v20230127 +* amazon-eks-arm64-node-1.21-v20230127 +* amazon-eks-node-1.24-v20230127 +* amazon-eks-node-1.23-v20230127 +* amazon-eks-node-1.22-v20230127 +* amazon-eks-node-1.21-v20230127 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.9-20230127` +* `1.23.15-20230127` +* `1.22.17-20230127` +* `1.21.14-20230127` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.24.9/2023-01-11/ +* s3://amazon-eks/1.23.15/2023-01-11/ +* s3://amazon-eks/1.22.17/2023-01-11/ +* s3://amazon-eks/1.21.14/2023-01-11/ + +AMI details: +* kernel: 5.4.228-131.415.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.4-1.amzn2 +* cuda: 11.4.0-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +- Updated kernel version to `5.4.228-131.415.amzn2` for: + - [ALAS2KERNEL-5.4-2023-041](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-041.html). 
+- Add support for `C6in`, `M6in`, `M6idn`, `R6in`, `R6idn` and `Hpc6id` instances [#1153](https://github.com/awslabs/amazon-eks-ami/pull/1153) +- This is the first AMI release available in `ap-south-2`, `eu-central-2`, and `eu-south-2`. +- Cache image content without unpacking/snapshotting [#1144](https://github.com/awslabs/amazon-eks-ami/pull/1144) + - Container image caching has been re-enabled for 1.24 AMI's. + +Minor changes: +- Update AWS CLI to `2.9.18` +- Configure containerd registry certificates by default in the GPU AMI. + ### AMI Release v20230105 * amazon-eks-gpu-node-1.24-v20230105 * amazon-eks-gpu-node-1.23-v20230105 From aafd2c6dfbb7c1992be9ff64edee7c74d2cd0b23 Mon Sep 17 00:00:00 2001 From: Sichaow Date: Thu, 2 Feb 2023 14:51:12 -0800 Subject: [PATCH 148/232] Add C7g.metal, M7g, R7g instance (#1166) --- files/eni-max-pods.txt | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 889e6b339..6bef4aed6 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2023-01-25T20:06:57Z +# This file was generated at 2023-02-02T10:15:26-08:00 # # The regions queried were: # - ap-northeast-1 @@ -421,6 +421,15 @@ m6in.4xlarge 234 m6in.8xlarge 234 m6in.large 29 m6in.xlarge 58 +m7g.12xlarge 234 +m7g.16xlarge 737 +m7g.2xlarge 58 +m7g.4xlarge 234 +m7g.8xlarge 234 +m7g.large 29 +m7g.medium 8 +m7g.metal 737 +m7g.xlarge 58 mac1.metal 234 mac2.metal 234 p2.16xlarge 234 @@ -571,6 +580,15 @@ r6in.4xlarge 234 r6in.8xlarge 234 r6in.large 29 r6in.xlarge 58 +r7g.12xlarge 234 +r7g.16xlarge 737 +r7g.2xlarge 58 +r7g.4xlarge 234 +r7g.8xlarge 234 +r7g.large 29 +r7g.medium 8 +r7g.metal 737 +r7g.xlarge 58 t1.micro 4 t2.2xlarge 44 t2.large 35 From 4b9b546dc325e6372e705f1e192f68395ce017db Mon Sep 17 00:00:00 2001 From: Victor Pineda Gonzalez Date: Thu, 2 Feb 2023 16:04:16 -0800 Subject: [PATCH 149/232] Revert "Use external cloud provider for EKS Local deployments" (#1167) This reverts commit 2297119cee8466d0c4516e775cd030bcf01971bd. --- files/bootstrap.sh | 10 ---------- files/kubelet-containerd.service | 2 +- files/kubelet.service | 2 +- 3 files changed, 2 insertions(+), 12 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 545ddd1e3..f24595cc6 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -376,13 +376,8 @@ if [[ "${ENABLE_LOCAL_OUTPOST}" == "true" ]]; then mv /var/lib/kubelet/kubeconfig /var/lib/kubelet/bootstrap-kubeconfig KUBELET_EXTRA_ARGS="--bootstrap-kubeconfig /var/lib/kubelet/bootstrap-kubeconfig $KUBELET_EXTRA_ARGS" fi - ### For Local Outpost deployments, we will use the the external cloud provider - KUBELET_CLOUD_PROVIDER="external" else sed -i s,CLUSTER_NAME,$CLUSTER_NAME,g /var/lib/kubelet/kubeconfig - - ### For any other type of deployment we will use the aws cloud provider for backwards compatibility - KUBELET_CLOUD_PROVIDER="aws" fi ### kubelet.service configuration @@ -467,11 +462,6 @@ cat << EOF > /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf 
Environment='KUBELET_ARGS=--node-ip=$INTERNAL_IP --pod-infra-container-image=$PAUSE_CONTAINER --v=2' EOF -cat << EOF > /etc/systemd/system/kubelet.service.d/20-kubelet-cloud-provider.conf -[Service] -Environment='KUBELET_CLOUD_PROVIDER=$KUBELET_CLOUD_PROVIDER' -EOF - if [[ -n "$KUBELET_EXTRA_ARGS" ]]; then cat << EOF > /etc/systemd/system/kubelet.service.d/30-kubelet-extra-args.conf [Service] diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index 72b58f8b8..d043b30b9 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -7,7 +7,7 @@ Requires=containerd.service sandbox-image.service [Service] Slice=runtime.slice ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 -ExecStart=/usr/bin/kubelet --cloud-provider $KUBELET_CLOUD_PROVIDER \ +ExecStart=/usr/bin/kubelet --cloud-provider aws \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime remote \ diff --git a/files/kubelet.service b/files/kubelet.service index c650d491a..387470da1 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -6,7 +6,7 @@ Requires=docker.service [Service] ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 -ExecStart=/usr/bin/kubelet --cloud-provider $KUBELET_CLOUD_PROVIDER \ +ExecStart=/usr/bin/kubelet --cloud-provider aws \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime docker \ From 3fb401098f20541c15a61e1e0937981f1e7ea077 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 3 Feb 2023 09:40:56 -0800 Subject: [PATCH 150/232] Add ALAS issue workflow (#1158) --- .github/workflows/alas-issues.yaml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 .github/workflows/alas-issues.yaml diff --git a/.github/workflows/alas-issues.yaml b/.github/workflows/alas-issues.yaml new file mode 100644 index 000000000..d71611bdc --- /dev/null +++ 
b/.github/workflows/alas-issues.yaml @@ -0,0 +1,26 @@ +--- +name: "[ALAS] Open issues for new bulletins" +on: + workflow_dispatch: + inputs: + window: + description: "Only consider bulletins published within this relative time window (golang Duration)" + default: "24h" + required: true + schedule: + # once an hour, at the top of hour + - cron: "0 * * * *" +permissions: + issues: write +jobs: + alas-al2-bulletins: + runs-on: ubuntu-latest + steps: + - uses: guilhem/rss-issues-action@0.5.2 + with: + repo-token: "${{ secrets.GITHUB_TOKEN }}" + feed: "https://alas.aws.amazon.com/AL2/alas.rss" + dry-run: "true" + lastTime: "${{ github.event.inputs.window || '24h' }}" + labels: "alas,alas/al2" + titleFilter: "(medium|low)" From ccf5b1525dfa7a17f5cae21f8d8395a6f21b45d8 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 6 Feb 2023 16:02:09 -0800 Subject: [PATCH 151/232] Upgrade 1.24 to kernel 5.10 (#1118) --- doc/USER_GUIDE.md | 11 ++++++++++- eks-worker-al2.json | 27 +++++++++++++++++---------- scripts/install-worker.sh | 7 ------- scripts/upgrade_kernel.sh | 9 ++++++--- 4 files changed, 33 insertions(+), 21 deletions(-) diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index 0ed73532b..c565cff4d 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -249,7 +249,16 @@ $ curl -sSL "http://localhost:8001/api/v1/nodes/ip-192-168-92-220.us-east-2.comp By default, the `amazon-eks-ami` uses a [source_ami_filter](https://github.com/awslabs/amazon-eks-ami/blob/e3f1b910f83ad1f27e68312e50474ea6059f052d/eks-worker-al2.json#L46) that selects the latest [hvm](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/virtualization_types.html) AL2 AMI for the given architecture as the base AMI. For more information on what kernel versions are running on published Amazon EKS optimized Linux AMIs, see [the public documentation](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html). 
-When building an AMI, you can set the `kernel_version` to `4.14` or `5.4` to customize the kernel version. The [upgrade_kernel.sh script](https://github.com/awslabs/amazon-eks-ami/blob/master/scripts/upgrade_kernel.sh#L26) contains the logic for updating and upgrading the kernel. For Kubernetes versions 1.18 and below, it uses the `4.14` kernel if not set, and it will install the latest patches. For Kubernetes version 1.19 and above, it uses the `5.4` kernel if not set. +When building an AMI, you can set `kernel_version` to customize the kernel version. Valid values are: +- `4.14` +- `5.4` +- `5.10` + +If `kernel_version` is not set: +- For Kubernetes 1.23 and below, `5.4` is used. +- For Kubernetes 1.24 and above, `5.10` is used. + +The [upgrade_kernel.sh script](../scripts/upgrade_kernel.sh) contains the logic for updating and upgrading the kernel. --- diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 020758d36..873978909 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -119,16 +119,6 @@ "ADDITIONAL_YUM_REPOS={{user `additional_yum_repos`}}" ] }, - { - "type": "shell", - "remote_folder": "{{ user `remote_folder`}}", - "expect_disconnect": true, - "script": "{{template_dir}}/scripts/upgrade_kernel.sh", - "environment_vars": [ - "KUBERNETES_VERSION={{user `kubernetes_version`}}", - "KERNEL_VERSION={{user `kernel_version`}}" - ] - }, { "type": "shell", "pause_before": "90s", @@ -147,6 +137,23 @@ "source": "{{template_dir}}/log-collector-script/linux/", "destination": "/tmp/worker/log-collector-script/" }, + { + "type": "shell", + "inline": [ + "sudo chmod -R a+x /tmp/worker/bin/", + "sudo mv /tmp/worker/bin/* /usr/bin/" + ] + }, + { + "type": "shell", + "remote_folder": "{{ user `remote_folder`}}", + "expect_disconnect": true, + "script": "{{template_dir}}/scripts/upgrade_kernel.sh", + "environment_vars": [ + "KUBERNETES_VERSION={{user `kubernetes_version`}}", + "KERNEL_VERSION={{user `kernel_version`}}" + ] + }, { "type": "shell", 
"remote_folder": "{{ user `remote_folder`}}", diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index c68af36cf..56524243b 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -48,13 +48,6 @@ else exit 1 fi -################################################################################ -### Utilities ################################################################## -################################################################################ - -sudo chmod -R a+x $TEMPLATE_DIR/bin/ -sudo mv $TEMPLATE_DIR/bin/* /usr/bin/ - ################################################################################ ### Packages ################################################################### ################################################################################ diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 5736d0f5c..134bd454b 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -5,9 +5,12 @@ set -o nounset set -o errexit if [[ -z "$KERNEL_VERSION" ]]; then - KERNEL_VERSION=5.4 - - echo "kernel_version is unset. Setting to $KERNEL_VERSION" + if vercmp "$KUBERNETES_VERSION" gteq "1.24.0"; then + KERNEL_VERSION=5.10 + else + KERNEL_VERSION=5.4 + fi + echo "kernel_version is unset. Setting to $KERNEL_VERSION based on Kubernetes version $KUBERNETES_VERSION." 
fi if [[ $KERNEL_VERSION == "4.14" ]]; then From 671ce3f9a50bb8b2a1ffd92b772dc5991d15a8d9 Mon Sep 17 00:00:00 2001 From: Carter McKinnon Date: Tue, 31 Jan 2023 13:38:53 -0800 Subject: [PATCH 152/232] Enable PSI --- scripts/upgrade_kernel.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 134bd454b..85390785b 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -24,4 +24,9 @@ else exit 1 fi +# enable pressure stall information +sudo grubby \ + --update-kernel=ALL \ + --args="psi=1" + sudo reboot From 61f45dde61d7343f3fe1b0db22761adec9fff12f Mon Sep 17 00:00:00 2001 From: Carter McKinnon Date: Mon, 6 Feb 2023 18:52:30 -0800 Subject: [PATCH 153/232] Pause after kernel upgrade --- eks-worker-al2.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 873978909..33c37ed38 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -121,7 +121,6 @@ }, { "type": "shell", - "pause_before": "90s", "remote_folder": "{{ user `remote_folder`}}", "inline": [ "mkdir -p /tmp/worker/log-collector-script/" @@ -148,6 +147,7 @@ "type": "shell", "remote_folder": "{{ user `remote_folder`}}", "expect_disconnect": true, + "pause_after": "90s", "script": "{{template_dir}}/scripts/upgrade_kernel.sh", "environment_vars": [ "KUBERNETES_VERSION={{user `kubernetes_version`}}", From da96ebc848a5c6de3c49e22b4401b72a54668306 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 7 Feb 2023 10:16:15 -0800 Subject: [PATCH 154/232] Add 1.25 make target (#1173) --- Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3d25473cd..c3c9cd455 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: all -all: 1.21 1.22 1.23 1.24 ## Build all versions of EKS Optimized AL2 AMI +all: 1.21 1.22 1.23 1.24 1.25 ## Build all versions of EKS Optimized AL2 AMI # ensure that 
these flags are equivalent to the rules in the .editorconfig SHFMT_FLAGS := --list \ @@ -96,6 +96,10 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI 1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 $(MAKE) k8s kubernetes_version=1.24.9 kubernetes_build_date=2023-01-11 pull_cni_from_github=true +.PHONY: 1.25 +1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 + $(MAKE) k8s kubernetes_version=1.25.5 kubernetes_build_date=2023-01-11 pull_cni_from_github=true + .PHONY: help help: ## Display help @awk 'BEGIN {FS = ":.*##"; printf "Usage:\n make \033[36m\033[0m\n"} /^[\.a-zA-Z_0-9\-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) From ecc413a4d69b7cf14333b67a853b233a3c073dba Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 7 Feb 2023 10:19:05 -0800 Subject: [PATCH 155/232] Exclude docker from 1.25+ AMI metadata (#1174) --- Makefile | 8 ++++++++ eks-worker-al2-variables.json | 1 + eks-worker-al2.json | 3 ++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c3c9cd455..d52909999 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,14 @@ K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS MAKEFILE_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) +# Docker is not present on 1.25+ AMI's +ifeq ($(shell $(MAKEFILE_DIR)/files/bin/vercmp "$(kubernetes_version)" gteq "1.25.0"), true) +# do not tag the AMI with the Docker version +docker_version ?= none +# do not include the Docker version in the AMI description +ami_component_description ?= (k8s: {{ user `kubernetes_version` }}, containerd: {{ user `containerd_version` }}) +endif + arch ?= x86_64 ifeq ($(arch), arm64) instance_type ?= m6g.large diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 2685e5aa1..b83543f49 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -1,5 +1,6 @@ { 
"additional_yum_repos": "", + "ami_component_description": "(k8s: {{ user `kubernetes_version` }}, docker: {{ user `docker_version` }}, containerd: {{ user `containerd_version` }})", "ami_description": "EKS Kubernetes Worker AMI with AmazonLinux2 image", "ami_regions": "", "ami_users": "", diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 33c37ed38..1f805c85b 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -2,6 +2,7 @@ "_comment": "All template variables are enumerated here; and most variables have a default value defined in eks-worker-al2-variables.json", "variables": { "additional_yum_repos": null, + "ami_component_description": null, "ami_description": null, "ami_name": null, "ami_regions": null, @@ -107,7 +108,7 @@ "cni_plugin_version": "{{ user `cni_plugin_version`}}" }, "ami_name": "{{user `ami_name`}}", - "ami_description": "{{ user `ami_description` }}, (k8s: {{ user `kubernetes_version` }}, docker: {{ user `docker_version` }}, containerd: {{ user `containerd_version` }})" + "ami_description": "{{ user `ami_description` }}, {{ user `ami_component_description` }}" } ], "provisioners": [ From 613785695a83cda7a9a785750e6e29ddbde72e4f Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 7 Feb 2023 11:16:22 -0800 Subject: [PATCH 156/232] Update CHANGELOG for release v20230203 (#1170) --- CHANGELOG.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 760784636..8037f146d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,43 @@ # Changelog +### AMI Release v20230203 +* amazon-eks-gpu-node-1.24-v20230203 +* amazon-eks-gpu-node-1.23-v20230203 +* amazon-eks-gpu-node-1.22-v20230203 +* amazon-eks-gpu-node-1.21-v20230203 +* amazon-eks-arm64-node-1.24-v20230203 +* amazon-eks-arm64-node-1.23-v20230203 +* amazon-eks-arm64-node-1.22-v20230203 +* amazon-eks-arm64-node-1.21-v20230203 +* amazon-eks-node-1.24-v20230203 +* amazon-eks-node-1.23-v20230203 +* 
amazon-eks-node-1.22-v20230203 +* amazon-eks-node-1.21-v20230203 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.24.9-20230203` +* `1.23.15-20230203` +* `1.22.17-20230203` +* `1.21.14-20230203` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.24.9/2023-01-11/ +* s3://amazon-eks/1.23.15/2023-01-11/ +* s3://amazon-eks/1.22.17/2023-01-11/ +* s3://amazon-eks/1.21.14/2023-01-11/ + +AMI details: +* kernel: 5.4.228-131.415.amzn2 +* dockerd: 20.10.17-1.amzn2.0.1 +* containerd: 1.6.6-1.amzn2.0.2 +* runc: 1.1.4-1.amzn2 +* cuda: 11.4.0-1 +* nvidia-container-runtime-hook: 1.4.0-1.amzn2 +* SSM agent: 3.1.1732.0-1.amzn2 + +Notable changes: +* Reverted [Use external cloud provider for EKS Local deployments](https://github.com/awslabs/amazon-eks-ami/commit/4b9b546dc325e6372e705f1e192f68395ce017db) + ### AMI Release v20230127 * amazon-eks-gpu-node-1.24-v20230127 * amazon-eks-gpu-node-1.23-v20230127 From 24f908124120b58bad54d023e71f33f345b98068 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 7 Feb 2023 13:30:39 -0800 Subject: [PATCH 157/232] Allow any KERNEL_VERSION (#1175) --- scripts/upgrade_kernel.sh | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 85390785b..67e509caa 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -15,13 +15,8 @@ fi if [[ $KERNEL_VERSION == "4.14" ]]; then sudo yum update -y kernel -elif [[ $KERNEL_VERSION == "5.4" ]]; then - sudo amazon-linux-extras install -y kernel-5.4 -elif [[ $KERNEL_VERSION == "5.10" ]]; then - sudo amazon-linux-extras install -y kernel-5.10 else - echo "$KERNEL_VERSION is not a valid kernel version" - exit 1 + sudo amazon-linux-extras install -y "kernel-${KERNEL_VERSION}" fi # enable pressure stall information From 58bdc867afbd7808a6f78ebb1141fa33264946fd Mon Sep 17 00:00:00 2001 From: Sichaow Date: Tue, 7 Feb 2023 16:19:25 -0800 
Subject: [PATCH 158/232] Version lock kernel (#1177) * Version lock header * Version lock header * Version lock header --- scripts/install-worker.sh | 1 - scripts/upgrade_kernel.sh | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 56524243b..56ae744d1 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -68,7 +68,6 @@ sudo yum install -y \ socat \ unzip \ wget \ - yum-plugin-versionlock \ yum-utils # Remove any old kernel versions. `--count=1` here means "only leave 1 kernel version installed" diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 67e509caa..37946283a 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -24,4 +24,7 @@ sudo grubby \ --update-kernel=ALL \ --args="psi=1" +sudo yum install -y yum-plugin-versionlock +sudo yum versionlock kernel + sudo reboot From e32d36834383f4bdfd3876af059728f54e98f7a1 Mon Sep 17 00:00:00 2001 From: Sichaow Date: Wed, 8 Feb 2023 14:24:41 -0800 Subject: [PATCH 159/232] Update max pods (#1178) --- files/eni-max-pods.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 6bef4aed6..d7aa1db4c 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2023-02-02T10:15:26-08:00 +# This file was generated at 2023-02-07T22:38:44Z # # The regions queried were: # - ap-northeast-1 @@ -643,6 +643,12 @@ x1e.32xlarge 234 x1e.4xlarge 58 x1e.8xlarge 58 x1e.xlarge 29 +x2ezn.12xlarge 737 +x2ezn.2xlarge 58 +x2ezn.4xlarge 234 +x2ezn.6xlarge 234 +x2ezn.8xlarge 234 +x2ezn.metal 737 x2gd.12xlarge 234 x2gd.16xlarge 737 x2gd.2xlarge 58 From 1f8bb313a21fbec2c536799557a3c742e1276323 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 15 Feb 2023 11:15:39 -0800 Subject: [PATCH 160/232] Add Action to generate eni-max-pods.txt (#1184) --- sync-eni-max-pods.yaml | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 sync-eni-max-pods.yaml diff --git a/sync-eni-max-pods.yaml b/sync-eni-max-pods.yaml new file mode 100644 index 000000000..2d8e22ed5 --- /dev/null +++ b/sync-eni-max-pods.yaml @@ -0,0 +1,40 @@ +name: '[Sync] Update eni-max-pods.txt' +on: + workflow_dispatch: + schedule: + # once a day + - cron: '0 0 * * *' +permissions: + id-token: write + contents: write + pull-requests: write +jobs: + update-max-pods: + runs-on: ubuntu-latest + steps: + - uses: aws-actions/configure-aws-credentials@v1 + with: + aws-region: ${{ secrets.AWS_REGION }} + role-to-assume: ${{ secrets.AWS_ROLE_ARN }} + - uses: actions/checkout@v3 + with: + repository: awslabs/amazon-eks-ami + ref: refs/heads/master + path: amazon-eks-ami/ + - uses: actions/checkout@v3 + with: + repository: aws/amazon-vpc-cni-k8s + ref: refs/heads/master + path: amazon-vpc-cni-k8s/ + - run: | + #!/usr/bin/env bash + set -o errexit + cd amazon-vpc-cni-k8s/ + make generate-limits + cp misc/eni-max-pods.txt ../amazon-eks-ami/files/eni-max-pods.txt + - uses: peter-evans/create-pull-request@v4 + with: + commit-message: "Update eni-max-pods.txt" + branch: update-eni-max-pods + path: amazon-eks-ami/ + add-paths: files/eni-max-pods.txt From 4319d222984c0b58e0c12e3c8b4bfe5e74c9cbf8 Mon Sep 17 00:00:00 2001 From: Carter 
Date: Wed, 15 Feb 2023 11:21:24 -0800 Subject: [PATCH 161/232] Move eni-max-pods.txt Action to correct location (#1186) --- .../workflows/sync-eni-max-pods.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename sync-eni-max-pods.yaml => .github/workflows/sync-eni-max-pods.yaml (100%) diff --git a/sync-eni-max-pods.yaml b/.github/workflows/sync-eni-max-pods.yaml similarity index 100% rename from sync-eni-max-pods.yaml rename to .github/workflows/sync-eni-max-pods.yaml From c242245b6c4a29ac44a405ef2d6949fa5f49dd7c Mon Sep 17 00:00:00 2001 From: camrakin <113552683+camrakin@users.noreply.github.com> Date: Wed, 15 Feb 2023 12:54:27 -0800 Subject: [PATCH 162/232] Updating kubelet versions and build date to 1-30 (#1187) --- Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index d52909999..c76a0dc57 100644 --- a/Makefile +++ b/Makefile @@ -90,23 +90,23 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI .PHONY: 1.21 1.21: ## Build EKS Optimized AL2 AMI - K8s 1.21 - $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2023-01-11 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2023-01-30 pull_cni_from_github=true .PHONY: 1.22 1.22: ## Build EKS Optimized AL2 AMI - K8s 1.22 - $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-01-11 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-01-30 pull_cni_from_github=true .PHONY: 1.23 1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 - $(MAKE) k8s kubernetes_version=1.23.15 kubernetes_build_date=2023-01-11 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.23.16 kubernetes_build_date=2023-01-30 pull_cni_from_github=true .PHONY: 1.24 1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 - $(MAKE) k8s kubernetes_version=1.24.9 kubernetes_build_date=2023-01-11 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.24.10 
kubernetes_build_date=2023-01-30 pull_cni_from_github=true .PHONY: 1.25 1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 - $(MAKE) k8s kubernetes_version=1.25.5 kubernetes_build_date=2023-01-11 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.25.6 kubernetes_build_date=2023-01-30 pull_cni_from_github=true .PHONY: help help: ## Display help From 3a6a0c8df2c2025801319fff562f7ea50835e167 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 16 Feb 2023 11:33:34 -0800 Subject: [PATCH 163/232] Set title and body of eni-max-pods PR's (#1190) --- .github/workflows/sync-eni-max-pods.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sync-eni-max-pods.yaml b/.github/workflows/sync-eni-max-pods.yaml index 2d8e22ed5..8610a3e27 100644 --- a/.github/workflows/sync-eni-max-pods.yaml +++ b/.github/workflows/sync-eni-max-pods.yaml @@ -34,7 +34,13 @@ jobs: cp misc/eni-max-pods.txt ../amazon-eks-ami/files/eni-max-pods.txt - uses: peter-evans/create-pull-request@v4 with: - commit-message: "Update eni-max-pods.txt" branch: update-eni-max-pods path: amazon-eks-ami/ add-paths: files/eni-max-pods.txt + commit-message: "Update eni-max-pods.txt" + title: "Update eni-max-pods.txt" + body: | + Generated by [aws/amazon-vpc-cni-k8s](https://github.com/aws/amazon-vpc-cni-k8s): + ``` + make generate-limits + ``` From 8b10a76977278e7afec85b51c326d17392565fd6 Mon Sep 17 00:00:00 2001 From: camrakin <113552683+camrakin@users.noreply.github.com> Date: Fri, 17 Feb 2023 09:57:44 -0800 Subject: [PATCH 164/232] Updating CHANGELOG for 1.25 Release (#1188) * Updating CHANGELOG for 1.25 Release --- CHANGELOG.md | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8037f146d..0dc9fe521 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,59 @@ # Changelog +### AMI Release v20230211 +* amazon-eks-gpu-node-1.25-v20230211 +* amazon-eks-gpu-node-1.24-v20230211 +* 
amazon-eks-gpu-node-1.23-v20230211 +* amazon-eks-gpu-node-1.22-v20230211 +* amazon-eks-gpu-node-1.21-v20230211 +* amazon-eks-arm64-node-1.25-v20230211 +* amazon-eks-arm64-node-1.24-v20230211 +* amazon-eks-arm64-node-1.23-v20230211 +* amazon-eks-arm64-node-1.22-v20230211 +* amazon-eks-arm64-node-1.21-v20230211 +* amazon-eks-node-1.25-v20230211 +* amazon-eks-node-1.24-v20230211 +* amazon-eks-node-1.23-v20230211 +* amazon-eks-node-1.22-v20230211 +* amazon-eks-node-1.21-v20230211 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.25.6-20230211` +* `1.24.10-20230211` +* `1.23.16-20230211` +* `1.22.17-20230211` +* `1.21.14-20230211` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.25.6/2023-01-30/ +* s3://amazon-eks/1.24.10/2023-01-30/ +* s3://amazon-eks/1.23.16/2023-01-30/ +* s3://amazon-eks/1.22.17/2023-01-30/ +* s3://amazon-eks/1.21.14/2023-01-30/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.228-132.418.amzn2 + * Kubernetes 1.24 and above: 5.10.165-143.735.amzn2 +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.6-1.amzn2.0.2 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- This is the first AMI release for Kubernetes 1.25. +- Kubernetes 1.24+ now use `kernel-5.10` for x86 and ARM AMIs. + - The GPU AMI will continue to use `kernel-5.4` as we work to address a compatibility issue with `nvidia-driver-latest-dkms`. +- The `kernel` package is now version-locked. + +Minor changes: +- Updated `eni-max-pods.txt` with new instance types. +- Allow `kernel_version` to be set to any value (such as `5.15`) when building a custom AMI. +- Fix a misconfiguration in the GPU AMI with `containerd`'s registry certificates. [#1168](https://github.com/awslabs/amazon-eks-ami/issues/1168). 
+ ### AMI Release v20230203 * amazon-eks-gpu-node-1.24-v20230203 * amazon-eks-gpu-node-1.23-v20230203 From 2b75446ce362f7b698750836f9bfbc30e45c8fb0 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 17 Feb 2023 11:18:06 -0800 Subject: [PATCH 165/232] Version lock kernel after removing old kernels (#1191) --- scripts/install-worker.sh | 5 ++++- scripts/upgrade_kernel.sh | 3 --- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 56ae744d1..e9a033b91 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -68,11 +68,14 @@ sudo yum install -y \ socat \ unzip \ wget \ - yum-utils + yum-utils \ + yum-plugin-versionlock # Remove any old kernel versions. `--count=1` here means "only leave 1 kernel version installed" sudo package-cleanup --oldkernels --count=1 -y +sudo yum versionlock kernel-$(uname -r) + # Remove the ec2-net-utils package, if it's installed. This package interferes with the route setup on the instance. if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils -y -q; fi diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 37946283a..67e509caa 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -24,7 +24,4 @@ sudo grubby \ --update-kernel=ALL \ --args="psi=1" -sudo yum install -y yum-plugin-versionlock -sudo yum versionlock kernel - sudo reboot From 77ca8818683809a6d0bbb51b436bdb5094ff93a5 Mon Sep 17 00:00:00 2001 From: Saurav Agarwalla Date: Fri, 17 Feb 2023 17:12:00 -0400 Subject: [PATCH 166/232] Mark v20230211 as recalled (#1192) * Mark v20230211 as recalled v20230211 is being recalled due to an issue affecting Kernel upgrades. See https://github.com/awslabs/amazon-eks-ami/issues/1193 for more details. 
--- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0dc9fe521..78ad94a38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -### AMI Release v20230211 +### [Recalled] AMI Release v20230211 * amazon-eks-gpu-node-1.25-v20230211 * amazon-eks-gpu-node-1.24-v20230211 * amazon-eks-gpu-node-1.23-v20230211 From fdb91d8a77268e8a3e08a803cc68901a5c602432 Mon Sep 17 00:00:00 2001 From: Prasad Shende Date: Tue, 21 Feb 2023 17:17:15 -0800 Subject: [PATCH 167/232] Update CHANGELOG for new AMI release (#1196) --- CHANGELOG.md | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78ad94a38..6d66222a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,60 @@ # Changelog +### AMI Release v20230217 +* amazon-eks-gpu-node-1.25-v20230217 +* amazon-eks-gpu-node-1.24-v20230217 +* amazon-eks-gpu-node-1.23-v20230217 +* amazon-eks-gpu-node-1.22-v20230217 +* amazon-eks-gpu-node-1.21-v20230217 +* amazon-eks-arm64-node-1.25-v20230217 +* amazon-eks-arm64-node-1.24-v20230217 +* amazon-eks-arm64-node-1.23-v20230217 +* amazon-eks-arm64-node-1.22-v20230217 +* amazon-eks-arm64-node-1.21-v20230217 +* amazon-eks-node-1.25-v20230217 +* amazon-eks-node-1.24-v20230217 +* amazon-eks-node-1.23-v20230217 +* amazon-eks-node-1.22-v20230217 +* amazon-eks-node-1.21-v20230217 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.25.6-20230217` +* `1.24.10-20230217` +* `1.23.16-20230217` +* `1.22.17-20230217` +* `1.21.14-20230217` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.25.6/2023-01-30/ +* s3://amazon-eks/1.24.10/2023-01-30/ +* s3://amazon-eks/1.23.16/2023-01-30/ +* s3://amazon-eks/1.22.17/2023-01-30/ +* s3://amazon-eks/1.21.14/2023-01-30/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.228-132.418.amzn2 + * Kubernetes 1.24 and above: 
5.10.165-143.735.amzn2 +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.6-1.amzn2.0.2 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- Kubernetes 1.24+ now use `kernel-5.10` for x86 and ARM AMIs. + - The GPU AMI will continue to use `kernel-5.4` as we work to address a compatibility issue with `nvidia-driver-latest-dkms`. +- The `kernel` package is now properly version-locked [#1191](https://github.com/awslabs/amazon-eks-ami/pull/1191). + - See [#1193](https://github.com/awslabs/amazon-eks-ami/issues/1193) for more information. +- New AMIs released for kubernetes version 1.25 +- Pressure stall information (PSI) is now enabled [#1161](https://github.com/awslabs/amazon-eks-ami/pull/1161). + +Minor changes: +- Updated `eni-max-pods.txt` with new instance types. +- Allow `kernel_version` to be set to any value (such as `5.15`) when building a custom AMI. + ### [Recalled] AMI Release v20230211 * amazon-eks-gpu-node-1.25-v20230211 * amazon-eks-gpu-node-1.24-v20230211 From f2ade6202e617db49cad28f64817cf5e228e2f44 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 23 Feb 2023 17:02:17 -0800 Subject: [PATCH 168/232] Add `Using the AMI` (#1201) --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index cbb911f78..053535d7c 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,6 @@ custom Amazon EKS AMI with [HashiCorp Packer](https://www.packer.io/). This is the same configuration that Amazon EKS uses to create the official Amazon EKS-optimized AMI. 
-**Check out the AMI's [user guide](doc/USER_GUIDE.md) for more information.** - ## 🚀 Getting started If you are new to Amazon EKS, we recommend that you follow @@ -39,9 +37,13 @@ To build an Amazon EKS Worker AMI for a particular Kubernetes version run the fo make 1.23 ## Build a Amazon EKS Worker AMI for k8s 1.23 ``` -**Note** -The default instance type to build this AMI does not qualify for the AWS free tier. You are charged for any instances created -when building this AMI. +> **Note** +> The default instance type to build this AMI does not qualify for the AWS free tier. +> You are charged for any instances created when building this AMI. + +## 👩‍💻 Using the AMI + +The [AMI user guide](doc/USER_GUIDE.md) has details about the AMI's internals, and the [EKS user guide](https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html#launch-template-custom-ami) explains how to use a custom AMI in a managed node group. ## 🔒 Security From 3731ff06d6d7903b60143db20c4ecb907ed941da Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 23 Feb 2023 17:05:20 -0800 Subject: [PATCH 169/232] Add doc on version-locked packages (#1199) --- doc/USER_GUIDE.md | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index c565cff4d..4e7291138 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -1,6 +1,6 @@ # User Guide -This guide will provide more detailed usage information on this repo. +This document includes details about using the AMI template and the resulting AMIs. 1. [AMI template variables](#ami-template-variables) 1. [Building against other versions of Kubernetes binaries](#building-against-other-versions-of-kubernetes-binaries) @@ -10,6 +10,7 @@ This guide will provide more detailed usage information on this repo. 1. [Customizing kubelet config](#customizing-kubelet-config) 1. [AL2 and Linux kernel information](#al2-and-linux-kernel-information) 1. 
[Updating known instance types](#updating-known-instance-types) +1. [Version-locked packages](#version-locked-packages) --- @@ -283,3 +284,28 @@ $ git diff ``` At this point, you can build an AMI and it will include the updated list of instance types. + +--- + +## Version-locked packages + +Some packages are critical for correct, performant behavior of a Kubernetes node, such as: +- `kernel` +- `containerd` +- `runc` + +> **Note** +> This is not an exhaustive list. The complete list of locked packages is available with `yum versionlock list`. + +As a result, these packages should generally be modified within the bounds of a managed process that gracefully handles failures and prevents disruption to the cluster's workloads. + +To prevent unintentional changes, the [yum-versionlock](https://github.com/rpm-software-management/yum-utils/tree/05db7ef501fc9d6698935bcc039c83c0761c3be2/plugins/versionlock) plugin is used on these packages. + +If you wish to modify a locked package, you can: +``` +# unlock a single package +sudo yum versionlock delete $PACKAGE_NAME + +# unlock all packages +sudo yum versionlock clear +``` From 2fcd227eefd82b622e13847d9130d4c84890dc3c Mon Sep 17 00:00:00 2001 From: Zaid Ahmed Farooq <38226823+zaf6862@users.noreply.github.com> Date: Fri, 24 Feb 2023 13:55:47 -0800 Subject: [PATCH 170/232] Add pause container accounts for ap-southeast-4, il-central-1 (#1204) --- files/get-ecr-uri.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/files/get-ecr-uri.sh b/files/get-ecr-uri.sh index e865ab24d..134dc39cc 100755 --- a/files/get-ecr-uri.sh +++ b/files/get-ecr-uri.sh @@ -54,6 +54,12 @@ else ap-south-2) acct="900889452093" ;; + ap-southeast-4) + acct="491585149902" + ;; + il-central-1) + acct="066635153087" + ;; *) acct="602401143452" ;; From e3dee6345b3c1f83f49ad1bf5aea3f03beb4ef7b Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 1 Mar 2023 19:52:42 -0800 Subject: [PATCH 171/232] Disable eni-max-pods.txt workflow in forks (#1209) --- 
.github/workflows/sync-eni-max-pods.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/sync-eni-max-pods.yaml b/.github/workflows/sync-eni-max-pods.yaml index 8610a3e27..c9abf4a5f 100644 --- a/.github/workflows/sync-eni-max-pods.yaml +++ b/.github/workflows/sync-eni-max-pods.yaml @@ -10,6 +10,8 @@ permissions: pull-requests: write jobs: update-max-pods: + # this workflow will always fail in forks; bail if this isn't running in the upstream + if: github.repository == awslabs/amazon-eks-ami runs-on: ubuntu-latest steps: - uses: aws-actions/configure-aws-credentials@v1 From c8931377b6502f68462bd3bd38e4ad41b5f20594 Mon Sep 17 00:00:00 2001 From: Carter Date: Sat, 4 Mar 2023 17:02:03 -0800 Subject: [PATCH 172/232] Add quotes to repository name (#1213) --- .github/workflows/sync-eni-max-pods.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sync-eni-max-pods.yaml b/.github/workflows/sync-eni-max-pods.yaml index c9abf4a5f..76f02addf 100644 --- a/.github/workflows/sync-eni-max-pods.yaml +++ b/.github/workflows/sync-eni-max-pods.yaml @@ -11,7 +11,7 @@ permissions: jobs: update-max-pods: # this workflow will always fail in forks; bail if this isn't running in the upstream - if: github.repository == awslabs/amazon-eks-ami + if: github.repository == 'awslabs/amazon-eks-ami' runs-on: ubuntu-latest steps: - uses: aws-actions/configure-aws-credentials@v1 From 951c6ec76624c5959057ea57c8847d633645032f Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 6 Mar 2023 13:33:50 -0800 Subject: [PATCH 173/232] Validate package versionlocks (#1195) --- scripts/validate.sh | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/scripts/validate.sh b/scripts/validate.sh index 01297dfdd..ae329005e 100644 --- a/scripts/validate.sh +++ b/scripts/validate.sh @@ -36,11 +36,40 @@ validate_file_nonexists '/var/log/secure' validate_file_nonexists '/var/log/wtmp' actual_kernel=$(uname -r) -echo 
"Verifying that kernel version $actual_kernel matches $KERNEL_VERSION" +echo "Verifying that kernel version $actual_kernel matches $KERNEL_VERSION..." if [[ $actual_kernel == $KERNEL_VERSION* ]]; then - echo "Kernel matches expected version" + echo "Kernel matches expected version!" else - echo "Kernel does not match expected version." + echo "Kernel does not match expected version!" exit 1 fi + +echo "Verifying that the package versionlocks are correct..." + +function versionlock-entries() { + # the format of this output is EPOCH:NAME-VERSION-RELEASE.ARCH + # more info in yum-versionlock(1) + # rpm doesn't accept EPOCH when querying the db, so remove it + yum versionlock list --quiet | cut -d ':' -f2 +} + +function versionlock-packages() { + versionlock-entries | xargs -I '{}' rpm --query '{}' --queryformat '%{NAME}\n' +} + +for ENTRY in $(versionlock-entries); do + if ! rpm --query "$ENTRY" &> /dev/null; then + echo "There is no package matching the versionlock entry: '$ENTRY'" + exit 1 + fi +done + +LOCKED_PACKAGES=$(versionlock-packages | wc -l) +UNIQUE_LOCKED_PACKAGES=$(versionlock-packages | sort -u | wc -l) +if [ $LOCKED_PACKAGES -ne $UNIQUE_LOCKED_PACKAGES ]; then + echo "Package(s) have multiple version locks!" + versionlock-entries +fi + +echo "Package versionlocks are correct!" 
From 17e1e3d7cd47700202daf23dbef337f35161b3b8 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 7 Mar 2023 16:31:22 -0800 Subject: [PATCH 174/232] Remove 1.21 (#1210) --- Makefile | 8 ++------ README.md | 11 ++++++----- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index c76a0dc57..28ab0c3cd 100644 --- a/Makefile +++ b/Makefile @@ -36,8 +36,8 @@ T_GREEN := \e[0;32m T_YELLOW := \e[0;33m T_RESET := \e[0m -.PHONY: all -all: 1.21 1.22 1.23 1.24 1.25 ## Build all versions of EKS Optimized AL2 AMI +.PHONY: latest +latest: 1.25 ## Build EKS Optimized AL2 AMI with the latest supported version of Kubernetes # ensure that these flags are equivalent to the rules in the .editorconfig SHFMT_FLAGS := --list \ @@ -88,10 +88,6 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html -.PHONY: 1.21 -1.21: ## Build EKS Optimized AL2 AMI - K8s 1.21 - $(MAKE) k8s kubernetes_version=1.21.14 kubernetes_build_date=2023-01-30 pull_cni_from_github=true - .PHONY: 1.22 1.22: ## Build EKS Optimized AL2 AMI - K8s 1.22 $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-01-30 pull_cni_from_github=true diff --git a/README.md b/README.md index 053535d7c..758fb9868 100644 --- a/README.md +++ b/README.md @@ -29,13 +29,14 @@ invoking Packer directly. You can initiate the build process by running the following command in the root of this repository: ```bash +# build an AMI with the latest Kubernetes version make + +# build an AMI with a specific Kubernetes version +make 1.25 ``` -The Makefile chooses a particular kubelet binary to use per kubernetes version which you can [view here](Makefile). 
-To build an Amazon EKS Worker AMI for a particular Kubernetes version run the following command -```bash -make 1.23 ## Build a Amazon EKS Worker AMI for k8s 1.23 -``` + +The Makefile chooses a particular kubelet binary to use per Kubernetes version which you can [view here](Makefile). > **Note** > The default instance type to build this AMI does not qualify for the AWS free tier. From afea1918a4fd1896d9b4ef718576b55c006ea548 Mon Sep 17 00:00:00 2001 From: Eden Jose Date: Wed, 8 Mar 2023 09:08:56 +0800 Subject: [PATCH 175/232] Add --update to AWS CLI installation (#1205) --- scripts/install-worker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index e9a033b91..aed1c3806 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -130,7 +130,7 @@ if [[ "$BINARY_BUCKET_REGION" != "us-iso-east-1" && "$BINARY_BUCKET_REGION" != " --retry-delay 1 \ -L "https://awscli.amazonaws.com/awscli-exe-linux-${MACHINE}.zip" -o "${AWSCLI_DIR}/awscliv2.zip" unzip -q "${AWSCLI_DIR}/awscliv2.zip" -d ${AWSCLI_DIR} - sudo "${AWSCLI_DIR}/aws/install" --bin-dir /bin/ + sudo "${AWSCLI_DIR}/aws/install" --bin-dir /bin/ --update else echo "Installing awscli package" sudo yum install -y awscli From 1e3c108c6ea8347f674593e1bcae6e7ee1711754 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 8 Mar 2023 11:32:09 -0800 Subject: [PATCH 176/232] Use --cloud-provider=external on 1.26+ (#1203) --- files/bin/imds | 115 ++++++++++++++++------------ files/bin/provider-id | 9 +++ files/bootstrap.sh | 14 +++- files/kubelet-containerd.service | 5 +- files/kubelet.service | 6 +- test/Dockerfile | 2 +- test/cases/cloud-provider-config.sh | 82 ++++++++++++++++++++ test/cases/imds-errors.sh | 32 ++++++++ test/cases/provider-id.sh | 25 ++++++ 9 files changed, 234 insertions(+), 56 deletions(-) create mode 100755 files/bin/provider-id create mode 100755 test/cases/cloud-provider-config.sh create mode 100755 
test/cases/imds-errors.sh create mode 100755 test/cases/provider-id.sh diff --git a/files/bin/imds b/files/bin/imds index e9f8e749d..7619ee3fb 100755 --- a/files/bin/imds +++ b/files/bin/imds @@ -4,13 +4,6 @@ set -o errexit set -o pipefail set -o nounset -IMDS_DEBUG="${IMDS_DEBUG:-false}" -function log() { - if [ "$IMDS_DEBUG" = "true" ]; then - echo >&2 "$1" - fi -} - if [ "$#" -ne 1 ]; then echo >&2 "usage: imds API_PATH" exit 1 @@ -21,57 +14,79 @@ API_PATH="${1#/}" CURRENT_TIME=$(date '+%s') -IMDS_ENDPOINT=${IMDS_ENDPOINT:-169.254.169.254} - -log "ℹ️ Talking to IMDS at $IMDS_ENDPOINT" - -TOKEN_DIR=/tmp/imds-tokens -mkdir -p $TOKEN_DIR - -IMDS_RETRIES=${IMDS_RETRIES:-10} -IMDS_RETRY_DELAY_SECONDS=${IMDS_RETRY_DELAY_SECONDS:-1} - +IMDS_DEBUG="${IMDS_DEBUG:-false}" # default ttl is 15 minutes IMDS_TOKEN_TTL_SECONDS=${IMDS_TOKEN_TTL_SECONDS:-900} - # max ttl is 6 hours, see: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html IMDS_MAX_TOKEN_TTL_SECONDS=${IMDS_MAX_TOKEN_TTL_SECONDS:-21600} +IMDS_RETRIES=${IMDS_RETRIES:-10} +IMDS_RETRY_DELAY_SECONDS=${IMDS_RETRY_DELAY_SECONDS:-1} +IMDS_ENDPOINT=${IMDS_ENDPOINT:-169.254.169.254} -# cleanup expired tokens -DELETED_TOKENS=0 -for TOKEN_FILE in $(ls $TOKEN_DIR | awk '$0 < '$(($CURRENT_TIME - $IMDS_MAX_TOKEN_TTL_SECONDS))); do - rm $TOKEN_DIR/$TOKEN_FILE - DELETED_TOKENS=$(($DELETED_TOKENS + 1)) -done -if [ "$DELETED_TOKENS" -gt 0 ]; then - log "🗑️ Deleted $DELETED_TOKENS expired IMDS token(s)." 
-fi - -TOKEN_FILE=$(ls $TOKEN_DIR | awk '$0 > '$CURRENT_TIME | sort -n -r | head -n 1) +function log() { + if [ "$IMDS_DEBUG" = "true" ]; then + echo >&2 "$1" + fi +} -if [ "$TOKEN_FILE" = "" ]; then - TOKEN_FILE=$(($CURRENT_TIME + $IMDS_TOKEN_TTL_SECONDS)) - curl \ +function imdscurl() { + local OUTPUT_FILE=$(mktemp) + local CODE=$(curl \ --silent \ --show-error \ + --output $OUTPUT_FILE \ + --write-out "%{http_code}" \ --retry $IMDS_RETRIES \ --retry-delay $IMDS_RETRY_DELAY_SECONDS \ - -o $TOKEN_DIR/$TOKEN_FILE \ - -H "X-aws-ec2-metadata-token-ttl-seconds: $IMDS_TOKEN_TTL_SECONDS" \ - -X PUT \ - "http://$IMDS_ENDPOINT/latest/api/token" - # make sure any user can utilize (and clean up) these tokens - chmod a+rwx $TOKEN_DIR/$TOKEN_FILE - log "🔑 Retrieved a fresh IMDS token that will expire in $IMDS_TOKEN_TTL_SECONDS seconds." -else - log "ℹ️ Using cached IMDS token that expires in $(($TOKEN_FILE - $CURRENT_TIME)) seconds." -fi + "$@" || echo "1") + # CODE will be either the HTTP status code, or 1 if the exit code of `curl` is non-zero + if [[ ${CODE} -lt 200 || ${CODE} -gt 299 ]]; then + cat >&2 $OUTPUT_FILE + return $CODE + fi + printf "$(cat $OUTPUT_FILE)\n" + rm $OUTPUT_FILE +} + +function get-token() { + local TOKEN_DIR=/tmp/imds-tokens + mkdir -p $TOKEN_DIR + + # cleanup expired tokens + local DELETED_TOKENS=0 + for TOKEN_FILE in $(ls $TOKEN_DIR | awk '$0 < '$(($CURRENT_TIME - $IMDS_MAX_TOKEN_TTL_SECONDS))); do + rm $TOKEN_DIR/$TOKEN_FILE + DELETED_TOKENS=$(($DELETED_TOKENS + 1)) + done + if [ "$DELETED_TOKENS" -gt 0 ]; then + log "🗑️ Deleted $DELETED_TOKENS expired IMDS token(s)." 
+ fi + + local TOKEN_FILE=$(ls $TOKEN_DIR | awk '$0 > '$CURRENT_TIME | sort -n -r | head -n 1) + + if [ "$TOKEN_FILE" = "" ]; then + TOKEN_FILE=$(($CURRENT_TIME + $IMDS_TOKEN_TTL_SECONDS)) + local TOKEN=$(imdscurl \ + -H "X-aws-ec2-metadata-token-ttl-seconds: $IMDS_TOKEN_TTL_SECONDS" \ + -X PUT \ + "http://$IMDS_ENDPOINT/latest/api/token") + echo "$TOKEN" > "$TOKEN_DIR/$TOKEN_FILE" + # make sure any user can utilize (and clean up) these tokens + chmod a+rwx $TOKEN_DIR/$TOKEN_FILE + log "🔑 Retrieved a fresh IMDS token that will expire in $IMDS_TOKEN_TTL_SECONDS seconds." + else + log "ℹ️ Using cached IMDS token that expires in $(($TOKEN_FILE - $CURRENT_TIME)) seconds." + fi + cat "$TOKEN_DIR/$TOKEN_FILE" +} + +function get-with-token() { + local API_PATH="$1" + imdscurl \ + -H "X-aws-ec2-metadata-token: $(get-token)" \ + "http://$IMDS_ENDPOINT/$API_PATH" +} + +log "ℹ️ Talking to IMDS at $IMDS_ENDPOINT" -curl \ - --silent \ - --show-error \ - --retry $IMDS_RETRIES \ - --retry-delay $IMDS_RETRY_DELAY_SECONDS \ - --write-out '\n' \ - -H "X-aws-ec2-metadata-token: $(cat $TOKEN_DIR/$TOKEN_FILE)" \ - "http://$IMDS_ENDPOINT/$API_PATH" +get-with-token "$API_PATH" diff --git a/files/bin/provider-id b/files/bin/provider-id new file mode 100755 index 000000000..7cced7f3a --- /dev/null +++ b/files/bin/provider-id @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -o errexit +set -o nounset + +AVAILABILITY_ZONE=$(imds '/latest/meta-data/placement/availability-zone') +INSTANCE_ID=$(imds '/latest/meta-data/instance-id') + +echo "aws:///$AVAILABILITY_ZONE/$INSTANCE_ID" diff --git a/files/bootstrap.sh b/files/bootstrap.sh index f24595cc6..e817e7731 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -455,11 +455,23 @@ if [[ "$USE_MAX_PODS" = "true" ]]; then echo "$(jq ".maxPods=$MAX_PODS" $KUBELET_CONFIG)" > $KUBELET_CONFIG fi +KUBELET_ARGS="--node-ip=$INTERNAL_IP --pod-infra-container-image=$PAUSE_CONTAINER --v=2" + +if vercmp "$KUBELET_VERSION" lt "1.26.0"; then + # TODO: remove 
this when 1.25 is EOL + KUBELET_CLOUD_PROVIDER="aws" +else + KUBELET_CLOUD_PROVIDER="external" + echo "$(jq ".providerID=\"$(provider-id)\"" $KUBELET_CONFIG)" > $KUBELET_CONFIG +fi + +KUBELET_ARGS="$KUBELET_ARGS --cloud-provider=$KUBELET_CLOUD_PROVIDER" + mkdir -p /etc/systemd/system/kubelet.service.d cat << EOF > /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf [Service] -Environment='KUBELET_ARGS=--node-ip=$INTERNAL_IP --pod-infra-container-image=$PAUSE_CONTAINER --v=2' +Environment='KUBELET_ARGS=$KUBELET_ARGS' EOF if [[ -n "$KUBELET_EXTRA_ARGS" ]]; then diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index d043b30b9..d5789530e 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -7,12 +7,13 @@ Requires=containerd.service sandbox-image.service [Service] Slice=runtime.slice ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 -ExecStart=/usr/bin/kubelet --cloud-provider aws \ +ExecStart=/usr/bin/kubelet \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime remote \ --container-runtime-endpoint unix:///run/containerd/containerd.sock \ - $KUBELET_ARGS $KUBELET_EXTRA_ARGS + $KUBELET_ARGS \ + $KUBELET_EXTRA_ARGS Restart=on-failure RestartForceExitStatus=SIGPIPE diff --git a/files/kubelet.service b/files/kubelet.service index 387470da1..a4aa5a890 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -6,11 +6,13 @@ Requires=docker.service [Service] ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 -ExecStart=/usr/bin/kubelet --cloud-provider aws \ +ExecStart=/usr/bin/kubelet \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime docker \ - --network-plugin cni $KUBELET_ARGS $KUBELET_EXTRA_ARGS + --network-plugin cni \ + $KUBELET_ARGS \ + $KUBELET_EXTRA_ARGS Restart=always RestartSec=5 diff --git a/test/Dockerfile b/test/Dockerfile index 
9aaa44905..bab93ee84 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -1,7 +1,7 @@ FROM public.ecr.aws/aws-ec2/amazon-ec2-metadata-mock:v1.11.2 as aemm FROM public.ecr.aws/amazonlinux/amazonlinux:2 RUN amazon-linux-extras enable docker && \ - yum install -y jq containerd wget && \ + yum install -y jq containerd wget which && \ wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 && \ chmod a+x /usr/local/bin/yq diff --git a/test/cases/cloud-provider-config.sh b/test/cases/cloud-provider-config.sh new file mode 100755 index 000000000..f21cd93cb --- /dev/null +++ b/test/cases/cloud-provider-config.sh @@ -0,0 +1,82 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail +set -o nounset + +KUBELET_UNIT_DIR="/etc/systemd/system/kubelet.service.d" +KUBELET_CONFIG_FILE="/etc/kubernetes/kubelet/kubelet-config.json" + +function fail() { + echo "❌ Test Failed:" "$@" + echo "Kubelet systemd units:" + find $KUBELET_UNIT_DIR -type f | xargs cat + echo "Kubelet config file:" + cat $KUBELET_CONFIG_FILE | jq '.' + exit 1 +} + +EXPECTED_PROVIDER_ID=$(provider-id) + +echo "--> Should use in-tree cloud provider below k8s version 1.26" +# This variable is used to override the default value in the kubelet mock +export KUBELET_VERSION=v1.25.5-eks-ba74326 +EXIT_CODE=0 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + fail "expected a zero exit code but got '${EXIT_CODE}'" +fi +EXIT_CODE=0 +grep -RFq -e "--cloud-provider=aws" $KUBELET_UNIT_DIR || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + fail "expected --cloud-provider=aws to be present in kubelet's systemd units" +fi +ACTUAL_PROVIDER_ID=$(jq -r '.providerID' $KUBELET_CONFIG_FILE) +if [ ! 
"$ACTUAL_PROVIDER_ID" = "null" ]; then + fail "expected .providerID to be absent in kubelet's config file but was '$ACTUAL_PROVIDER_ID'" +fi + +echo "--> Should use external cloud provider at k8s version 1.26" +# at 1.26 +export KUBELET_VERSION=v1.26.5-eks-ba74326 +EXIT_CODE=0 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + fail "expected a zero exit code but got '${EXIT_CODE}'" +fi +EXIT_CODE=0 +grep -RFq -e "--cloud-provider=external" $KUBELET_UNIT_DIR || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + fail "expected --cloud-provider=external to be present in kubelet's systemd units" +fi +ACTUAL_PROVIDER_ID=$(jq -r '.providerID' $KUBELET_CONFIG_FILE) +if [ ! "$ACTUAL_PROVIDER_ID" = "$EXPECTED_PROVIDER_ID" ]; then + fail "expected .providerID=$EXPECTED_PROVIDER_ID to be present in kubelet's config file but was '$ACTUAL_PROVIDER_ID'" +fi + +echo "--> Should use external cloud provider above k8s version 1.26" +# above 1.26 +export KUBELET_VERSION=v1.27.0-eks-ba74326 +EXIT_CODE=0 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + fail "expected a zero exit code but got '${EXIT_CODE}'" +fi +EXIT_CODE=0 +grep -RFq -e "--cloud-provider=external" $KUBELET_UNIT_DIR || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + fail "expected --cloud-provider=external to be present in kubelet's systemd units" +fi +ACTUAL_PROVIDER_ID=$(jq -r '.providerID' $KUBELET_CONFIG_FILE) +if [ ! 
"$ACTUAL_PROVIDER_ID" = "$EXPECTED_PROVIDER_ID" ]; then + fail "expected .providerID=$EXPECTED_PROVIDER_ID to be present in kubelet's config file but was '$ACTUAL_PROVIDER_ID'" +fi diff --git a/test/cases/imds-errors.sh b/test/cases/imds-errors.sh new file mode 100755 index 000000000..865ca8dc7 --- /dev/null +++ b/test/cases/imds-errors.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +export IMDS_DEBUG=true + +echo "--> Should succeed for known API" +EXIT_CODE=0 +imds /latest/meta-data/instance-id || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got: $EXIT_CODE" + exit 1 +fi + +echo "--> Should fail for unknown API" +EXIT_CODE=0 +imds /foo || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code" + exit 1 +fi + +echo "--> Should fail for invalid endpoint" +EXIT_CODE=0 +export IMDS_ENDPOINT="127.0.0.0:1234" +imds /latest/meta-data/instance-id || EXIT_CODE=$? +if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code" + exit 1 +fi diff --git a/test/cases/provider-id.sh b/test/cases/provider-id.sh new file mode 100755 index 000000000..8707a41ea --- /dev/null +++ b/test/cases/provider-id.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +echo "--> Should fetch imds details correctly" +EXPECTED_INSTANCE_ID="i-1234567890abcdef0" +EXPECTED_AVAILABILITY_ZONE="us-east-1a" +EXPECTED_PROVIDER_ID="aws:///$EXPECTED_AVAILABILITY_ZONE/$EXPECTED_INSTANCE_ID" +PROVIDER_ID=$(provider-id) +if [ ! "$PROVIDER_ID" = "$EXPECTED_PROVIDER_ID" ]; then + echo "❌ Test Failed: expected provider-id=$EXPECTED_PROVIDER_ID but got '${PROVIDER_ID}'" + exit 1 +fi + +echo "--> Should fail when imds is unreachable" +echo '#!/usr/bin/sh +exit 1' > $(which imds) +EXIT_CODE=0 +provider-id || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -eq 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code" + exit 1 +fi From 1f8ec9f4bb08e598db8c56f772e620481e62fd78 Mon Sep 17 00:00:00 2001 From: Sichaow Date: Thu, 9 Mar 2023 17:14:50 -0800 Subject: [PATCH 177/232] update CHANGELOG.md for v20230304 release (#1218) --- CHANGELOG.md | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d66222a9..0639a2883 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,55 @@ # Changelog +### AMI Release v20230304 +* amazon-eks-gpu-node-1.25-v20230304 +* amazon-eks-gpu-node-1.24-v20230304 +* amazon-eks-gpu-node-1.23-v20230304 +* amazon-eks-gpu-node-1.22-v20230304 +* amazon-eks-gpu-node-1.21-v20230304 +* amazon-eks-arm64-node-1.25-v20230304 +* amazon-eks-arm64-node-1.24-v20230304 +* amazon-eks-arm64-node-1.23-v20230304 +* amazon-eks-arm64-node-1.22-v20230304 +* amazon-eks-arm64-node-1.21-v20230304 +* amazon-eks-node-1.25-v20230304 +* amazon-eks-node-1.24-v20230304 +* amazon-eks-node-1.23-v20230304 +* amazon-eks-node-1.22-v20230304 +* amazon-eks-node-1.21-v20230304 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.25.6-20230304` +* `1.24.10-20230304` +* `1.23.16-20230304` +* `1.22.17-20230304` +* `1.21.14-20230304` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.25.6/2023-01-30/ +* s3://amazon-eks/1.24.10/2023-01-30/ +* s3://amazon-eks/1.23.16/2023-01-30/ +* s3://amazon-eks/1.22.17/2023-01-30/ +* s3://amazon-eks/1.21.14/2023-01-30/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.231-137.341.amzn2 + * Kubernetes 1.24 and above: 5.10.167-147.601.amzn2 +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that with Kubernetes 1.25+, Docker is only installed on GPU AMI's. This is subject to change as we remove unnecessary dependencies, and we recommend completing the migration to `containerd` immediately. 
+* `containerd`: 1.6.6-1.amzn2.0.2 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- This is the last AMI release for Kubernetes 1.21 +- This is the first AMI release available in `ap-southeast-4` + +Minor changes: +- Adds a user guide section about packages in the versionlock file. [(#1199)](https://github.com/awslabs/amazon-eks-ami/pull/1199) + ### AMI Release v20230217 * amazon-eks-gpu-node-1.25-v20230217 * amazon-eks-gpu-node-1.24-v20230217 From e989803c4887bc4cd4d6b083ffe4628c2a640018 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 21 Mar 2023 11:57:11 -0700 Subject: [PATCH 178/232] Update eni-max-pods.txt (#1194) --- files/eni-max-pods.txt | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index d7aa1db4c..c914d80cf 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -11,8 +11,6 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
# -# This file was generated at 2023-02-07T22:38:44Z -# # The regions queried were: # - ap-northeast-1 # - ap-northeast-2 @@ -46,6 +44,7 @@ a1.large 29 a1.medium 8 a1.metal 234 a1.xlarge 58 +bmn-sf1.metal 737 c1.medium 12 c1.xlarge 58 c3.2xlarge 58 @@ -176,7 +175,6 @@ c7g.large 29 c7g.medium 8 c7g.metal 737 c7g.xlarge 58 -cc2.8xlarge 234 cr1.8xlarge 234 d2.2xlarge 58 d2.4xlarge 234 @@ -643,12 +641,6 @@ x1e.32xlarge 234 x1e.4xlarge 58 x1e.8xlarge 58 x1e.xlarge 29 -x2ezn.12xlarge 737 -x2ezn.2xlarge 58 -x2ezn.4xlarge 234 -x2ezn.6xlarge 234 -x2ezn.8xlarge 234 -x2ezn.metal 737 x2gd.12xlarge 234 x2gd.16xlarge 737 x2gd.2xlarge 58 From 93b61ac4c940b0fda6dc272f8f7b6a48d3d9ee18 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 21 Mar 2023 18:56:06 -0700 Subject: [PATCH 179/232] Update eni-max-pods.txt (#1226) --- files/eni-max-pods.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index c914d80cf..cde3c610e 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -165,6 +165,7 @@ c6in.32xlarge 345 c6in.4xlarge 234 c6in.8xlarge 234 c6in.large 29 +c6in.metal 345 c6in.xlarge 58 c7g.12xlarge 234 c7g.16xlarge 737 @@ -409,6 +410,7 @@ m6idn.32xlarge 345 m6idn.4xlarge 234 m6idn.8xlarge 234 m6idn.large 29 +m6idn.metal 345 m6idn.xlarge 58 m6in.12xlarge 234 m6in.16xlarge 737 @@ -418,6 +420,7 @@ m6in.32xlarge 345 m6in.4xlarge 234 m6in.8xlarge 234 m6in.large 29 +m6in.metal 345 m6in.xlarge 58 m7g.12xlarge 234 m7g.16xlarge 737 @@ -568,6 +571,7 @@ r6idn.32xlarge 345 r6idn.4xlarge 234 r6idn.8xlarge 234 r6idn.large 29 +r6idn.metal 345 r6idn.xlarge 58 r6in.12xlarge 234 r6in.16xlarge 737 @@ -577,6 +581,7 @@ r6in.32xlarge 345 r6in.4xlarge 234 r6in.8xlarge 234 r6in.large 29 +r6in.metal 345 r6in.xlarge 58 r7g.12xlarge 234 r7g.16xlarge 737 From 1d17dd0f5c755963cc8944f00c20ab37ced1cc05 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 22 Mar 2023 14:04:57 -0700 
Subject: [PATCH 180/232] Don't pass URL to --content (#1227) --- log-collector-script/linux/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/log-collector-script/linux/README.md b/log-collector-script/linux/README.md index efa100fa5..69bc088b3 100644 --- a/log-collector-script/linux/README.md +++ b/log-collector-script/linux/README.md @@ -102,10 +102,11 @@ Trying to archive gathered information... 1. Create the SSM document named "EKSLogCollector" using the following commands: ``` +curl -O https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-ssm-content.json aws ssm create-document \ --name "EKSLogCollectorLinux" \ --document-type "Command" \ - --content https://raw.githubusercontent.com/awslabs/amazon-eks-ami/master/log-collector-script/linux/eks-ssm-content.json + --content file://eks-ssm-content.json ``` 2. To execute the bash script in the SSM document and to collect the logs from worker, run the following command: From ec01efcedbc4ff8ab0390eacb6f75db90675ea65 Mon Sep 17 00:00:00 2001 From: jacobwolfaws <113703057+jacobwolfaws@users.noreply.github.com> Date: Mon, 27 Mar 2023 16:25:40 -0400 Subject: [PATCH 181/232] Collect logs for fsx & file-cache (#1232) --- log-collector-script/linux/eks-log-collector.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 50f759461..351c3f103 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -20,7 +20,7 @@ export LANG="C" export LC_ALL="C" # Global options -readonly PROGRAM_VERSION="0.7.3" +readonly PROGRAM_VERSION="0.7.4" readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" @@ -352,6 +352,8 @@ get_common_logs() { cp --force 
--dereference --recursive /var/log/containers/kube-proxy* "${COLLECT_DIR}"/var_log/ 2> /dev/null cp --force --dereference --recursive /var/log/containers/ebs-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null cp --force --dereference --recursive /var/log/containers/efs-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/fsx-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null + cp --force --dereference --recursive /var/log/containers/file-cache-csi* "${COLLECT_DIR}"/var_log/ 2> /dev/null continue fi if [[ "${entry}" == "pods" ]]; then From 1dde199bb06f8f68c563ac96f8e3a630e0728b28 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 28 Mar 2023 09:56:52 -0700 Subject: [PATCH 182/232] Update binaries to 2023-03-17 (#1233) --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 28ab0c3cd..27db56a86 100644 --- a/Makefile +++ b/Makefile @@ -90,19 +90,19 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI .PHONY: 1.22 1.22: ## Build EKS Optimized AL2 AMI - K8s 1.22 - $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-01-30 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-03-17 pull_cni_from_github=true .PHONY: 1.23 1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 - $(MAKE) k8s kubernetes_version=1.23.16 kubernetes_build_date=2023-01-30 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-03-17 pull_cni_from_github=true .PHONY: 1.24 1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 - $(MAKE) k8s kubernetes_version=1.24.10 kubernetes_build_date=2023-01-30 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.24.11 kubernetes_build_date=2023-03-17 pull_cni_from_github=true .PHONY: 1.25 1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 - $(MAKE) k8s kubernetes_version=1.25.6 kubernetes_build_date=2023-01-30 pull_cni_from_github=true + $(MAKE) k8s 
kubernetes_version=1.25.7 kubernetes_build_date=2023-03-17 pull_cni_from_github=true .PHONY: help help: ## Display help From e535e78282f603b4f968154f212c7caff0cf1c9a Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 28 Mar 2023 14:16:38 -0700 Subject: [PATCH 183/232] Add clean target to Makefile (#1236) --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index 27db56a86..3a287c94a 100644 --- a/Makefile +++ b/Makefile @@ -104,6 +104,11 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI 1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 $(MAKE) k8s kubernetes_version=1.25.7 kubernetes_build_date=2023-03-17 pull_cni_from_github=true +.PHONY: clean +clean: + rm *-manifest.json + rm *-version-info.json + .PHONY: help help: ## Display help @awk 'BEGIN {FS = ":.*##"; printf "Usage:\n make \033[36m\033[0m\n"} /^[\.a-zA-Z_0-9\-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) From 4376066eb037b1557e20177d726a8eeafa8ee421 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 29 Mar 2023 14:10:49 -0700 Subject: [PATCH 184/232] Mount eBPF filesystem by default on 1.27+ (#1223) --- files/bin/mount-bpf-fs | 50 ++++++++++++++++++++ files/bootstrap.sh | 35 ++++++++++---- test/cases/mount-bpf-fs.sh | 96 ++++++++++++++++++++++++++++++++++++++ test/mocks/aws | 2 + test/mocks/iptables-save | 3 +- test/mocks/kubelet | 5 +- test/mocks/mount | 23 +++++++++ test/mocks/sudo | 2 +- test/mocks/systemctl | 3 +- 9 files changed, 202 insertions(+), 17 deletions(-) create mode 100755 files/bin/mount-bpf-fs create mode 100755 test/cases/mount-bpf-fs.sh create mode 100755 test/mocks/mount diff --git a/files/bin/mount-bpf-fs b/files/bin/mount-bpf-fs new file mode 100755 index 000000000..df5767e99 --- /dev/null +++ b/files/bin/mount-bpf-fs @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +set -o errexit +set -o nounset + +SYSTEMD_UNIT_DIR="/etc/systemd/system" 
+SYSTEMD_UNIT="sys-fs-bpf.mount" +SYSTEMD_UNIT_PATH="$SYSTEMD_UNIT_DIR/$SYSTEMD_UNIT" +MOUNT_POINT="/sys/fs/bpf" +FS_TYPE="bpf" + +MOUNT_BPF_FS_DEBUG=${MOUNT_BPF_FS_DEBUG:-false} +function debug() { + if [ "$MOUNT_BPF_FS_DEBUG" = "true" ]; then + echo >&2 "DEBUG:" "$@" + fi +} + +if [ $(mount --types "$FS_TYPE" | wc -l) -gt 0 ]; then + debug "$FS_TYPE filesystem already mounted!" + exit 0 +elif mount | awk '{print $3}' | grep "$MOUNT_POINT"; then + debug "mount point at $MOUNT_POINT already exists!" + exit 0 +elif [ -f "$SYSTEMD_UNIT_PATH" ]; then + debug "systemd unit at $SYSTEMD_UNIT_PATH already exists!" + exit 0 +fi + +mkdir -p "$SYSTEMD_UNIT_DIR" +cat > "$SYSTEMD_UNIT_PATH" << EOL +[Unit] +Description=BPF mounts +Documentation=https://docs.kernel.org/bpf/index.html +DefaultDependencies=no +Before=local-fs.target umount.target +After=swap.target + +[Mount] +What=bpffs +Where=$MOUNT_POINT +Type=bpf +Options=rw,nosuid,nodev,noexec,relatime,mode=700 + +[Install] +WantedBy=multi-user.target +EOL + +systemctl enable "$SYSTEMD_UNIT" +systemctl start "$SYSTEMD_UNIT" diff --git a/files/bootstrap.sh b/files/bootstrap.sh index e817e7731..0d17f1141 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -16,22 +16,24 @@ function print_help { echo "Bootstraps an instance into an EKS cluster" echo "" echo "-h,--help print this help" - echo "--use-max-pods Sets --max-pods for the kubelet when true. (default: true)" - echo "--b64-cluster-ca The base64 encoded cluster CA content. Only valid when used with --apiserver-endpoint. Bypasses calling \"aws eks describe-cluster\"" + echo echo "--apiserver-endpoint The EKS cluster API Server endpoint. Only valid when used with --b64-cluster-ca. Bypasses calling \"aws eks describe-cluster\"" - echo "--kubelet-extra-args Extra arguments to add to the kubelet. Useful for adding labels or taints." - echo "--enable-docker-bridge Restores the docker default bridge network. 
(default: false)" echo "--aws-api-retry-attempts Number of retry attempts for AWS API call (DescribeCluster) (default: 3)" - echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI" + echo "--b64-cluster-ca The base64 encoded cluster CA content. Only valid when used with --apiserver-endpoint. Bypasses calling \"aws eks describe-cluster\"" + echo "--cluster-id Specify the id of EKS cluster" + echo "--container-runtime Specify a container runtime (default: dockerd)" echo "--containerd-config-file File containing the containerd configuration to be used in place of AMI defaults." echo "--dns-cluster-ip Overrides the IP address to use for DNS queries within the cluster. Defaults to 10.100.0.10 or 172.20.0.10 based on the IP address of the primary interface" + echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI" + echo "--enable-docker-bridge Restores the docker default bridge network. (default: false)" + echo "--enable-local-outpost Enable support for worker nodes to communicate with the local control plane when running on a disconnected Outpost. (true or false)" + echo "--ip-family Specify ip family of the cluster" + echo "--kubelet-extra-args Extra arguments to add to the kubelet. Useful for adding labels or taints." 
+ echo "--mount-bfs-fs Mount a bpffs at /sys/fs/bpf (default: true, for Kubernetes 1.27+; false otherwise)" echo "--pause-container-account The AWS account (number) to pull the pause container from" echo "--pause-container-version The tag of the pause container" - echo "--container-runtime Specify a container runtime (default: dockerd)" - echo "--ip-family Specify ip family of the cluster" echo "--service-ipv6-cidr ipv6 cidr range of the cluster" - echo "--enable-local-outpost Enable support for worker nodes to communicate with the local control plane when running on a disconnected Outpost. (true or false)" - echo "--cluster-id Specify the id of EKS cluster" + echo "--use-max-pods Sets --max-pods for the kubelet when true. (default: true)" } POSITIONAL=() @@ -123,6 +125,11 @@ while [[ $# -gt 0 ]]; do shift shift ;; + --mount-bpf-fs) + MOUNT_BPF_FS=$2 + shift + shift + ;; *) # unknown option POSITIONAL+=("$1") # save it in an array for later shift # past argument @@ -178,6 +185,12 @@ SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" CLUSTER_ID="${CLUSTER_ID:-}" +DEFAULT_MOUNT_BPF_FS="true" +if vercmp "$KUBELET_VERSION" lt "1.27.0"; then + DEFAULT_MOUNT_BPF_FS="false" +fi +MOUNT_BPF_FS="${MOUNT_BPF_FS:-$DEFAULT_MOUNT_BPF_FS}" + # Helper function which calculates the amount of the given resource (either CPU or memory) # to reserve in a given resource range, specified by a start and end of the range and a percentage # of the resource to reserve. 
Note that we return zero if the start of the resource range is @@ -269,6 +282,10 @@ if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then exit 1 fi +if [ "$MOUNT_BPF_FS" = "true" ]; then + sudo mount-bpf-fs +fi + ECR_URI=$(/etc/eks/get-ecr-uri.sh "${AWS_DEFAULT_REGION}" "${AWS_SERVICES_DOMAIN}" "${PAUSE_CONTAINER_ACCOUNT:-}") PAUSE_CONTAINER_IMAGE=${PAUSE_CONTAINER_IMAGE:-$ECR_URI/eks/pause} PAUSE_CONTAINER="$PAUSE_CONTAINER_IMAGE:$PAUSE_CONTAINER_VERSION" diff --git a/test/cases/mount-bpf-fs.sh b/test/cases/mount-bpf-fs.sh new file mode 100755 index 000000000..e8ef5da99 --- /dev/null +++ b/test/cases/mount-bpf-fs.sh @@ -0,0 +1,96 @@ +#!/usr/bin/env bash + +set -o nounset +set -o errexit +set -o pipefail + +export MOUNT_BPF_FS_DEBUG=true + +echo "--> Should succeed if bpf type fs already exists" +function mount() { + echo "none on /foo/bar type bpf (rw,nosuid,nodev,noexec,relatime,mode=700)" +} +export -f mount +EXIT_CODE=0 +mount-bpf-fs || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got: $EXIT_CODE" + exit 1 +fi +export -nf mount + +echo "--> Should succeed if mount point already exists" +function mount() { + echo "none on /sys/fs/bpf type foo (rw,nosuid,nodev,noexec,relatime,mode=700)" +} +export -f mount +EXIT_CODE=0 +mount-bpf-fs || EXIT_CODE=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got: $EXIT_CODE" + exit 1 +fi +export -nf mount + +echo "--> Should succeed if systemd unit already exists" +function mount() { + echo "foo" +} +export -f mount +SYSTEMD_UNIT=/etc/systemd/system/sys-fs-bpf.mount +mkdir -p $(dirname $SYSTEMD_UNIT) +echo "foo" > $SYSTEMD_UNIT +EXIT_CODE=0 +mount-bpf-fs || EXIT_CODE=$? 
+if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got: $EXIT_CODE" + exit 1 +fi +export -nf mount +rm $SYSTEMD_UNIT + +echo "--> Should default to true on 1.27+" +export KUBELET_VERSION=v1.27.0-eks-ba74326 +MOUNT_BPF_FS_MOCK=$(mktemp) +function mount-bpf-fs() { + echo "called" >> $MOUNT_BPF_FS_MOCK +} +export MOUNT_BPF_FS_MOCK +export -f mount-bpf-fs +EXIT_CODE=0 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +if [ "$(cat $MOUNT_BPF_FS_MOCK)" = "called" ]; then + echo "❌ Test Failed: expected mount-bpf-fs to be called once but it was not!" + exit 1 +fi +export -nf mount-bpf-fs + +echo "--> Should default to false on 1.26-" +export KUBELET_VERSION=v1.26.0-eks-ba74326 +MOUNT_BPF_FS_MOCK=$(mktemp) +function mount-bpf-fs() { + echo "called" >> $MOUNT_BPF_FS_MOCK +} +export MOUNT_BPF_FS_MOCK +export -f mount-bpf-fs +EXIT_CODE=0 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? +if [[ ${EXIT_CODE} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" + exit 1 +fi +if [ "$(cat $MOUNT_BPF_FS_MOCK)" = "called" ]; then + echo "❌ Test Failed: expected mount-bpf-fs to not be called but it was!" 
+ exit 1 +fi +export -nf mount-bpf-fs diff --git a/test/mocks/aws b/test/mocks/aws index b752fb6e3..da5f00b50 100755 --- a/test/mocks/aws +++ b/test/mocks/aws @@ -5,6 +5,8 @@ SCRIPTPATH="$( pwd -P )" +echo >&2 "mocking 'aws $@'" + if [[ $1 == "ec2" ]]; then if [[ $2 == "describe-instance-types" ]]; then diff --git a/test/mocks/iptables-save b/test/mocks/iptables-save index b4d037268..6c02baf7b 100755 --- a/test/mocks/iptables-save +++ b/test/mocks/iptables-save @@ -1,4 +1,3 @@ #!/usr/bin/env bash -set -euo pipefail -echo "mocking iptables-save with params $@" +echo >&2 "mocking 'iptables-save $@'" diff --git a/test/mocks/kubelet b/test/mocks/kubelet index b3fed23c1..3c7c5dc91 100755 --- a/test/mocks/kubelet +++ b/test/mocks/kubelet @@ -1,11 +1,10 @@ #!/usr/bin/env bash -set -euo pipefail + +echo >&2 "mocking 'kubelet $@'" # The only use of kubelet directly is to get the Kubernetes version, # so we'll set a default here to avoid test failures, and you can # override by setting the KUBELET_VERSION environment variable. 
if [ $# == 1 ] && [ $1 == "--version" ]; then echo "Kubernetes ${KUBELET_VERSION:-v1.23.9-eks-ba74326}" -else - echo "mocking kubelet with params $@" fi diff --git a/test/mocks/mount b/test/mocks/mount new file mode 100755 index 000000000..7a9170b84 --- /dev/null +++ b/test/mocks/mount @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +echo >&2 "mocking 'mount $@'" + +echo 'sysfs on /sys type sysfs (rw,nosuid,nodev,noexec,relatime) +proc on /proc type proc (rw,nosuid,nodev,noexec,relatime) +devtmpfs on /dev type devtmpfs (rw,nosuid,size=4059512k,nr_inodes=1014878,mode=755) +securityfs on /sys/kernel/security type securityfs (rw,nosuid,nodev,noexec,relatime) +tmpfs on /run type tmpfs (rw,nosuid,nodev,mode=755) +tmpfs on /sys/fs/cgroup type tmpfs (ro,nosuid,nodev,noexec,mode=755) +cgroup on /sys/fs/cgroup/systemd type cgroup (rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd) +pstore on /sys/fs/pstore type pstore (rw,nosuid,nodev,noexec,relatime) +cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,blkio) +cgroup on /sys/fs/cgroup/freezer type cgroup (rw,nosuid,nodev,noexec,relatime,freezer) +cgroup on /sys/fs/cgroup/perf_event type cgroup (rw,nosuid,nodev,noexec,relatime,perf_event) +cgroup on /sys/fs/cgroup/hugetlb type cgroup (rw,nosuid,nodev,noexec,relatime,hugetlb) +cgroup on /sys/fs/cgroup/cpuset type cgroup (rw,nosuid,nodev,noexec,relatime,cpuset) +cgroup on /sys/fs/cgroup/net_cls,net_prio type cgroup (rw,nosuid,nodev,noexec,relatime,net_cls,net_prio) +cgroup on /sys/fs/cgroup/cpu,cpuacct type cgroup (rw,nosuid,nodev,noexec,relatime,cpu,cpuacct) +cgroup on /sys/fs/cgroup/memory type cgroup (rw,nosuid,nodev,noexec,relatime,memory) +cgroup on /sys/fs/cgroup/pids type cgroup (rw,nosuid,nodev,noexec,relatime,pids) +cgroup on /sys/fs/cgroup/devices type cgroup (rw,nosuid,nodev,noexec,relatime,devices) +/dev/xvda1 on / type xfs (rw,noatime,attr2,inode64,logbufs=8,logbsize=32k,noquota)' diff --git 
a/test/mocks/sudo b/test/mocks/sudo index 7bc3d2ffb..7b76a82a8 100755 --- a/test/mocks/sudo +++ b/test/mocks/sudo @@ -1,4 +1,4 @@ #!/usr/bin/env bash -set -euo pipefail +echo >&2 "mocking 'sudo $@'" exec "$@" diff --git a/test/mocks/systemctl b/test/mocks/systemctl index 5efdd955e..3e74c7e2e 100755 --- a/test/mocks/systemctl +++ b/test/mocks/systemctl @@ -1,4 +1,3 @@ #!/usr/bin/env bash -set -euo pipefail -echo "mocking systemctl with $@" +echo >&2 "mocking 'systemctl $@'" From 0e5fa8775b126c8526ab39f8f92a040d4f9373df Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 29 Mar 2023 17:09:50 -0700 Subject: [PATCH 185/232] AMI Release v20230322 (#1238) --- CHANGELOG.md | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0639a2883..44e28e823 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,52 @@ # Changelog +### AMI Release v20230322 +* amazon-eks-gpu-node-1.25-v20230322 +* amazon-eks-gpu-node-1.24-v20230322 +* amazon-eks-gpu-node-1.23-v20230322 +* amazon-eks-gpu-node-1.22-v20230322 +* amazon-eks-arm64-node-1.25-v20230322 +* amazon-eks-arm64-node-1.24-v20230322 +* amazon-eks-arm64-node-1.23-v20230322 +* amazon-eks-arm64-node-1.22-v20230322 +* amazon-eks-node-1.25-v20230322 +* amazon-eks-node-1.24-v20230322 +* amazon-eks-node-1.23-v20230322 +* amazon-eks-node-1.22-v20230322 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.25.7-20230322` +* `1.24.11-20230322` +* `1.23.17-20230322` +* `1.22.17-20230322` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.25.7/2023-03-17/ +* s3://amazon-eks/1.24.11/2023-03-17/ +* s3://amazon-eks/1.23.17/2023-03-17/ +* s3://amazon-eks/1.22.17/2023-03-17/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.235-144.344.amzn2 + * Kubernetes 1.24 and above: 5.10.173-154.642.amzn2 + * The GPU AMI will continue to use `kernel-5.4` for all Kubernetes versions 
as we work to address a compatibility issue with `nvidia-driver-latest-dkms` ([#1222](https://github.com/awslabs/amazon-eks-ami/issues/1222)). +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that with Kubernetes 1.25+, Docker is only installed on GPU AMI's. This is subject to change as we remove unnecessary dependencies, and we recommend completing the migration to `containerd` immediately. +* `containerd`: 1.6.6-1.amzn2.0.2 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- Validate package versionlocks ([#1195](https://github.com/awslabs/amazon-eks-ami/pull/1195)) +- Updated `kernel-5.4` to address: + - [ALASKERNEL-5.4-2023-043](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-043.html) +- Updated `kernel-5.10` to address: + - [ALASKERNEL-5.10-2023-027](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-027.html) + - [ALASKERNEL-5.10-2023-028](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-028.html) + ### AMI Release v20230304 * amazon-eks-gpu-node-1.25-v20230304 * amazon-eks-gpu-node-1.24-v20230304 From fbb0ded69742525508c5e96950041d49003fdf7f Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 30 Mar 2023 13:41:35 -0700 Subject: [PATCH 186/232] Always configure kubelet to use external ecr-credential-provider (#1240) --- files/kubelet-config.json | 3 ++- files/kubelet-containerd.service | 2 ++ files/kubelet.service | 2 ++ scripts/install-worker.sh | 15 --------------- 4 files changed, 6 insertions(+), 16 deletions(-) diff --git a/files/kubelet-config.json b/files/kubelet-config.json index b78510c6a..666350e2b 100644 --- a/files/kubelet-config.json +++ b/files/kubelet-config.json @@ -27,7 +27,8 @@ "cgroupDriver": "cgroupfs", "cgroupRoot": "/", "featureGates": { - "RotateKubeletServerCertificate": true + "RotateKubeletServerCertificate": true, + "KubeletCredentialProviders": true }, "protectKernelDefaults": true, "serializeImagePulls": false, diff 
--git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index d5789530e..db1c56511 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -12,6 +12,8 @@ ExecStart=/usr/bin/kubelet \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime remote \ --container-runtime-endpoint unix:///run/containerd/containerd.sock \ + --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \ + --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider \ $KUBELET_ARGS \ $KUBELET_EXTRA_ARGS diff --git a/files/kubelet.service b/files/kubelet.service index a4aa5a890..5002876be 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -11,6 +11,8 @@ ExecStart=/usr/bin/kubelet \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime docker \ --network-plugin cni \ + --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \ + --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider \ $KUBELET_ARGS \ $KUBELET_EXTRA_ARGS diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index aed1c3806..e52fe2c0b 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -162,12 +162,6 @@ else sudo mv $TEMPLATE_DIR/containerd-config.toml /etc/eks/containerd/containerd-config.toml fi -if vercmp "$KUBERNETES_VERSION" gteq "1.22.0"; then - # enable CredentialProviders features in kubelet-containerd service file - IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' - sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet-containerd.service -fi - sudo mv $TEMPLATE_DIR/kubelet-containerd.service /etc/eks/containerd/kubelet-containerd.service sudo mv $TEMPLATE_DIR/sandbox-image.service 
/etc/eks/containerd/sandbox-image.service sudo mv $TEMPLATE_DIR/pull-sandbox-image.sh /etc/eks/containerd/pull-sandbox-image.sh @@ -325,15 +319,6 @@ if [[ $KUBERNETES_VERSION == "1.20"* ]]; then echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $TEMPLATE_DIR/kubelet-config.json fi -if vercmp "$KUBERNETES_VERSION" gteq "1.22.0"; then - # enable CredentialProviders feature flags in kubelet service file - IMAGE_CREDENTIAL_PROVIDER_FLAGS='\\\n --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \\\n --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider' - sudo sed -i s,"aws","aws $IMAGE_CREDENTIAL_PROVIDER_FLAGS", $TEMPLATE_DIR/kubelet.service - # enable KubeletCredentialProviders features in kubelet configuration - KUBELET_CREDENTIAL_PROVIDERS_FEATURES=$(cat $TEMPLATE_DIR/kubelet-config.json | jq '.featureGates += {KubeletCredentialProviders: true}') - printf "%s" "$KUBELET_CREDENTIAL_PROVIDERS_FEATURES" > "$TEMPLATE_DIR/kubelet-config.json" -fi - sudo mv $TEMPLATE_DIR/kubelet.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service sudo mv $TEMPLATE_DIR/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json From d66425524e8a88592c493d68216b0aad24117468 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Thu, 30 Mar 2023 17:40:15 -0500 Subject: [PATCH 187/232] Use default kubelet API QPS for 1.27+ (#1241) --- files/bootstrap.sh | 3 +- test/cases/api-qps-k8s-1.22-to-1.26.sh | 56 +++++++++++++++++++ ....22-above.sh => api-qps-k8s-1.27-above.sh} | 9 +-- 3 files changed, 63 insertions(+), 5 deletions(-) create mode 100755 test/cases/api-qps-k8s-1.22-to-1.26.sh rename test/cases/{api-qps-k8s-1.22-above.sh => api-qps-k8s-1.27-above.sh} (82%) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 0d17f1141..ce1a7f0e5 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -437,8 +437,9 @@ else fi INSTANCE_TYPE=$(imds 
'latest/meta-data/instance-type') -if vercmp "$KUBELET_VERSION" gteq "1.22.0"; then +if vercmp "$KUBELET_VERSION" gteq "1.22.0" && vercmp "$KUBELET_VERSION" lt "1.27.0"; then # for K8s versions that suport API Priority & Fairness, increase our API server QPS + # in 1.27, the default is already increased to 50/100, so use the higher defaults echo $(jq ".kubeAPIQPS=( .kubeAPIQPS // 10)|.kubeAPIBurst=( .kubeAPIBurst // 20)" $KUBELET_CONFIG) > $KUBELET_CONFIG fi diff --git a/test/cases/api-qps-k8s-1.22-to-1.26.sh b/test/cases/api-qps-k8s-1.22-to-1.26.sh new file mode 100755 index 000000000..f61f1d0ac --- /dev/null +++ b/test/cases/api-qps-k8s-1.22-to-1.26.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "--> Should increase API server QPS for K8s 1.22 - 1.26" +exit_code=0 +export KUBELET_VERSION=v1.22.0-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? + +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_api_qps="10" +expected_api_burst="20" + +actual_api_qps=$(jq -r '.kubeAPIQPS' < /etc/kubernetes/kubelet/kubelet-config.json) +actual_api_burst=$(jq -r '.kubeAPIBurst' < /etc/kubernetes/kubelet/kubelet-config.json) +if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then + echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" + exit 1 +fi + +if [[ ${actual_api_burst} != ${expected_api_burst} ]]; then + echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" + exit 1 +fi + +export KUBELET_VERSION=v1.26.0-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a non-zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_api_qps="10" +expected_api_burst="20" + +actual_api_qps=$(jq -r '.kubeAPIQPS' < /etc/kubernetes/kubelet/kubelet-config.json) +actual_api_burst=$(jq -r '.kubeAPIBurst' < /etc/kubernetes/kubelet/kubelet-config.json) +if [[ ${actual_api_qps} != ${expected_api_qps} ]]; then + echo "❌ Test Failed: expected kubeAPIQPS = '${expected_api_qps}' but got '${actual_api_qps}'" + exit 1 +fi + +if [[ ${actual_api_burst} != ${expected_api_burst} ]]; then + echo "❌ Test Failed: expected kubeAPIBurst = '${expected_api_burst}' but got '${actual_api_burst}'" + exit 1 +fi diff --git a/test/cases/api-qps-k8s-1.22-above.sh b/test/cases/api-qps-k8s-1.27-above.sh similarity index 82% rename from test/cases/api-qps-k8s-1.22-above.sh rename to test/cases/api-qps-k8s-1.27-above.sh index cbc242ac2..d25f1ac31 100755 --- a/test/cases/api-qps-k8s-1.22-above.sh +++ b/test/cases/api-qps-k8s-1.27-above.sh @@ -1,9 +1,9 @@ #!/usr/bin/env bash set -euo pipefail -echo "--> Should increase API server QPS for K8s 1.22+" +echo "--> Should use default API server QPS for K8s 1.27+" exit_code=0 -export KUBELET_VERSION=v1.22.0-eks-ba74326 +export KUBELET_VERSION=v1.27.0-eks-ba74326 /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ @@ -14,8 +14,9 @@ if [[ ${exit_code} -ne 0 ]]; then exit 1 fi -expected_api_qps="10" -expected_api_burst="20" +# values should not be set +expected_api_qps="null" +expected_api_burst="null" actual_api_qps=$(jq -r '.kubeAPIQPS' < /etc/kubernetes/kubelet/kubelet-config.json) actual_api_burst=$(jq -r '.kubeAPIBurst' < /etc/kubernetes/kubelet/kubelet-config.json) From a22169e55f5b5ff592f22b82f0063707fd3dd469 Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 3 Apr 2023 12:36:54 -0700 Subject: [PATCH 188/232] Add 1.26 (#1246) --- Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git 
a/Makefile b/Makefile index 3a287c94a..9b8d6e952 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: latest -latest: 1.25 ## Build EKS Optimized AL2 AMI with the latest supported version of Kubernetes +latest: 1.26 ## Build EKS Optimized AL2 AMI with the latest supported version of Kubernetes # ensure that these flags are equivalent to the rules in the .editorconfig SHFMT_FLAGS := --list \ @@ -104,6 +104,10 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI 1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 $(MAKE) k8s kubernetes_version=1.25.7 kubernetes_build_date=2023-03-17 pull_cni_from_github=true +.PHONY: 1.26 +1.26: ## Build EKS Optimized AL2 AMI - K8s 1.26 + $(MAKE) k8s kubernetes_version=1.26.2 kubernetes_build_date=2023-03-17 pull_cni_from_github=true + .PHONY: clean clean: rm *-manifest.json From 2252d0b4e7eb80e24758d0686d4768f9cdcf41a2 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 4 Apr 2023 16:23:11 -0700 Subject: [PATCH 189/232] Remove --container-runtime kubelet flag for 1.27+ (#1250) --- files/bootstrap.sh | 8 +++++++- files/kubelet-containerd.service | 1 - 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index ce1a7f0e5..c9b5da054 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -21,7 +21,7 @@ function print_help { echo "--aws-api-retry-attempts Number of retry attempts for AWS API call (DescribeCluster) (default: 3)" echo "--b64-cluster-ca The base64 encoded cluster CA content. Only valid when used with --apiserver-endpoint. Bypasses calling \"aws eks describe-cluster\"" echo "--cluster-id Specify the id of EKS cluster" - echo "--container-runtime Specify a container runtime (default: dockerd)" + echo "--container-runtime Specify a container runtime. For Kubernetes 1.23 and below, possible values are [dockerd, containerd] and the default value is dockerd. 
For Kubernetes 1.24 and above, containerd is the only valid value. This flag is deprecated and will be removed in a future release." echo "--containerd-config-file File containing the containerd configuration to be used in place of AMI defaults." echo "--dns-cluster-ip Overrides the IP address to use for DNS queries within the cluster. Defaults to 10.100.0.10 or 172.20.0.10 based on the IP address of the primary interface" echo "--docker-config-json The contents of the /etc/docker/daemon.json file. Useful if you want a custom config differing from the default one in the AMI" @@ -538,6 +538,12 @@ if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then sudo chown root:root /etc/systemd/system/kubelet.service # Validate containerd config sudo containerd config dump > /dev/null + + # --container-runtime flag is gone in 1.27+ + # TODO: remove this when 1.26 is EOL + if vercmp "$KUBELET_VERSION" lt "1.27.0"; then + KUBELET_ARGS="$KUBELET_ARGS --container-runtime=remote" + fi elif [[ "$CONTAINER_RUNTIME" = "dockerd" ]]; then mkdir -p /etc/docker bash -c "/sbin/iptables-save > /etc/sysconfig/iptables" diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index db1c56511..bd8ed1cf6 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -10,7 +10,6 @@ ExecStartPre=/sbin/iptables -P FORWARD ACCEPT -w 5 ExecStart=/usr/bin/kubelet \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ - --container-runtime remote \ --container-runtime-endpoint unix:///run/containerd/containerd.sock \ --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \ --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider \ From 9ea1a7a6c3857e3c6b791e5758c9a4cafc48b88d Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 4 Apr 2023 16:26:29 -0700 Subject: [PATCH 190/232] Use gp3 volume_type for 1.27+ (#1197) --- Makefile | 51 
++++++++++++++++++++++++++--------- eks-worker-al2-variables.json | 2 +- 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index 9b8d6e952..eb1621870 100644 --- a/Makefile +++ b/Makefile @@ -6,29 +6,54 @@ K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS MAKEFILE_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) +# expands to 'true' if PACKER_VARIABLE_FILE is non-empty +# and the file contains the string passed as the first argument +# otherwise, expands to 'false' +packer_variable_file_contains = $(if $(PACKER_VARIABLE_FILE),$(shell grep -Fq $1 $(PACKER_VARIABLE_FILE) && echo true || echo false),false) + +# expands to 'true' if the version comparison is affirmative +# otherwise expands to 'false' +vercmp = $(shell $(MAKEFILE_DIR)/files/bin/vercmp "$1" "$2" "$3") + +# expands to 'true' if the 'aws_region' contains 'us-iso' (an isolated region) +# otherwise, expands to 'false' +in_iso_region = $(if $(findstring us-iso,$(aws_region)),true,false) + +# gp3 volumes are used by default for 1.27+ +# TODO: remove when 1.26 reaches EOL +# TODO: remove when gp3 is supported in isolated regions +ifneq ($(call packer_variable_file_contains,volume_type), true) + ifeq ($(call in_iso_region), true) + volume_type ?= gp2 + else ifeq ($(call vercmp,$(kubernetes_version),lt,1.27.0), true) + volume_type ?= gp2 + endif +endif + # Docker is not present on 1.25+ AMI's -ifeq ($(shell $(MAKEFILE_DIR)/files/bin/vercmp "$(kubernetes_version)" gteq "1.25.0"), true) -# do not tag the AMI with the Docker version -docker_version ?= none -# do not include the Docker version in the AMI description -ami_component_description ?= (k8s: {{ user `kubernetes_version` }}, containerd: {{ user `containerd_version` }}) +# TODO: remove this when 1.24 reaches EOL +ifeq ($(call vercmp,$(kubernetes_version),gteq,1.25.0), true) + # do not tag the AMI with the Docker version + docker_version ?= none + # do not include the Docker 
version in the AMI description + ami_component_description ?= (k8s: {{ user `kubernetes_version` }}, containerd: {{ user `containerd_version` }}) endif arch ?= x86_64 ifeq ($(arch), arm64) -instance_type ?= m6g.large -ami_name ?= amazon-eks-arm64-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') + instance_type ?= m6g.large + ami_name ?= amazon-eks-arm64-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') else -instance_type ?= m4.large -ami_name ?= amazon-eks-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') + instance_type ?= m4.large + ami_name ?= amazon-eks-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') endif ifeq ($(aws_region), cn-northwest-1) -source_ami_owners ?= 141808717104 + source_ami_owners ?= 141808717104 endif ifeq ($(aws_region), us-gov-west-1) -source_ami_owners ?= 045324592363 + source_ami_owners ?= 045324592363 endif T_RED := \e[0;31m @@ -49,7 +74,7 @@ SHFMT_FLAGS := --list \ SHFMT_COMMAND := $(shell which shfmt) ifeq (, $(SHFMT_COMMAND)) -SHFMT_COMMAND = docker run --rm -v $(MAKEFILE_DIR):$(MAKEFILE_DIR) mvdan/shfmt + SHFMT_COMMAND = docker run --rm -v $(MAKEFILE_DIR):$(MAKEFILE_DIR) mvdan/shfmt endif .PHONY: fmt @@ -58,7 +83,7 @@ fmt: ## Format the source files SHELLCHECK_COMMAND := $(shell which shellcheck) ifeq (, $(SHELLCHECK_COMMAND)) -SHELLCHECK_COMMAND = docker run --rm -v $(MAKEFILE_DIR):$(MAKEFILE_DIR) koalaman/shellcheck:stable + SHELLCHECK_COMMAND = docker run --rm -v $(MAKEFILE_DIR):$(MAKEFILE_DIR) koalaman/shellcheck:stable endif SHELL_FILES := $(shell find $(MAKEFILE_DIR) -type f -name '*.sh') diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index b83543f49..456bcabf9 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -33,5 +33,5 @@ "ssh_username": "ec2-user", "subnet_id": "", "temporary_security_group_source_cidrs": "", - "volume_type": "gp2" + "volume_type": "gp3" } From f9fa3f614ba04c411eb13f3edad674b79348479b Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula 
<42261978+ljosyula@users.noreply.github.com> Date: Tue, 4 Apr 2023 18:35:19 -0700 Subject: [PATCH 191/232] Adding inf2 and trn1n instances (#1251) Co-authored-by: ljosyula --- files/eni-max-pods.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index cde3c610e..2c0253e90 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -271,6 +271,10 @@ inf1.24xlarge 321 inf1.2xlarge 38 inf1.6xlarge 234 inf1.xlarge 38 +inf2.24xlarge 737 +inf2.48xlarge 737 +inf2.8xlarge 234 +inf2.xlarge 58 is4gen.2xlarge 58 is4gen.4xlarge 234 is4gen.8xlarge 234 @@ -623,6 +627,7 @@ t4g.small 11 t4g.xlarge 58 trn1.2xlarge 58 trn1.32xlarge 247 +trn1n.32xlarge 247 u-12tb1.112xlarge 737 u-12tb1.metal 147 u-18tb1.112xlarge 737 From 8875417bc39f7218f99bec59ae226763b9d19870 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 5 Apr 2023 14:36:01 -0700 Subject: [PATCH 192/232] Create KUBELET_ARGS dropin after configuration container runtime (#1257) --- files/bootstrap.sh | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index c9b5da054..539752d8a 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -485,19 +485,7 @@ fi KUBELET_ARGS="$KUBELET_ARGS --cloud-provider=$KUBELET_CLOUD_PROVIDER" -mkdir -p /etc/systemd/system/kubelet.service.d - -cat << EOF > /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf -[Service] -Environment='KUBELET_ARGS=$KUBELET_ARGS' -EOF - -if [[ -n "$KUBELET_EXTRA_ARGS" ]]; then - cat << EOF > /etc/systemd/system/kubelet.service.d/30-kubelet-extra-args.conf -[Service] -Environment='KUBELET_EXTRA_ARGS=$KUBELET_EXTRA_ARGS' -EOF -fi +mkdir -p /etc/systemd/system if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then if $ENABLE_DOCKER_BRIDGE; then @@ -568,6 +556,21 @@ else exit 1 fi +mkdir -p /etc/systemd/system/kubelet.service.d + +cat << EOF > /etc/systemd/system/kubelet.service.d/10-kubelet-args.conf +[Service] 
+Environment='KUBELET_ARGS=$KUBELET_ARGS' +EOF + +if [[ -n "$KUBELET_EXTRA_ARGS" ]]; then + cat << EOF > /etc/systemd/system/kubelet.service.d/30-kubelet-extra-args.conf +[Service] +Environment='KUBELET_EXTRA_ARGS=$KUBELET_EXTRA_ARGS' +EOF +fi + +systemctl daemon-reload systemctl enable kubelet systemctl start kubelet From 8d7078a24e2804984d79ee2b5cd79acb0effec88 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 5 Apr 2023 15:14:18 -0700 Subject: [PATCH 193/232] Install latest containerd 1.6.x (#1247) --- eks-worker-al2-variables.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 456bcabf9..30d769e03 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -13,7 +13,7 @@ "binary_bucket_region": "us-west-2", "cache_container_images": "false", "cni_plugin_version": "v0.8.6", - "containerd_version": "1.6.6-1.amzn2.0.2", + "containerd_version": "1.6.*", "creator": "{{env `USER`}}", "docker_version": "20.10.17-1.amzn2.0.1", "encrypted": "false", From e02ca984d5c14fb3126ec59808ed9aa3c9a224fe Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 7 Apr 2023 14:32:46 -0700 Subject: [PATCH 194/232] Remove stale issue workflow (#1256) --- .github/workflows/stale-issues.yaml | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 .github/workflows/stale-issues.yaml diff --git a/.github/workflows/stale-issues.yaml b/.github/workflows/stale-issues.yaml deleted file mode 100644 index a56181160..000000000 --- a/.github/workflows/stale-issues.yaml +++ /dev/null @@ -1,21 +0,0 @@ -name: 'Close stale issues' -on: - schedule: - # once a day at noon - - cron: '0 12 * * *' -permissions: - issues: write -jobs: - stale: - runs-on: ubuntu-latest - steps: - - uses: actions/stale@v6 - with: - days-before-stale: 90 - days-before-close: 14 - stale-issue-message: 'Please update this issue if it applies to the latest AMI release; otherwise it will be closed soon.' 
- stale-issue-label: 'stale' - exempt-issue-labels: 'never-stale' - # empty message will prevent PR's from being staled - stale-pr-message: '' - debug-only: true \ No newline at end of file From e56aa6502d8faf37340bbef038d9e7dafff7838f Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 7 Apr 2023 16:44:31 -0700 Subject: [PATCH 195/232] Parameterize Packer template and default variable file (#1252) --- Makefile | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index eb1621870..271faa8fb 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,13 @@ +MAKEFILE_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +PACKER_DEFAULT_VARIABLE_FILE ?= $(MAKEFILE_DIR)/eks-worker-al2-variables.json +PACKER_TEMPLATE_FILE ?= $(MAKEFILE_DIR)/eks-worker-al2.json PACKER_BINARY ?= packer -AVAILABLE_PACKER_VARIABLES := $(shell $(PACKER_BINARY) inspect -machine-readable eks-worker-al2.json | grep 'template-variable' | awk -F ',' '{print $$4}') +AVAILABLE_PACKER_VARIABLES := $(shell $(PACKER_BINARY) inspect -machine-readable $(PACKER_TEMPLATE_FILE) | grep 'template-variable' | awk -F ',' '{print $$4}') K8S_VERSION_PARTS := $(subst ., ,$(kubernetes_version)) K8S_VERSION_MINOR := $(word 1,${K8S_VERSION_PARTS}).$(word 2,${K8S_VERSION_PARTS}) -MAKEFILE_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) - # expands to 'true' if PACKER_VARIABLE_FILE is non-empty # and the file contains the string passed as the first argument # otherwise, expands to 'false' @@ -98,18 +100,18 @@ test: ## run the test-harness # include only variables which have a defined value PACKER_VARIABLES := $(foreach packerVar,$(AVAILABLE_PACKER_VARIABLES),$(if $($(packerVar)),$(packerVar))) -PACKER_VAR_FLAGS := -var-file eks-worker-al2-variables.json \ -$(if $(PACKER_VARIABLE_FILE),--var-file=$(PACKER_VARIABLE_FILE),) \ +PACKER_VAR_FLAGS := -var-file $(PACKER_DEFAULT_VARIABLE_FILE) \ +$(if $(PACKER_VARIABLE_FILE),-var-file=$(PACKER_VARIABLE_FILE),) 
\ $(foreach packerVar,$(PACKER_VARIABLES),-var $(packerVar)='$($(packerVar))') .PHONY: validate validate: ## Validate packer config - $(PACKER_BINARY) validate $(PACKER_VAR_FLAGS) eks-worker-al2.json + $(PACKER_BINARY) validate $(PACKER_VAR_FLAGS) $(PACKER_TEMPLATE_FILE) .PHONY: k8s k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI @echo "$(T_GREEN)Building AMI for version $(T_YELLOW)$(kubernetes_version)$(T_GREEN) on $(T_YELLOW)$(arch)$(T_RESET)" - $(PACKER_BINARY) build -timestamp-ui -color=false $(PACKER_VAR_FLAGS) eks-worker-al2.json + $(PACKER_BINARY) build -timestamp-ui -color=false $(PACKER_VAR_FLAGS) $(PACKER_TEMPLATE_FILE) # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html From 1efc3d6630bac4f82c05f15faf698a481ff7129d Mon Sep 17 00:00:00 2001 From: camrakin <113552683+camrakin@users.noreply.github.com> Date: Fri, 7 Apr 2023 17:49:53 -0700 Subject: [PATCH 196/232] Updating CHANGELOG.MD for 1.26 Release (#1260) * Updating CHANGELOG.MD for 1.26 Release --- CHANGELOG.md | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44e28e823..6859fe2db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,62 @@ # Changelog +### AMI Release v20230406 +* amazon-eks-gpu-node-1.26-v20230406 +* amazon-eks-gpu-node-1.25-v20230406 +* amazon-eks-gpu-node-1.24-v20230406 +* amazon-eks-gpu-node-1.23-v20230406 +* amazon-eks-gpu-node-1.22-v20230406 +* amazon-eks-arm64-node-1.26-v20230406 +* amazon-eks-arm64-node-1.25-v20230406 +* amazon-eks-arm64-node-1.24-v20230406 +* amazon-eks-arm64-node-1.23-v20230406 +* amazon-eks-arm64-node-1.22-v20230406 +* amazon-eks-node-1.26-v20230406 +* amazon-eks-node-1.25-v20230406 +* amazon-eks-node-1.24-v20230406 +* amazon-eks-node-1.23-v20230406 +* amazon-eks-node-1.22-v20230406 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these 
AMIs: +* `1.26.2-20230406` +* `1.25.7-20230406` +* `1.24.11-20230406` +* `1.23.17-20230406` +* `1.22.17-20230406` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.26.2/2023-03-17/ +* s3://amazon-eks/1.25.7/2023-03-17/ +* s3://amazon-eks/1.24.11/2023-03-17/ +* s3://amazon-eks/1.23.17/2023-03-17/ +* s3://amazon-eks/1.22.17/2023-03-17/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.238-148.346.amzn2 + * Kubernetes 1.24 and above: 5.10.173-154.642.amzn2 +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0 + +Notable changes: +- Add support for Kubernetes 1.26 ([#1246](https://github.com/awslabs/amazon-eks-ami/pull/1246)) +- Add support `inf2`, `trn1n` instance types ([#1251](https://github.com/awslabs/amazon-eks-ami/pull/1251)) +- Updated `containerd` to address: + - [ALASDOCKER-2023-023](https://alas.aws.amazon.com/AL2/ALASDOCKER-2023-023.html) +- Fixed `ecr-credential-provider` flags not being passed correctly to `kubelet` ([#1240](https://github.com/awslabs/amazon-eks-ami/pull/1240)) + - Added `--image-credential-provider-config` and `--image-credential-provider-bin-dir` flags to the `systemd` units. + - Set `KubeletCredentialProviders` feature flag to `true` in the `kubelet` JSON config. 
+ +Other changes: +- Use `gp3 volume_type` for 1.27+ ([#1197](https://github.com/awslabs/amazon-eks-ami/pull/1197)) +- Use default kubelet API QPS for 1.27+ ([#1241](https://github.com/awslabs/amazon-eks-ami/pull/1241)) +- Remove `--container-runtime` kubelet flag for 1.27+ ([#1250](https://github.com/awslabs/amazon-eks-ami/pull/1250)) + ### AMI Release v20230322 * amazon-eks-gpu-node-1.25-v20230322 * amazon-eks-gpu-node-1.24-v20230322 From 9456c6a51a5d78291a21ba0d0c22fd2f6bb27d33 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Mon, 10 Apr 2023 14:04:06 -0500 Subject: [PATCH 197/232] Add ethtool (#1261) --- log-collector-script/linux/eks-log-collector.sh | 10 +++++++++- scripts/install-worker.sh | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index 351c3f103..dee5654a6 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -20,7 +20,7 @@ export LANG="C" export LC_ALL="C" # Global options -readonly PROGRAM_VERSION="0.7.4" +readonly PROGRAM_VERSION="0.7.5" readonly PROGRAM_SOURCE="https://github.com/awslabs/amazon-eks-ami/blob/master/log-collector-script/" readonly PROGRAM_NAME="$(basename "$0" .sh)" readonly PROGRAM_DIR="/opt/log-collector" @@ -526,6 +526,14 @@ get_networking_info() { fi cp /etc/resolv.conf "${COLLECT_DIR}"/networking/resolv.conf + + # collect ethtool -S for all interfaces + INTERFACES=$(ip -o a | awk '{print $2}' | sort -n | uniq) + for ifc in ${INTERFACES}; do + echo "Interface ${ifc}" >> "${COLLECT_DIR}"/networking/ethtool.txt + ethtool -S ${ifc} >> "${COLLECT_DIR}"/networking/ethtool.txt 2>&1 + echo -e "\n" >> "${COLLECT_DIR}"/networking/ethtool.txt + done ok } diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index e52fe2c0b..a32fda814 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -62,6 +62,7 @@ sudo yum install -y \ 
conntrack \ curl \ ec2-instance-connect \ + ethtool \ ipvsadm \ jq \ nfs-utils \ From db6ba5584bb3c3d434138d3e3f8ceaad0934e22e Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 19 Apr 2023 08:51:34 -0700 Subject: [PATCH 198/232] Override hostname to match EC2's PrivateDnsName (#1264) --- files/bootstrap.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 539752d8a..539628c1f 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -481,6 +481,13 @@ if vercmp "$KUBELET_VERSION" lt "1.26.0"; then else KUBELET_CLOUD_PROVIDER="external" echo "$(jq ".providerID=\"$(provider-id)\"" $KUBELET_CONFIG)" > $KUBELET_CONFIG + # When the external cloud provider is used, kubelet will use /etc/hostname as the name of the Node object. + # If the VPC has a custom `domain-name` in its DHCP options set, and the VPC has `enableDnsHostnames` set to `true`, + # then /etc/hostname is not the same as EC2's PrivateDnsName. + # The name of the Node object must be equal to EC2's PrivateDnsName for the aws-iam-authenticator to allow this kubelet to manage it. 
+ INSTANCE_ID=$(imds /latest/meta-data/instance-id) + PRIVATE_DNS_NAME=$(AWS_RETRY_MODE=standard AWS_MAX_ATTEMPTS=10 aws ec2 describe-instances --instance-ids $INSTANCE_ID --query 'Reservations[].Instances[].PrivateDnsName' --output text) + KUBELET_ARGS="$KUBELET_ARGS --hostname-override=$PRIVATE_DNS_NAME" fi KUBELET_ARGS="$KUBELET_ARGS --cloud-provider=$KUBELET_CLOUD_PROVIDER" From 42c3f524cd74d82c7ea4aaecbc0d46c1c39d2095 Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 19 Apr 2023 10:11:20 -0700 Subject: [PATCH 199/232] Use credential provider API v1 in 1.27+, v1alpha1 in 1.26- (#1269) --- doc/USER_GUIDE.md | 12 +++++ files/bootstrap.sh | 25 ++++----- files/ecr-credential-provider-config | 14 ------ files/ecr-credential-provider-config.json | 18 +++++++ files/kubelet-containerd.service | 4 +- files/kubelet.service | 4 +- scripts/install-worker.sh | 26 ++++------ test/Dockerfile | 2 +- test/cases/ecr-credential-provider-config.sh | 53 +++++++++++++++----- test/test-harness.sh | 5 +- 10 files changed, 103 insertions(+), 60 deletions(-) delete mode 100644 files/ecr-credential-provider-config create mode 100644 files/ecr-credential-provider-config.json diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index 4e7291138..b37348d60 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -11,6 +11,7 @@ This document includes details about using the AMI template and the resulting AM 1. [AL2 and Linux kernel information](#al2-and-linux-kernel-information) 1. [Updating known instance types](#updating-known-instance-types) 1. [Version-locked packages](#version-locked-packages) +1. [Image credential provider plugins](#image-credential-provider-plugins) --- @@ -309,3 +310,14 @@ sudo yum versionlock delete $PACKAGE_NAME # unlock all packages sudo yum versionlock clear ``` + +--- + +## Image credential provider plugins + +Prior to Kubernetes 1.27, the `kubelet` could obtain credentials for ECR out of the box. 
This legacy credential process has been removed in Kubernetes 1.27, and +ECR credentials should now be obtained via a plugin, the `ecr-credential-provider`. This plugin is installed in the AMI at `/etc/eks/image-credential-provider/ecr-credential-provider`. More information about this plugin is available in the [`cloud-provider-aws` documentation](https://cloud-provider-aws.sigs.k8s.io/credential_provider/). + +Additional image credential provider plugins may be appended to `/etc/eks/image-credential-provider/config.json`. In Kubernetes versions 1.26 and below, all plugins in this file must support `credentialprovider.kubelet.k8s.io/v1alpha1`. In Kubernetes versions 1.27 and above, they must support `credentialprovider.kubelet.k8s.io/v1`. + +For more information about image credential provider plugins, refer to the [Kubernetes documentation](https://kubernetes.io/docs/tasks/administer-cluster/kubelet-credential-provider/). diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 539628c1f..a42da5f2f 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -145,28 +145,29 @@ set -u KUBELET_VERSION=$(kubelet --version | grep -Eo '[0-9]\.[0-9]+\.[0-9]+') echo "Using kubelet version $KUBELET_VERSION" +# ecr-credential-provider only implements credentialprovider.kubelet.k8s.io/v1alpha1 prior to 1.27.1: https://github.com/kubernetes/cloud-provider-aws/pull/597 +# TODO: remove this when 1.26 is EOL +if vercmp "$KUBELET_VERSION" lt "1.27.0"; then + IMAGE_CREDENTIAL_PROVIDER_CONFIG=/etc/eks/image-credential-provider/config.json + echo "$(jq '.apiVersion = "kubelet.config.k8s.io/v1alpha1"' $IMAGE_CREDENTIAL_PROVIDER_CONFIG)" > $IMAGE_CREDENTIAL_PROVIDER_CONFIG + echo "$(jq '.providers[].apiVersion = "credentialprovider.kubelet.k8s.io/v1alpha1"' $IMAGE_CREDENTIAL_PROVIDER_CONFIG)" > $IMAGE_CREDENTIAL_PROVIDER_CONFIG +fi + +# Set container runtime related variables +DOCKER_CONFIG_JSON="${DOCKER_CONFIG_JSON:-}" +ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" + # As 
of Kubernetes version 1.24, we will start defaulting the container runtime to containerd # and no longer support docker as a container runtime. -IS_124_OR_GREATER=false DEFAULT_CONTAINER_RUNTIME=dockerd if vercmp "$KUBELET_VERSION" gteq "1.24.0"; then - IS_124_OR_GREATER=true DEFAULT_CONTAINER_RUNTIME=containerd -elif vercmp "$KUBELET_VERSION" gteq "1.22.0"; then - # These APIs are only available in alpha pre-1.24. - # This can be removed when version 1.23 is no longer supported. - sed -i s,kubelet.config.k8s.io/v1beta1,kubelet.config.k8s.io/v1alpha1,g /etc/eks/ecr-credential-provider/ecr-credential-provider-config - sed -i s,credentialprovider.kubelet.k8s.io/v1beta1,credentialprovider.kubelet.k8s.io/v1alpha1,g /etc/eks/ecr-credential-provider/ecr-credential-provider-config fi - -# Set container runtime related variables -DOCKER_CONFIG_JSON="${DOCKER_CONFIG_JSON:-}" -ENABLE_DOCKER_BRIDGE="${ENABLE_DOCKER_BRIDGE:-false}" CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-$DEFAULT_CONTAINER_RUNTIME}" echo "Using $CONTAINER_RUNTIME as the container runtime" -if $IS_124_OR_GREATER && [ $CONTAINER_RUNTIME != "containerd" ]; then +if vercmp "$KUBELET_VERSION" gteq "1.24.0" && [ $CONTAINER_RUNTIME != "containerd" ]; then echo "ERROR: containerd is the only supported container runtime as of Kubernetes version 1.24" exit 1 fi diff --git a/files/ecr-credential-provider-config b/files/ecr-credential-provider-config deleted file mode 100644 index ae1f9d7a5..000000000 --- a/files/ecr-credential-provider-config +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: kubelet.config.k8s.io/v1beta1 -kind: CredentialProviderConfig -providers: - - name: ecr-credential-provider - matchImages: - - "*.dkr.ecr.*.amazonaws.com" - - "*.dkr.ecr.*.amazonaws.cn" - - "*.dkr.ecr-fips.*.amazonaws.com" - - "*.dkr.ecr.us-iso-east-1.c2s.ic.gov" - - "*.dkr.ecr.us-isob-east-1.sc2s.sgov.gov" - defaultCacheDuration: "12h" - apiVersion: credentialprovider.kubelet.k8s.io/v1beta1 - args: - - get-credentials diff --git 
a/files/ecr-credential-provider-config.json b/files/ecr-credential-provider-config.json new file mode 100644 index 000000000..7fe687156 --- /dev/null +++ b/files/ecr-credential-provider-config.json @@ -0,0 +1,18 @@ +{ + "apiVersion": "kubelet.config.k8s.io/v1", + "kind": "CredentialProviderConfig", + "providers": [ + { + "name": "ecr-credential-provider", + "matchImages": [ + "*.dkr.ecr.*.amazonaws.com", + "*.dkr.ecr.*.amazonaws.cn", + "*.dkr.ecr-fips.*.amazonaws.com", + "*.dkr.ecr.us-iso-east-1.c2s.ic.gov", + "*.dkr.ecr.us-isob-east-1.sc2s.sgov.gov" + ], + "defaultCacheDuration": "12h", + "apiVersion": "credentialprovider.kubelet.k8s.io/v1" + } + ] +} diff --git a/files/kubelet-containerd.service b/files/kubelet-containerd.service index bd8ed1cf6..946fb1c28 100644 --- a/files/kubelet-containerd.service +++ b/files/kubelet-containerd.service @@ -11,8 +11,8 @@ ExecStart=/usr/bin/kubelet \ --config /etc/kubernetes/kubelet/kubelet-config.json \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime-endpoint unix:///run/containerd/containerd.sock \ - --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \ - --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider \ + --image-credential-provider-config /etc/eks/image-credential-provider/config.json \ + --image-credential-provider-bin-dir /etc/eks/image-credential-provider \ $KUBELET_ARGS \ $KUBELET_EXTRA_ARGS diff --git a/files/kubelet.service b/files/kubelet.service index 5002876be..08c746504 100644 --- a/files/kubelet.service +++ b/files/kubelet.service @@ -11,8 +11,8 @@ ExecStart=/usr/bin/kubelet \ --kubeconfig /var/lib/kubelet/kubeconfig \ --container-runtime docker \ --network-plugin cni \ - --image-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config \ - --image-credential-provider-bin-dir /etc/eks/ecr-credential-provider \ + --image-credential-provider-config /etc/eks/image-credential-provider/config.json \ + 
--image-credential-provider-bin-dir /etc/eks/image-credential-provider \ $KUBELET_ARGS \ $KUBELET_EXTRA_ARGS diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index a32fda814..b40bf4d5a 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -351,22 +351,18 @@ fi ################################################################################ ### ECR CREDENTIAL PROVIDER #################################################### ################################################################################ -if vercmp "$KUBERNETES_VERSION" gteq "1.22.0"; then - ECR_BINARY="ecr-credential-provider" - if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then - echo "AWS cli present - using it to copy ecr-credential-provider binaries from s3." - aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$ECR_BINARY . - else - echo "AWS cli missing - using wget to fetch ecr-credential-provider binaries from s3. Note: This won't work for private bucket." - sudo wget "$S3_URL_BASE/$ECR_BINARY" - fi - sudo chmod +x $ECR_BINARY - sudo mkdir -p /etc/eks/ecr-credential-provider - sudo mv $ECR_BINARY /etc/eks/ecr-credential-provider - - # copying credential provider config file to eks folder - sudo mv $TEMPLATE_DIR/ecr-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config +ECR_CREDENTIAL_PROVIDER_BINARY="ecr-credential-provider" +if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then + echo "AWS cli present - using it to copy ${ECR_CREDENTIAL_PROVIDER_BINARY} from s3." + aws s3 cp --region $BINARY_BUCKET_REGION $S3_PATH/$ECR_CREDENTIAL_PROVIDER_BINARY . +else + echo "AWS cli missing - using wget to fetch ${ECR_CREDENTIAL_PROVIDER_BINARY} from s3. Note: This won't work for private bucket." 
+ sudo wget "$S3_URL_BASE/$ECR_CREDENTIAL_PROVIDER_BINARY" fi +sudo chmod +x $ECR_CREDENTIAL_PROVIDER_BINARY +sudo mkdir -p /etc/eks/image-credential-provider +sudo mv $ECR_CREDENTIAL_PROVIDER_BINARY /etc/eks/image-credential-provider/ +sudo mv $TEMPLATE_DIR/ecr-credential-provider-config.json /etc/eks/image-credential-provider/config.json ################################################################################ ### Cache Images ############################################################### diff --git a/test/Dockerfile b/test/Dockerfile index bab93ee84..5470ad965 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -12,7 +12,7 @@ COPY files/ /etc/eks/ COPY files/containerd-config.toml files/kubelet-containerd.service files/pull-sandbox-image.sh files/sandbox-image.service /etc/eks/containerd/ COPY files/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json COPY files/kubelet-kubeconfig /var/lib/kubelet/kubeconfig -COPY files/ecr-credential-provider-config /etc/eks/ecr-credential-provider/ecr-credential-provider-config +COPY files/ecr-credential-provider-config.json /etc/eks/image-credential-provider/config.json COPY test/entrypoint.sh /entrypoint.sh COPY files/bin/* /usr/bin/ COPY test/mocks/ /sbin/ diff --git a/test/cases/ecr-credential-provider-config.sh b/test/cases/ecr-credential-provider-config.sh index 5d4856ed1..4eb74a761 100755 --- a/test/cases/ecr-credential-provider-config.sh +++ b/test/cases/ecr-credential-provider-config.sh @@ -4,7 +4,7 @@ set -euo pipefail exit_code=0 TEMP_DIR=$(mktemp -d) -export CRED_PROVIDER_FILE="/etc/eks/ecr-credential-provider/ecr-credential-provider-config" +export CRED_PROVIDER_FILE="/etc/eks/image-credential-provider/config.json" export CRED_PROVIDER_RESET_FILE="./cred-provider-config" # Store the original version of the config @@ -15,7 +15,7 @@ function reset_scenario { cp $CRED_PROVIDER_RESET_FILE $CRED_PROVIDER_FILE } -echo "--> Should default to credentialprovider.kubelet.k8s.io/v1alpha1 and 
kubelet.config.k8s.io/v1alpha1 when below k8s version 1.24" +echo "--> Should default to credentialprovider.kubelet.k8s.io/v1alpha1 and kubelet.config.k8s.io/v1alpha1 when below k8s version 1.27" reset_scenario # This variable is used to override the default value in the kubelet mock @@ -31,23 +31,24 @@ if [[ ${exit_code} -ne 0 ]]; then fi expected_cred_provider_api="credentialprovider.kubelet.k8s.io/v1alpha1" -actual=$(yq e '.providers[0].apiVersion' $CRED_PROVIDER_FILE) +actual=$(jq -r '.providers[0].apiVersion' $CRED_PROVIDER_FILE) if [[ "$expected_cred_provider_api" != "$actual" ]]; then echo "❌ Test Failed: expected 1.22 credential provider file to contain $expected_cred_provider_api" exit 1 fi expected_kubelet_config_api="kubelet.config.k8s.io/v1alpha1" -actual=$(yq e '.apiVersion' $CRED_PROVIDER_FILE) +actual=$(jq -r '.apiVersion' $CRED_PROVIDER_FILE) if [[ "$expected_kubelet_config_api" != "$actual" ]]; then echo "❌ Test Failed: expected 1.22 credential provider file to contain $expected_kubelet_config_api" exit 1 fi -echo "--> Should default to credentialprovider.kubelet.k8s.io/v1beta1 and kubelet.config.k8s.io/v1beta1 when at or above k8s version 1.24" +echo "--> Should default to credentialprovider.kubelet.k8s.io/v1alpha1 and kubelet.config.k8s.io/v1alpha1 when below k8s version 1.27" reset_scenario -export KUBELET_VERSION=v1.24.15-eks-ba74326 +# This variable is used to override the default value in the kubelet mock +export KUBELET_VERSION=v1.26.0-eks-ba74326 /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ @@ -58,18 +59,44 @@ if [[ ${exit_code} -ne 0 ]]; then exit 1 fi -expected_cred_provider_api="credentialprovider.kubelet.k8s.io/v1beta1" -actual=$(yq e '.providers[0].apiVersion' $CRED_PROVIDER_FILE) +expected_cred_provider_api="credentialprovider.kubelet.k8s.io/v1alpha1" +actual=$(jq -r '.providers[0].apiVersion' $CRED_PROVIDER_FILE) if [[ "$expected_cred_provider_api" != "$actual" ]]; then - echo "❌ 
Test Failed: expected 1.24 credential provider file to contain $expected_cred_provider_api" + echo "❌ Test Failed: expected 1.26 credential provider file to contain $expected_cred_provider_api" exit 1 fi -expected_kubelet_config_api="kubelet.config.k8s.io/v1beta1" -actual=$(yq e '.apiVersion' $CRED_PROVIDER_FILE) +expected_kubelet_config_api="kubelet.config.k8s.io/v1alpha1" +actual=$(jq -r '.apiVersion' $CRED_PROVIDER_FILE) if [[ "$expected_kubelet_config_api" != "$actual" ]]; then - echo "❌ Test Failed: expected 1.24 credential provider file to contain $expected_kubelet_config_api" + echo "❌ Test Failed: expected 1.26 credential provider file to contain $expected_kubelet_config_api" exit 1 fi -exit_code=0 +echo "--> Should default to credentialprovider.kubelet.k8s.io/v1 and kubelet.config.k8s.io/v1 when at or above k8s version 1.27" +reset_scenario + +export KUBELET_VERSION=v1.27.1-eks-ba74326 +/etc/eks/bootstrap.sh \ + --b64-cluster-ca dGVzdA== \ + --apiserver-endpoint http://my-api-endpoint \ + test || exit_code=$? 
+ +if [[ ${exit_code} -ne 0 ]]; then + echo "❌ Test Failed: expected a zero exit code but got '${exit_code}'" + exit 1 +fi + +expected_cred_provider_api="credentialprovider.kubelet.k8s.io/v1" +actual=$(jq -r '.providers[0].apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_cred_provider_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.27 credential provider file to contain $expected_cred_provider_api" + exit 1 +fi + +expected_kubelet_config_api="kubelet.config.k8s.io/v1" +actual=$(jq -r '.apiVersion' $CRED_PROVIDER_FILE) +if [[ "$expected_kubelet_config_api" != "$actual" ]]; then + echo "❌ Test Failed: expected 1.27 credential provider file to contain $expected_kubelet_config_api" + exit 1 +fi diff --git a/test/test-harness.sh b/test/test-harness.sh index b0cc2180f..c253f562e 100755 --- a/test/test-harness.sh +++ b/test/test-harness.sh @@ -37,12 +37,14 @@ done docker build -t eks-optimized-ami -f "${SCRIPTPATH}/Dockerfile" "${SCRIPTPATH}/../" overall_status=0 +test_run_log_file=$(mktemp) + function run() { docker run -v "$(realpath $1):/test.sh" \ --attach STDOUT \ --attach STDERR \ --rm \ - eks-optimized-ami + eks-optimized-ami > $test_run_log_file 2>&1 } if [[ ! -z ${TEST_CASE_SCRIPT} ]]; then @@ -59,6 +61,7 @@ for case in "${test_cases[@]}"; do if [[ ${status} -eq 0 ]]; then echo "✅ ✅ $(basename ${case}) Tests Passed! ✅ ✅" else + cat $test_run_log_file echo "❌ ❌ $(basename ${case}) Tests Failed! 
❌ ❌" overall_status=1 fi From 76c1b1b8eaa9bd6912c504b00fc2c067cb5b21b8 Mon Sep 17 00:00:00 2001 From: Zaid Ahmed Farooq <38226823+zaf6862@users.noreply.github.com> Date: Wed, 19 Apr 2023 17:24:35 -0700 Subject: [PATCH 200/232] AMI Release v20230411 (#1270) --- CHANGELOG.md | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6859fe2db..acafb611e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,51 @@ # Changelog +### AMI Release v20230411 +* amazon-eks-gpu-node-1.26-v20230411 +* amazon-eks-gpu-node-1.25-v20230411 +* amazon-eks-gpu-node-1.24-v20230411 +* amazon-eks-gpu-node-1.23-v20230411 +* amazon-eks-gpu-node-1.22-v20230411 +* amazon-eks-arm64-node-1.26-v20230411 +* amazon-eks-arm64-node-1.25-v20230411 +* amazon-eks-arm64-node-1.24-v20230411 +* amazon-eks-arm64-node-1.23-v20230411 +* amazon-eks-arm64-node-1.22-v20230411 +* amazon-eks-node-1.26-v20230411 +* amazon-eks-node-1.25-v20230411 +* amazon-eks-node-1.24-v20230411 +* amazon-eks-node-1.23-v20230411 +* amazon-eks-node-1.22-v20230411 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.26.2-20230411` +* `1.25.7-20230411` +* `1.24.11-20230411` +* `1.23.17-20230411` +* `1.22.17-20230411` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.26.2/2023-03-17/ +* s3://amazon-eks/1.25.7/2023-03-17/ +* s3://amazon-eks/1.24.11/2023-03-17/ +* s3://amazon-eks/1.23.17/2023-03-17/ +* s3://amazon-eks/1.22.17/2023-03-17/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.238-148.347.amzn2 + * Kubernetes 1.24 and above: 5.10.176-157.645.amzn2 +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. 
+* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0 + +Notable changes: +- The AMI changes include update for 5.4 kernel version from `5.4.238-148.346.amzn2` to `kernel-5.4.238-148.347.amzn2`. `kernel-5.4.238-148.346` had a fatal issue affecting SMB mounts in which a null pointer dereference caused a panic. As a result, this package was removed from the Amazon Linux 2 repositories. + ### AMI Release v20230406 * amazon-eks-gpu-node-1.26-v20230406 * amazon-eks-gpu-node-1.25-v20230406 From 0711325f3517b2db1a6cd0cf9eb42cd79ed1358f Mon Sep 17 00:00:00 2001 From: guessi Date: Thu, 20 Apr 2023 08:29:48 +0800 Subject: [PATCH 201/232] Fix mount-bpf-fs test cases (#1271) --- test/cases/mount-bpf-fs.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/cases/mount-bpf-fs.sh b/test/cases/mount-bpf-fs.sh index e8ef5da99..61b2f3844 100755 --- a/test/cases/mount-bpf-fs.sh +++ b/test/cases/mount-bpf-fs.sh @@ -61,7 +61,7 @@ EXIT_CODE=0 /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ - test || exit_code=$? + test || EXIT_CODE=$? if [[ ${EXIT_CODE} -ne 0 ]]; then echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" exit 1 @@ -84,7 +84,7 @@ EXIT_CODE=0 /etc/eks/bootstrap.sh \ --b64-cluster-ca dGVzdA== \ --apiserver-endpoint http://my-api-endpoint \ - test || exit_code=$? + test || EXIT_CODE=$? 
if [[ ${EXIT_CODE} -ne 0 ]]; then echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" exit 1 From e39d71f6832221409cd9990ad85e870f6d621698 Mon Sep 17 00:00:00 2001 From: guessi Date: Thu, 20 Apr 2023 10:07:30 +0800 Subject: [PATCH 202/232] Keep actions up-to-date (#1243) --- .github/workflows/ci.yaml | 4 ++-- .github/workflows/sync-eni-max-pods.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index dfc7f6804..7f780e683 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,12 +13,12 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - run: echo "$(go env GOPATH)/bin" >> $GITHUB_PATH - run: go install mvdan.cc/sh/v3/cmd/shfmt@latest - run: make lint test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - run: make test diff --git a/.github/workflows/sync-eni-max-pods.yaml b/.github/workflows/sync-eni-max-pods.yaml index 76f02addf..9bb3275bc 100644 --- a/.github/workflows/sync-eni-max-pods.yaml +++ b/.github/workflows/sync-eni-max-pods.yaml @@ -14,7 +14,7 @@ jobs: if: github.repository == 'awslabs/amazon-eks-ami' runs-on: ubuntu-latest steps: - - uses: aws-actions/configure-aws-credentials@v1 + - uses: aws-actions/configure-aws-credentials@v2 with: aws-region: ${{ secrets.AWS_REGION }} role-to-assume: ${{ secrets.AWS_ROLE_ARN }} From 406954de1f7a4cdfabb9d9cc2b7ddfd1e2e08b23 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 21 Apr 2023 15:40:49 -0700 Subject: [PATCH 203/232] Make imds-token directory world-writeable (#1267) --- files/bin/imds | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/bin/imds b/files/bin/imds index 7619ee3fb..2d23801ba 100755 --- a/files/bin/imds +++ b/files/bin/imds @@ -50,7 +50,7 @@ function imdscurl() { function get-token() { local TOKEN_DIR=/tmp/imds-tokens - mkdir -p $TOKEN_DIR + mkdir -p -m a+wrx 
$TOKEN_DIR # cleanup expired tokens local DELETED_TOKENS=0 From 1ec4d5cfb209992901253bf07b3c0dba34cdf70d Mon Sep 17 00:00:00 2001 From: Carter Date: Mon, 24 Apr 2023 16:45:41 -0700 Subject: [PATCH 204/232] Improve bootstrap logging (#1276) --- files/bin/vercmp | 5 +++- files/bootstrap.sh | 67 +++++++++++++++++++++++++++++++++------------- 2 files changed, 53 insertions(+), 19 deletions(-) diff --git a/files/bin/vercmp b/files/bin/vercmp index 8edf7b920..5bb467854 100755 --- a/files/bin/vercmp +++ b/files/bin/vercmp @@ -81,7 +81,10 @@ case $OPERATOR in ;; esac -echo "$OUTCOME" +VERCMP_QUIET="${VERCMP_QUIET:-false}" +if [ ! "$VERCMP_QUIET" = "true" ]; then + echo "$OUTCOME" +fi if [ "$OUTCOME" = "true" ]; then exit 0 diff --git a/files/bootstrap.sh b/files/bootstrap.sh index a42da5f2f..205cb5458 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -11,6 +11,9 @@ trap 'err_report $LINENO' ERR IFS=$'\n\t' +# mute stdout from vercmp +export VERCMP_QUIET=true + function print_help { echo "usage: $0 [options] " echo "Bootstraps an instance into an EKS cluster" @@ -36,6 +39,12 @@ function print_help { echo "--use-max-pods Sets --max-pods for the kubelet when true. (default: true)" } +function log { + echo >&2 "$(date '+%Y-%m-%dT%H:%M:%S%z')" "[eks-bootstrap]" "$@" +} + +log "INFO: starting..." 
+ POSITIONAL=() while [[ $# -gt 0 ]]; do @@ -47,86 +56,103 @@ while [[ $# -gt 0 ]]; do ;; --use-max-pods) USE_MAX_PODS="$2" + log "INFO: --use-max-pods='${USE_MAX_PODS}'" shift shift ;; --b64-cluster-ca) B64_CLUSTER_CA=$2 + log "INFO: --b64-cluster-ca='${B64_CLUSTER_CA}'" shift shift ;; --apiserver-endpoint) APISERVER_ENDPOINT=$2 + log "INFO: --apiserver-endpoint='${APISERVER_ENDPOINT}'" shift shift ;; --kubelet-extra-args) KUBELET_EXTRA_ARGS=$2 + log "INFO: --kubelet-extra-args='${KUBELET_EXTRA_ARGS}'" shift shift ;; --enable-docker-bridge) ENABLE_DOCKER_BRIDGE=$2 + log "INFO: --enable-docker-bridge='${ENABLE_DOCKER_BRIDGE}'" shift shift ;; --aws-api-retry-attempts) API_RETRY_ATTEMPTS=$2 + log "INFO: --aws-api-retry-attempts='${API_RETRY_ATTEMPTS}'" shift shift ;; --docker-config-json) DOCKER_CONFIG_JSON=$2 + log "INFO: --docker-config-json='${DOCKER_CONFIG_JSON}'" shift shift ;; --containerd-config-file) CONTAINERD_CONFIG_FILE=$2 + log "INFO: --containerd-config-file='${CONTAINERD_CONFIG_FILE}'" shift shift ;; --pause-container-account) PAUSE_CONTAINER_ACCOUNT=$2 + log "INFO: --pause-container-accounte='${PAUSE_CONTAINER_ACCOUNT}'" shift shift ;; --pause-container-version) PAUSE_CONTAINER_VERSION=$2 + log "INFO: --pause-container-version='${PAUSE_CONTAINER_VERSION}'" shift shift ;; --dns-cluster-ip) DNS_CLUSTER_IP=$2 + log "INFO: --dns-cluster-ip='${DNS_CLUSTER_IP}'" shift shift ;; --container-runtime) CONTAINER_RUNTIME=$2 + log "INFO: --container-runtime='${CONTAINER_RUNTIME}'" shift shift ;; --ip-family) IP_FAMILY=$2 + log "INFO: --ip-family='${IP_FAMILY}'" shift shift ;; --service-ipv6-cidr) SERVICE_IPV6_CIDR=$2 + log "INFO: --service-ipv6-cidr='${SERVICE_IPV6_CIDR}'" shift shift ;; --enable-local-outpost) ENABLE_LOCAL_OUTPOST=$2 + log "INFO: --enable-local-outpost='${ENABLE_LOCAL_OUTPOST}'" shift shift ;; --cluster-id) CLUSTER_ID=$2 + log "INFO: --cluster-id='${CLUSTER_ID}'" shift shift ;; --mount-bpf-fs) MOUNT_BPF_FS=$2 + log "INFO: 
--mount-bpf-fs='${MOUNT_BPF_FS}'" shift shift ;; @@ -143,7 +169,7 @@ CLUSTER_NAME="$1" set -u KUBELET_VERSION=$(kubelet --version | grep -Eo '[0-9]\.[0-9]+\.[0-9]+') -echo "Using kubelet version $KUBELET_VERSION" +log "INFO: Using kubelet version $KUBELET_VERSION" # ecr-credential-provider only implements credentialprovider.kubelet.k8s.io/v1alpha1 prior to 1.27.1: https://github.com/kubernetes/cloud-provider-aws/pull/597 # TODO: remove this when 1.26 is EOL @@ -165,10 +191,10 @@ if vercmp "$KUBELET_VERSION" gteq "1.24.0"; then fi CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-$DEFAULT_CONTAINER_RUNTIME}" -echo "Using $CONTAINER_RUNTIME as the container runtime" +log "INFO: Using $CONTAINER_RUNTIME as the container runtime" if vercmp "$KUBELET_VERSION" gteq "1.24.0" && [ $CONTAINER_RUNTIME != "containerd" ]; then - echo "ERROR: containerd is the only supported container runtime as of Kubernetes version 1.24" + log "ERROR: containerd is the only supported container runtime as of Kubernetes version 1.24" exit 1 fi @@ -254,21 +280,21 @@ get_cpu_millicores_to_reserve() { } if [ -z "$CLUSTER_NAME" ]; then - echo "CLUSTER_NAME is not defined" + log "ERROR: cluster name is not defined!" exit 1 fi if [[ ! -z "${IP_FAMILY}" ]]; then IP_FAMILY="$(tr [A-Z] [a-z] <<< "$IP_FAMILY")" if [[ "${IP_FAMILY}" != "ipv4" ]] && [[ "${IP_FAMILY}" != "ipv6" ]]; then - echo "Invalid IpFamily. Only ipv4 or ipv6 are allowed" + log "ERROR: Invalid --ip-family. Only ipv4 or ipv6 are allowed" exit 1 fi fi if [[ ! 
-z "${SERVICE_IPV6_CIDR}" ]]; then if [[ "${IP_FAMILY}" == "ipv4" ]]; then - echo "ip-family should be ipv6 when service-ipv6-cidr is specified" + log "ERROR: --ip-family should be ipv6 when --service-ipv6-cidr is specified" exit 1 fi IP_FAMILY="ipv6" @@ -279,7 +305,7 @@ AWS_SERVICES_DOMAIN=$(imds 'latest/meta-data/services/domain') MACHINE=$(uname -m) if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then - echo "Unknown machine architecture '$MACHINE'" >&2 + log "ERROR: Unknown machine architecture: '$MACHINE'" exit 1 fi @@ -297,13 +323,14 @@ CA_CERTIFICATE_DIRECTORY=/etc/kubernetes/pki CA_CERTIFICATE_FILE_PATH=$CA_CERTIFICATE_DIRECTORY/ca.crt mkdir -p $CA_CERTIFICATE_DIRECTORY if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then + log "INFO: --cluster-ca or --api-server-endpoint is not defined, describing cluster..." DESCRIBE_CLUSTER_RESULT="/tmp/describe_cluster_result.txt" # Retry the DescribeCluster API for API_RETRY_ATTEMPTS for attempt in $(seq 0 $API_RETRY_ATTEMPTS); do rc=0 if [[ $attempt -gt 0 ]]; then - echo "Attempt $attempt of $API_RETRY_ATTEMPTS" + log "INFO: Attempt $attempt of $API_RETRY_ATTEMPTS" fi aws eks wait cluster-active \ @@ -319,6 +346,7 @@ if [[ -z "${B64_CLUSTER_CA}" ]] || [[ -z "${APISERVER_ENDPOINT}" ]]; then break fi if [[ $attempt -eq $API_RETRY_ATTEMPTS ]]; then + log "ERROR: Exhausted retries while describing cluster!" exit $rc fi jitter=$((1 + RANDOM % 10)) @@ -355,6 +383,8 @@ if [[ -z "${IP_FAMILY}" ]] || [[ "${IP_FAMILY}" == "None" ]]; then IP_FAMILY="ipv4" fi +log "INFO: Using IP family: ${IP_FAMILY}" + echo $B64_CLUSTER_CA | base64 -d > $CA_CERTIFICATE_FILE_PATH sed -i s,MASTER_ENDPOINT,$APISERVER_ENDPOINT,g /var/lib/kubelet/kubeconfig @@ -385,7 +415,7 @@ if [[ "${ENABLE_LOCAL_OUTPOST}" == "true" ]]; then ### - if "aws eks describe-cluster" is bypassed, for local outpost, the value of CLUSTER_NAME parameter will be cluster id. 
### - otherwise, the cluster id will use the id returned by "aws eks describe-cluster". if [[ -z "${CLUSTER_ID}" ]]; then - echo "Cluster ID is required when local outpost support is enabled" + log "ERROR: Cluster ID is required when local outpost support is enabled" exit 1 else sed -i s,CLUSTER_NAME,$CLUSTER_ID,g /var/lib/kubelet/kubeconfig @@ -405,7 +435,7 @@ MAC=$(imds 'latest/meta-data/network/interfaces/macs/' | head -n 1 | sed 's/\/$/ if [[ -z "${DNS_CLUSTER_IP}" ]]; then if [[ "${IP_FAMILY}" == "ipv6" ]]; then if [[ -z "${SERVICE_IPV6_CIDR}" ]]; then - echo "One of --service-ipv6-cidr or --dns-cluster-ip must be provided when ip-family is specified as ipv6" + log "ERROR: One of --service-ipv6-cidr or --dns-cluster-ip must be provided when --ip-family is ipv6" exit 1 fi DNS_CLUSTER_IP=$(awk -F/ '{print $1}' <<< $SERVICE_IPV6_CIDR)a @@ -455,7 +485,7 @@ set +o pipefail MAX_PODS=$(cat $MAX_PODS_FILE | awk "/^${INSTANCE_TYPE:-unset}/"' { print $2 }') set -o pipefail if [ -z "$MAX_PODS" ] || [ -z "$INSTANCE_TYPE" ]; then - echo "No entry for type '$INSTANCE_TYPE' in $MAX_PODS_FILE. Will attempt to auto-discover value." + log "INFO: No entry for type '$INSTANCE_TYPE' in $MAX_PODS_FILE. Will attempt to auto-discover value." # When determining the value of maxPods, we're using the legacy calculation by default since it's more restrictive than # the PrefixDelegation based alternative and is likely to be in-use by more customers. # The legacy numbers also maintain backwards compatibility when used to calculate `kubeReserved.memory` @@ -497,11 +527,11 @@ mkdir -p /etc/systemd/system if [[ "$CONTAINER_RUNTIME" = "containerd" ]]; then if $ENABLE_DOCKER_BRIDGE; then - echo "WARNING: Flag --enable-docker-bridge was set but will be ignored as it's not relevant to containerd" + log "WARNING: Flag --enable-docker-bridge was set but will be ignored as it's not relevant to containerd" fi if [ ! 
-z "$DOCKER_CONFIG_JSON" ]; then - echo "WARNING: Flag --docker-config-json was set but will be ignored as it's not relevant to containerd" + log "WARNING: Flag --docker-config-json was set but will be ignored as it's not relevant to containerd" fi sudo mkdir -p /etc/containerd @@ -560,7 +590,7 @@ elif [[ "$CONTAINER_RUNTIME" = "dockerd" ]]; then systemctl enable docker systemctl restart docker else - echo "Container runtime ${CONTAINER_RUNTIME} is not supported." + log "ERROR: unsupported container runtime: '${CONTAINER_RUNTIME}'" exit 1 fi @@ -584,7 +614,7 @@ systemctl start kubelet # gpu boost clock if command -v nvidia-smi &> /dev/null; then - echo "nvidia-smi found" + log "INFO: nvidia-smi found" nvidia-smi -q > /tmp/nvidia-smi-check if [[ "$?" == "0" ]]; then @@ -592,7 +622,7 @@ if command -v nvidia-smi &> /dev/null; then sudo nvidia-smi --auto-boost-default=0 GPUNAME=$(nvidia-smi -L | head -n1) - echo $GPUNAME + log "INFO: GPU name: $GPUNAME" # set application clock to maximum if [[ $GPUNAME == *"A100"* ]]; then @@ -609,8 +639,9 @@ if command -v nvidia-smi &> /dev/null; then echo "unsupported gpu" fi else + log "ERROR: nvidia-smi check failed!" cat /tmp/nvidia-smi-check fi -else - echo "nvidia-smi not found" fi + +log "INFO: complete!" From fcfca678d8b7088d94e7bf5f07fb71abfc353b46 Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Tue, 25 Apr 2023 12:11:10 -0500 Subject: [PATCH 205/232] add bootstrap option to create a local NVMe raid0 or individual volume mounts (#1171) --- doc/USER_GUIDE.md | 19 ++++ files/bin/setup-local-disks | 220 ++++++++++++++++++++++++++++++++++++ files/bootstrap.sh | 11 ++ scripts/install-worker.sh | 3 +- 4 files changed, 252 insertions(+), 1 deletion(-) create mode 100644 files/bin/setup-local-disks diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index b37348d60..b2271c9a9 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -12,6 +12,7 @@ This document includes details about using the AMI template and the resulting AM 1. 
[Updating known instance types](#updating-known-instance-types) 1. [Version-locked packages](#version-locked-packages) 1. [Image credential provider plugins](#image-credential-provider-plugins) +1. [Ephemeral Storage](#ephemeral-storage) --- @@ -321,3 +322,21 @@ ECR credentials should now be obtained via a plugin, the `ecr-credential-provide Additional image credential provider plugins may be appended to `/etc/eks/image-credential-provider/config.json`. In Kubernetes versions 1.26 and below, all plugins in this file must support `credentialprovider.kubelet.k8s.io/v1alpha1`. In Kubernetes versions 1.27 and above, they must support `credentialprovider.kubelet.k8s.io/v1`. For more information about image credential provider plugins, refer to the [Kubernetes documentation](https://kubernetes.io/docs/tasks/administer-cluster/kubelet-credential-provider/). + +--- + +## Ephemeral Storage + +Some instance types launch with ephemeral NVMe instance storage (i3, i4i, c5d, c6id, etc). There are two main ways of utilizing this storage within Kubernetes: a single RAID-0 array for use by kubelet and containerd or mounting the individual disks for pod usage. + +The EKS Optimized AMI includes a utility script to configure ephemeral storage. The script can be invoked by passing the `--local-disks ` flag to the `/etc/eks/bootstrap.sh` script or the script can be invoked directly at `/bin/setup-local-disks`. All disks are formatted with an XFS file system. + +Below are details on the two disk setup options: + +### RAID-0 for Kubelet and Containerd (raid0) + +A RAID-0 array is setup that includes all ephemeral NVMe instance storage disks. The containerd and kubelet state directories (`/var/lib/containerd` and `/var/lib/kubelet`) will then use the ephemeral storage for more and faster node ephemeral-storage. The node's ephemeral storage can be shared among pods that request ephemeral storage and container images that are downloaded to the node. 
+ +### Mount for Persistent Volumes (mount) + +Another way of utilizing the ephemeral disks is to format and mount the individual disks. Mounting individual disks allows the [local-static-provisioner](https://github.com/kubernetes-sigs/sig-storage-local-static-provisioner) DaemonSet to create Persistent Volume Claims that pods can utilize. diff --git a/files/bin/setup-local-disks b/files/bin/setup-local-disks new file mode 100644 index 000000000..9cdb18dae --- /dev/null +++ b/files/bin/setup-local-disks @@ -0,0 +1,220 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail +set -o nounset + +err_report() { + echo "Exited with error on line $1" +} +trap 'err_report $LINENO' ERR + +print_help() { + echo "usage: $0 " + echo "Sets up Amazon EC2 Instance Store NVMe disks" + echo "" + echo "-d, --dir directory to mount the filesystem(s) (default: /mnt/k8s-disks/)" + echo "-h, --help print this help" +} + +# Sets up a RAID-0 of NVMe instance storage disks, moves +# the contents of /var/lib/kubelet and /var/lib/containerd +# to the new mounted RAID, and bind mounts the kubelet and +# containerd state directories. +maybe_raid0() { + local md_name="kubernetes" + local md_device="/dev/md/${md_name}" + local md_config="/.aws/mdadm.conf" + local array_mount_point="${MNT_DIR}/0" + mkdir -p "$(dirname "${md_config}")" + + if [[ ! -s "${md_config}" ]]; then + mdadm --create --force --verbose \ + "${md_device}" \ + --level=0 \ + --name="${md_name}" \ + --raid-devices="${#EPHEMERAL_DISKS[@]}" \ + "${EPHEMERAL_DISKS[@]}" + while [ -n "$(mdadm --detail "${md_device}" | grep -ioE 'State :.*resyncing')" ]; do + echo "Raid is resyncing..." + sleep 1 + done + mdadm --detail --scan > "${md_config}" + fi + + ## Check if the device symlink has changed on reboot to include a homehost identifier + local current_md_device=$(find /dev/md/ -type l -regex ".*/${md_name}_?[0-9a-z]*$" | tail -n1) + if [[ ! 
-z ${current_md_device} ]]; then + md_device="${current_md_device}" + fi + + # Format the array if not already formatted. + if [[ -z "$(lsblk "${md_device}" -o fstype --noheadings)" ]]; then + ## By default, mkfs tries to use the stripe unit of the array (512k), + ## for the log stripe unit, but the max log stripe unit is 256k. + ## So instead, we use 32k (8 blocks) to avoid a warning of breaching the max. + ## mkfs.xfs defaults to 32k after logging the warning since the default log buffer size is 32k. + mkfs.xfs -l su=8b "${md_device}" + fi + + ## Create the mount directory + mkdir -p "${array_mount_point}" + + local dev_uuid=$(blkid -s UUID -o value "${md_device}") + local mount_unit_name="$(systemd-escape --path --suffix=mount "${array_mount_point}")" + cat > "/etc/systemd/system/${mount_unit_name}" << EOF + [Unit] + Description=Mount EC2 Instance Store NVMe disk RAID0 + [Mount] + What=UUID=${dev_uuid} + Where=${array_mount_point} + Type=xfs + Options=defaults,noatime + [Install] + WantedBy=multi-user.target +EOF + systemd-analyze verify "${mount_unit_name}" + systemctl enable "${mount_unit_name}" --now + + prev_running="" + needs_linked="" + for unit in "kubelet" "containerd"; do + ## Check if the bind mount from the RAID already exists + if [[ "$(systemctl is-active var-lib-${unit}.mount)" != "active" ]]; then + # Check if components that depend on the RAID are running and, if so, stop them + if systemctl is-active "${unit}" > /dev/null 2>&1; then + prev_running+=" ${unit}" + fi + needs_linked+=" /var/lib/${unit}" + fi + done + + ## Check if /var/log/pods has been bind mounted and make sure kubelet is stopped + if [[ "$(systemctl is-active var-log-pods.mount)" != "active" ]]; then + if systemctl is-active "kubelet" > /dev/null 2>&1; then + prev_running+=" ${unit}" + fi + needs_linked+=" /var/log/pods" + fi + + if [[ ! -z "${prev_running}" ]]; then + systemctl stop ${prev_running} + fi + + # Transfer state directories to the array, if they exist. 
+ for mount_point in ${needs_linked}; do + local unit="$(basename "${mount_point}")" + local array_mount_point_unit="${array_mount_point}/${unit}" + mkdir -p "${mount_point}" + echo "Copying ${mount_point}/ to ${array_mount_point_unit}/" + cp -a "${mount_point}/" "${array_mount_point_unit}/" + local mount_unit_name="$(systemd-escape --path --suffix=mount "${mount_point}")" + cat > "/etc/systemd/system/${mount_unit_name}" << EOF + [Unit] + Description=Mount ${unit} on EC2 Instance Store NVMe RAID0 + [Mount] + What=${array_mount_point_unit} + Where=${mount_point} + Type=none + Options=bind + [Install] + WantedBy=multi-user.target +EOF + systemd-analyze verify "${mount_unit_name}" + systemctl enable "${mount_unit_name}" --now + done + + if [[ ! -z "${prev_running}" ]]; then + systemctl start ${prev_running} + fi +} + +# Mounts and creates xfs file systems on all EC2 instance store NVMe disks +# without existing file systems. Mounts in /mnt/k8s-disks/{1..} by default +maybe_mount() { + idx=1 + for dev in "${EPHEMERAL_DISKS[@]}"; do + if [[ -z "$(lsblk "${dev}" -o fstype --noheadings)" ]]; then + mkfs.xfs -l su=8b "${dev}" + fi + if [[ ! -z "$(lsblk "${dev}" -o MOUNTPOINT --noheadings)" ]]; then + echo "${dev} is already mounted." 
+ continue + fi + local mount_point="${MNT_DIR}/${idx}" + local mount_unit_name="$(systemd-escape --path --suffix=mount "${mount_point}")" + mkdir -p "${mount_point}" + cat > "/etc/systemd/system/${mount_unit_name}" << EOF + [Unit] + Description=Mount EC2 Instance Store NVMe disk ${idx} + [Mount] + What=${dev} + Where=${mount_point} + Type=xfs + Options=defaults,noatime + [Install] + WantedBy=multi-user.target +EOF + systemd-analyze verify "${mount_unit_name}" + systemctl enable "${mount_unit_name}" --now + idx=$((idx + 1)) + done +} + +## Main logic +MNT_DIR="/mnt/k8s-disks" + +while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -h | --help) + print_help + exit 0 + ;; + -d | --dir) + MNT_DIR="$2" + shift + shift + ;; + *) # unknown option + POSITIONAL+=("$1") # save it in an array for later + shift # past argument + ;; + esac +done + +set +u +set -- "${POSITIONAL[@]}" # restore positional parameters +DISK_SETUP="$1" +set -u + +if [[ "${DISK_SETUP}" != "raid0" && "${DISK_SETUP}" != "mount" ]]; then + echo "Valid disk setup options are: raid0 or mount" + exit 1 +fi + +disks=($(find -L /dev/disk/by-id/ -xtype l -name '*NVMe_Instance_Storage_*')) +## Bail early if there are no ephemeral disks to setup +if [[ "${#disks[@]}" -eq 0 ]]; then + echo "no ephemeral disks found, skipping disk setup" + exit 0 +fi + +if [ "$(id --user)" -ne 0 ]; then + echo "Must be run as root" + exit 1 +fi + +## Get devices of NVMe instance storage ephemeral disks +EPHEMERAL_DISKS=($(realpath "${disks[@]}" | sort -u)) + +case "${DISK_SETUP}" in + "raid0") + maybe_raid0 + echo "Successfully setup RAID-0 consisting of ${EPHEMERAL_DISKS[@]}" + ;; + "mount") + maybe_mount + echo "Successfully setup disk mounts consisting of ${EPHEMERAL_DISKS[@]}" + ;; +esac diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 205cb5458..38f1894ea 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -32,6 +32,7 @@ function print_help { echo "--enable-local-outpost Enable support for worker nodes to 
communicate with the local control plane when running on a disconnected Outpost. (true or false)" echo "--ip-family Specify ip family of the cluster" echo "--kubelet-extra-args Extra arguments to add to the kubelet. Useful for adding labels or taints." + echo "--local-disks Setup instance storage NVMe disks in raid0 or mount the individual disks for use by pods [mount | raid0]" echo "--mount-bfs-fs Mount a bpffs at /sys/fs/bpf (default: true, for Kubernetes 1.27+; false otherwise)" echo "--pause-container-account The AWS account (number) to pull the pause container from" echo "--pause-container-version The tag of the pause container" @@ -156,6 +157,11 @@ while [[ $# -gt 0 ]]; do shift shift ;; + --local-disks) + LOCAL_DISKS=$2 + shift + shift + ;; *) # unknown option POSITIONAL+=("$1") # save it in an array for later shift # past argument @@ -211,6 +217,11 @@ IP_FAMILY="${IP_FAMILY:-}" SERVICE_IPV6_CIDR="${SERVICE_IPV6_CIDR:-}" ENABLE_LOCAL_OUTPOST="${ENABLE_LOCAL_OUTPOST:-}" CLUSTER_ID="${CLUSTER_ID:-}" +LOCAL_DISKS="${LOCAL_DISKS:-}" + +if [[ ! -z ${LOCAL_DISKS} ]]; then + setup-local-disks "${LOCAL_DISKS}" +fi DEFAULT_MOUNT_BPF_FS="true" if vercmp "$KUBELET_VERSION" lt "1.27.0"; then diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index b40bf4d5a..11a631b21 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -70,7 +70,8 @@ sudo yum install -y \ unzip \ wget \ yum-utils \ - yum-plugin-versionlock + yum-plugin-versionlock \ + mdadm # Remove any old kernel versions. 
`--count=1` here means "only leave 1 kernel version installed" sudo package-cleanup --oldkernels --count=1 -y From a6f718674cdcfdcdbbca9dee6d7163729402a6d4 Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 25 Apr 2023 17:32:01 -0700 Subject: [PATCH 206/232] Define region so CLI uses correct endpoint for localzones (#1284) --- files/bootstrap.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 38f1894ea..c6aedcdd5 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -528,7 +528,10 @@ else # then /etc/hostname is not the same as EC2's PrivateDnsName. # The name of the Node object must be equal to EC2's PrivateDnsName for the aws-iam-authenticator to allow this kubelet to manage it. INSTANCE_ID=$(imds /latest/meta-data/instance-id) - PRIVATE_DNS_NAME=$(AWS_RETRY_MODE=standard AWS_MAX_ATTEMPTS=10 aws ec2 describe-instances --instance-ids $INSTANCE_ID --query 'Reservations[].Instances[].PrivateDnsName' --output text) + # the AWS CLI currently constructs the wrong endpoint URL on localzones (the availability zone group will be used instead of the parent region) + # more info: https://github.com/aws/aws-cli/issues/7043 + REGION=$(imds /latest/meta-data/placement/region) + PRIVATE_DNS_NAME=$(AWS_RETRY_MODE=standard AWS_MAX_ATTEMPTS=10 aws ec2 describe-instances --region $REGION --instance-ids $INSTANCE_ID --query 'Reservations[].Instances[].PrivateDnsName' --output text) KUBELET_ARGS="$KUBELET_ARGS --hostname-override=$PRIVATE_DNS_NAME" fi From 3986eb0653f89428b87020661dd154f174df2a45 Mon Sep 17 00:00:00 2001 From: Steven Davidovitz Date: Wed, 26 Apr 2023 18:58:39 -0700 Subject: [PATCH 207/232] Add pigz (#1283) --- scripts/install-worker.sh | 3 ++- scripts/validate.sh | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 11a631b21..0cdceff0c 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ 
-71,7 +71,8 @@ sudo yum install -y \ wget \ yum-utils \ yum-plugin-versionlock \ - mdadm + mdadm \ + pigz # Remove any old kernel versions. `--count=1` here means "only leave 1 kernel version installed" sudo package-cleanup --oldkernels --count=1 -y diff --git a/scripts/validate.sh b/scripts/validate.sh index ae329005e..0b007e386 100644 --- a/scripts/validate.sh +++ b/scripts/validate.sh @@ -73,3 +73,14 @@ if [ $LOCKED_PACKAGES -ne $UNIQUE_LOCKED_PACKAGES ]; then fi echo "Package versionlocks are correct!" + +REQUIRED_COMMANDS=(unpigz) + +for ENTRY in "${REQUIRED_COMMANDS[@]}"; do + if ! command -v "$ENTRY" > /dev/null; then + echo "Required command does not exist: '$ENTRY'" + exit 1 + fi +done + +echo "Required commands were found: ${REQUIRED_COMMANDS[*]}" From f247c96cf1d9a659d6203db16cdd011862521349 Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Tue, 2 May 2023 14:41:33 -0700 Subject: [PATCH 208/232] Adding inf2 and trn1n instances (#1290) Co-authored-by: ljosyula --- files/get-ecr-uri.sh | 3 +++ scripts/install-worker.sh | 9 ++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/files/get-ecr-uri.sh b/files/get-ecr-uri.sh index 134dc39cc..ba719ac06 100755 --- a/files/get-ecr-uri.sh +++ b/files/get-ecr-uri.sh @@ -27,6 +27,9 @@ else us-gov-east-1) acct="151742754352" ;; + us-iso-west-1) + acct="608367168043" + ;; us-iso-east-1) acct="725322719131" ;; diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 0cdceff0c..e58c3a733 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -122,7 +122,9 @@ sudo mv $TEMPLATE_DIR/iptables-restore.service /etc/eks/iptables-restore.service ### awscli ##################################################### ################################################################################ -if [[ "$BINARY_BUCKET_REGION" != "us-iso-east-1" && "$BINARY_BUCKET_REGION" != "us-isob-east-1" ]]; then +### isolated regions can't 
communicate to awscli.amazonaws.com so installing awscli through yum +ISOLATED_REGIONS=(us-iso-east-1 us-iso-west-1 us-isob-east-1) +if ! [[ " ${ISOLATED_REGIONS[*]} " =~ " ${BINARY_BUCKET_REGION} " ]]; then # https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html echo "Installing awscli v2 bundle" AWSCLI_DIR=$(mktemp -d) @@ -247,7 +249,7 @@ echo "Downloading binaries from: s3://$BINARY_BUCKET_NAME" S3_DOMAIN="amazonaws.com" if [ "$BINARY_BUCKET_REGION" = "cn-north-1" ] || [ "$BINARY_BUCKET_REGION" = "cn-northwest-1" ]; then S3_DOMAIN="amazonaws.com.cn" -elif [ "$BINARY_BUCKET_REGION" = "us-iso-east-1" ]; then +elif [ "$BINARY_BUCKET_REGION" = "us-iso-east-1" ] || [ "$BINARY_BUCKET_REGION" = "us-iso-west-1" ]; then S3_DOMAIN="c2s.ic.gov" elif [ "$BINARY_BUCKET_REGION" = "us-isob-east-1" ]; then S3_DOMAIN="sc2s.sgov.gov" @@ -369,7 +371,8 @@ sudo mv $TEMPLATE_DIR/ecr-credential-provider-config.json /etc/eks/image-credent ################################################################################ ### Cache Images ############################################################### ################################################################################ -if [[ "$CACHE_CONTAINER_IMAGES" == "true" && "$BINARY_BUCKET_REGION" != "us-iso-east-1" && "$BINARY_BUCKET_REGION" != "us-isob-east-1" ]]; then + +if [[ "$CACHE_CONTAINER_IMAGES" == "true" ]] && ! 
[[ " ${ISOLATED_REGIONS[*]} " =~ " ${BINARY_BUCKET_REGION} " ]]; then AWS_DOMAIN=$(imds 'latest/meta-data/services/domain') ECR_URI=$(/etc/eks/get-ecr-uri.sh "${BINARY_BUCKET_REGION}" "${AWS_DOMAIN}") From c70686c8e41b27a3e716cf4a4c2f99798b1729c8 Mon Sep 17 00:00:00 2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Wed, 3 May 2023 21:06:51 -0700 Subject: [PATCH 209/232] Update CHANGELOG.md (#1292) Co-authored-by: ljosyula --- CHANGELOG.md | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index acafb611e..6b776796c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,58 @@ # Changelog +### AMI Release v20230501 +* amazon-eks-gpu-node-1.26-v20230501 +* amazon-eks-gpu-node-1.25-v20230501 +* amazon-eks-gpu-node-1.24-v20230501 +* amazon-eks-gpu-node-1.23-v20230501 +* amazon-eks-gpu-node-1.22-v20230501 +* amazon-eks-arm64-node-1.26-v20230501 +* amazon-eks-arm64-node-1.25-v20230501 +* amazon-eks-arm64-node-1.24-v20230501 +* amazon-eks-arm64-node-1.23-v20230501 +* amazon-eks-arm64-node-1.22-v20230501 +* amazon-eks-node-1.26-v20230501 +* amazon-eks-node-1.25-v20230501 +* amazon-eks-node-1.24-v20230501 +* amazon-eks-node-1.23-v20230501 +* amazon-eks-node-1.22-v20230501 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.26.2-20230501` +* `1.25.7-20230501` +* `1.24.11-20230501` +* `1.23.17-20230501` +* `1.22.17-20230501` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.26.2/2023-03-17/ +* s3://amazon-eks/1.25.7/2023-03-17/ +* s3://amazon-eks/1.24.11/2023-03-17/ +* s3://amazon-eks/1.23.17/2023-03-17/ +* s3://amazon-eks/1.22.17/2023-03-17/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.241-150.347.amzn2 + * Kubernetes 1.24 and above: 5.10.178-162.673.amzn2 +* `dockerd`: 20.10.17-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with 
Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- Add bootstrap option to create a local NVMe raid0 or individual volume mounts ([#1171](https://github.com/awslabs/amazon-eks-ami/pull/1171)) +- Improve bootstrap logging ([#1276](https://github.com/awslabs/amazon-eks-ami/pull/1276)) +- Use credential provider API v1 in 1.27+, v1alpha1 in 1.26- ([#1269](https://github.com/awslabs/amazon-eks-ami/pull/1269)) +- Override hostname to match EC2's PrivateDnsName ([#1264](https://github.com/awslabs/amazon-eks-ami/pull/1264)) +- Add ethtool ([#1261](https://github.com/awslabs/amazon-eks-ami/pull/1261)) +- Update `kernel-5.10` for [ALASKERNEL-5.10-2023-031](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-031.html) +- Kernel version upgrade to `5.10.178-162.673.amzn2` fixes the [Containers failing to create and probe exec errors related to seccomp on recent kernel-5.10 versions](https://github.com/awslabs/amazon-eks-ami/issues/1219) issue + + ### AMI Release v20230411 * amazon-eks-gpu-node-1.26-v20230411 * amazon-eks-gpu-node-1.25-v20230411 From 3e40a8bb13173b781fde4171e2de7e2e5031b411 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 5 May 2023 12:31:07 -0700 Subject: [PATCH 210/232] Upgrades docker to 20.10.23-1.amzn2.0.1 (#1293) --- eks-worker-al2-variables.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 30d769e03..68cbad7eb 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -15,7 +15,7 @@ "cni_plugin_version": "v0.8.6", "containerd_version": "1.6.*", "creator": "{{env `USER`}}", - "docker_version": "20.10.17-1.amzn2.0.1", + "docker_version": "20.10.23-1.amzn2.0.1", "encrypted": "false", "kernel_version": "", "kms_key_id": "", From e4dd2abb52a43477f9aa32134fa6683dcc8de048 Mon Sep 17 00:00:00 
2001 From: Laxmi Soumya Josyula <42261978+ljosyula@users.noreply.github.com> Date: Mon, 8 May 2023 16:43:49 -0700 Subject: [PATCH 211/232] Updating instance type for AMI build and making changes for us-iso-west-1 support (#1294) Co-authored-by: ljosyula --- Makefile | 2 +- scripts/install-worker.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 271faa8fb..6ff6ba3ec 100644 --- a/Makefile +++ b/Makefile @@ -46,7 +46,7 @@ ifeq ($(arch), arm64) instance_type ?= m6g.large ami_name ?= amazon-eks-arm64-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') else - instance_type ?= m4.large + instance_type ?= m5.large ami_name ?= amazon-eks-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d') endif diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index e58c3a733..d8bdafdcd 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -123,8 +123,8 @@ sudo mv $TEMPLATE_DIR/iptables-restore.service /etc/eks/iptables-restore.service ################################################################################ ### isolated regions can't communicate to awscli.amazonaws.com so installing awscli through yum -ISOLATED_REGIONS=(us-iso-east-1 us-iso-west-1 us-isob-east-1) -if ! [[ " ${ISOLATED_REGIONS[*]} " =~ " ${BINARY_BUCKET_REGION} " ]]; then +ISOLATED_REGIONS="${ISOLATED_REGIONS:-us-iso-east-1 us-iso-west-1 us-isob-east-1}" +if ! [[ ${ISOLATED_REGIONS} =~ $BINARY_BUCKET_REGION ]]; then # https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html echo "Installing awscli v2 bundle" AWSCLI_DIR=$(mktemp -d) @@ -372,7 +372,7 @@ sudo mv $TEMPLATE_DIR/ecr-credential-provider-config.json /etc/eks/image-credent ### Cache Images ############################################################### ################################################################################ -if [[ "$CACHE_CONTAINER_IMAGES" == "true" ]] && ! 
[[ " ${ISOLATED_REGIONS[*]} " =~ " ${BINARY_BUCKET_REGION} " ]]; then +if [[ "$CACHE_CONTAINER_IMAGES" == "true" ]] && ! [[ ${ISOLATED_REGIONS} =~ $BINARY_BUCKET_REGION ]]; then AWS_DOMAIN=$(imds 'latest/meta-data/services/domain') ECR_URI=$(/etc/eks/get-ecr-uri.sh "${BINARY_BUCKET_REGION}" "${AWS_DOMAIN}") From 59212d4a8ec42929f19b1c47e8f9a61168e48faf Mon Sep 17 00:00:00 2001 From: Carter Date: Tue, 9 May 2023 18:15:17 +0300 Subject: [PATCH 212/232] Fix ECR pattern for aws-cn (#1280) --- files/ecr-credential-provider-config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/ecr-credential-provider-config.json b/files/ecr-credential-provider-config.json index 7fe687156..21581c4e9 100644 --- a/files/ecr-credential-provider-config.json +++ b/files/ecr-credential-provider-config.json @@ -6,7 +6,7 @@ "name": "ecr-credential-provider", "matchImages": [ "*.dkr.ecr.*.amazonaws.com", - "*.dkr.ecr.*.amazonaws.cn", + "*.dkr.ecr.*.amazonaws.com.cn", "*.dkr.ecr-fips.*.amazonaws.com", "*.dkr.ecr.us-iso-east-1.c2s.ic.gov", "*.dkr.ecr.us-isob-east-1.sc2s.sgov.gov" From f68e0e6afa2cc61c481aa6a8ce6c9e4ab692491d Mon Sep 17 00:00:00 2001 From: willgleich <22464726+willgleich@users.noreply.github.com> Date: Tue, 9 May 2023 11:51:44 -0600 Subject: [PATCH 213/232] Fix imds setting for multiple enis on ipv6 (#1275) --- files/bootstrap.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index c6aedcdd5..d200eb431 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -441,7 +441,7 @@ fi ### kubelet.service configuration -MAC=$(imds 'latest/meta-data/network/interfaces/macs/' | head -n 1 | sed 's/\/$//') +MAC=$(imds 'latest/meta-data/mac') if [[ -z "${DNS_CLUSTER_IP}" ]]; then if [[ "${IP_FAMILY}" == "ipv6" ]]; then From 2bf329314672f7b82e3efc9de8a02d45d3d01bf2 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 11 May 2023 19:31:43 +0300 Subject: [PATCH 214/232] Revert gp3 volume_type due to lack of Local 
Zone support (#1295) --- Makefile | 15 --------------- eks-worker-al2-variables.json | 6 +++--- 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index 6ff6ba3ec..c6b13f43e 100644 --- a/Makefile +++ b/Makefile @@ -17,21 +17,6 @@ packer_variable_file_contains = $(if $(PACKER_VARIABLE_FILE),$(shell grep -Fq $1 # otherwise expands to 'false' vercmp = $(shell $(MAKEFILE_DIR)/files/bin/vercmp "$1" "$2" "$3") -# expands to 'true' if the 'aws_region' contains 'us-iso' (an isolated region) -# otherwise, expands to 'false' -in_iso_region = $(if $(findstring us-iso,$(aws_region)),true,false) - -# gp3 volumes are used by default for 1.27+ -# TODO: remove when 1.26 reaches EOL -# TODO: remove when gp3 is supported in isolated regions -ifneq ($(call packer_variable_file_contains,volume_type), true) - ifeq ($(call in_iso_region), true) - volume_type ?= gp2 - else ifeq ($(call vercmp,$(kubernetes_version),lt,1.27.0), true) - volume_type ?= gp2 - endif -endif - # Docker is not present on 1.25+ AMI's # TODO: remove this when 1.24 reaches EOL ifeq ($(call vercmp,$(kubernetes_version),gteq,1.25.0), true) diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 68cbad7eb..075702c65 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -11,7 +11,7 @@ "aws_session_token": "{{env `AWS_SESSION_TOKEN`}}", "binary_bucket_name": "amazon-eks", "binary_bucket_region": "us-west-2", - "cache_container_images": "false", + "cache_container_images": "false", "cni_plugin_version": "v0.8.6", "containerd_version": "1.6.*", "creator": "{{env `USER`}}", @@ -20,7 +20,7 @@ "kernel_version": "", "kms_key_id": "", "launch_block_device_mappings_volume_size": "4", - "pause_container_version": "3.5", + "pause_container_version": "3.5", "pull_cni_from_github": "true", "remote_folder": "", "runc_version": "1.1.4-1.amzn2", @@ -33,5 +33,5 @@ "ssh_username": "ec2-user", "subnet_id": "", "temporary_security_group_source_cidrs": "", 
- "volume_type": "gp3" + "volume_type": "gp2" } From 07194608a40a341bb37271804d1b30bb66aebd6d Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Thu, 11 May 2023 19:17:42 -0500 Subject: [PATCH 215/232] fix: add local-disk info log and fix typo (#1298) --- files/bootstrap.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index d200eb431..8178fcb06 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -105,7 +105,7 @@ while [[ $# -gt 0 ]]; do ;; --pause-container-account) PAUSE_CONTAINER_ACCOUNT=$2 - log "INFO: --pause-container-accounte='${PAUSE_CONTAINER_ACCOUNT}'" + log "INFO: --pause-container-account='${PAUSE_CONTAINER_ACCOUNT}'" shift shift ;; @@ -159,6 +159,7 @@ while [[ $# -gt 0 ]]; do ;; --local-disks) LOCAL_DISKS=$2 + log "INFO: --local-disks='${LOCAL_DISKS}'" shift shift ;; From 0de475c5f802acd470d9a2f1fdd521b7949a25ec Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 12 May 2023 01:36:28 -0700 Subject: [PATCH 216/232] Update eni-max-pods.txt (#1296) Co-authored-by: cartermckinnon --- files/eni-max-pods.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index 2c0253e90..d24d8d28f 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -253,6 +253,12 @@ i3en.6xlarge 234 i3en.large 29 i3en.metal 737 i3en.xlarge 58 +i4g.16xlarge 737 +i4g.2xlarge 58 +i4g.4xlarge 234 +i4g.8xlarge 234 +i4g.large 29 +i4g.xlarge 58 i4i.16xlarge 737 i4i.2xlarge 58 i4i.32xlarge 737 From e2618f9504e85d84153e7ee9a147415a032584ac Mon Sep 17 00:00:00 2001 From: camrakin <113552683+camrakin@users.noreply.github.com> Date: Fri, 12 May 2023 14:12:02 -0700 Subject: [PATCH 217/232] Update Makefile for 2023-05-11 Binaries (#1300) --- Makefile | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index c6b13f43e..d5e58398f 100644 --- 
a/Makefile +++ b/Makefile @@ -49,7 +49,7 @@ T_YELLOW := \e[0;33m T_RESET := \e[0m .PHONY: latest -latest: 1.26 ## Build EKS Optimized AL2 AMI with the latest supported version of Kubernetes +latest: 1.27 ## Build EKS Optimized AL2 AMI with the latest supported version of Kubernetes # ensure that these flags are equivalent to the rules in the .editorconfig SHFMT_FLAGS := --list \ @@ -102,24 +102,28 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI .PHONY: 1.22 1.22: ## Build EKS Optimized AL2 AMI - K8s 1.22 - $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-03-17 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-05-11 pull_cni_from_github=true .PHONY: 1.23 1.23: ## Build EKS Optimized AL2 AMI - K8s 1.23 - $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-03-17 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.23.17 kubernetes_build_date=2023-05-11 pull_cni_from_github=true .PHONY: 1.24 1.24: ## Build EKS Optimized AL2 AMI - K8s 1.24 - $(MAKE) k8s kubernetes_version=1.24.11 kubernetes_build_date=2023-03-17 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.24.13 kubernetes_build_date=2023-05-11 pull_cni_from_github=true .PHONY: 1.25 1.25: ## Build EKS Optimized AL2 AMI - K8s 1.25 - $(MAKE) k8s kubernetes_version=1.25.7 kubernetes_build_date=2023-03-17 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.25.9 kubernetes_build_date=2023-05-11 pull_cni_from_github=true .PHONY: 1.26 1.26: ## Build EKS Optimized AL2 AMI - K8s 1.26 - $(MAKE) k8s kubernetes_version=1.26.2 kubernetes_build_date=2023-03-17 pull_cni_from_github=true + $(MAKE) k8s kubernetes_version=1.26.4 kubernetes_build_date=2023-05-11 pull_cni_from_github=true +.PHONY: 1.27 +1.27: ## Build EKS Optimized AL2 AMI - K8s 1.27 + $(MAKE) k8s kubernetes_version=1.27.1 kubernetes_build_date=2023-04-19 pull_cni_from_github=true + .PHONY: clean clean: rm *-manifest.json From 
7465fc8287626e4d261a28d20d7a8ead7afb75d1 Mon Sep 17 00:00:00 2001 From: Zaid Ahmed Farooq <38226823+zaf6862@users.noreply.github.com> Date: Fri, 12 May 2023 18:18:03 -0700 Subject: [PATCH 218/232] AMI Release v20230509 (#1299) * AMI Release v20230509 --------- Co-authored-by: Zaid Farooq --- CHANGELOG.md | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b776796c..f5b7a5459 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,54 @@ # Changelog +### AMI Release v20230509 +* amazon-eks-gpu-node-1.26-v20230509 +* amazon-eks-gpu-node-1.25-v20230509 +* amazon-eks-gpu-node-1.24-v20230509 +* amazon-eks-gpu-node-1.23-v20230509 +* amazon-eks-gpu-node-1.22-v20230509 +* amazon-eks-arm64-node-1.26-v20230509 +* amazon-eks-arm64-node-1.25-v20230509 +* amazon-eks-arm64-node-1.24-v20230509 +* amazon-eks-arm64-node-1.23-v20230509 +* amazon-eks-arm64-node-1.22-v20230509 +* amazon-eks-node-1.26-v20230509 +* amazon-eks-node-1.25-v20230509 +* amazon-eks-node-1.24-v20230509 +* amazon-eks-node-1.23-v20230509 +* amazon-eks-node-1.22-v20230509 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.26.2-20230509` +* `1.25.7-20230509` +* `1.24.11-20230509` +* `1.23.17-20230509` +* `1.22.17-20230509` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.26.2/2023-03-17/ +* s3://amazon-eks/1.25.7/2023-03-17/ +* s3://amazon-eks/1.24.11/2023-03-17/ +* s3://amazon-eks/1.23.17/2023-03-17/ +* s3://amazon-eks/1.22.17/2023-03-17/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.241-150.347.amzn2 + * Kubernetes 1.24 and above: 5.10.178-162.673.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. 
+* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +- The new AMIs have updated docker version 20.10.23-1.amzn2.0.1 that addresses two docker CVEs; [CVE-2022-36109 - docker](https://alas.aws.amazon.com/cve/html/CVE-2022-36109.html) and [CVE-2022-37708 - docker](https://alas.aws.amazon.com/cve/html/CVE-2022-37708.html). +- For the GPU Variants of these AMIs, the Nvidia Fabric Manager version is upgraded from 470.161.03-1 to 470.182.03-1. +- Fix ECR pattern for aws-cn ([#1280](https://github.com/awslabs/amazon-eks-ami/pull/1280)) +- Fix imds setting for multiple enis on ipv6 ([1275](https://github.com/awslabs/amazon-eks-ami/pull/1275)) + ### AMI Release v20230501 * amazon-eks-gpu-node-1.26-v20230501 * amazon-eks-gpu-node-1.25-v20230501 From 3e27dbc3b24c87fdf53fbe6bef610d23732f03a6 Mon Sep 17 00:00:00 2001 From: camrakin <113552683+camrakin@users.noreply.github.com> Date: Thu, 18 May 2023 16:30:34 -0700 Subject: [PATCH 219/232] Update CHANGELOG for AMI Release v20230513 (#1305) Update the CHANGELOG.md for the v20230513 AMI Release --- CHANGELOG.md | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5b7a5459..3200e89eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,59 @@ # Changelog +### AMI Release v20230513 +* amazon-eks-gpu-node-1.27-v20230513 +* amazon-eks-gpu-node-1.26-v20230513 +* amazon-eks-gpu-node-1.25-v20230513 +* amazon-eks-gpu-node-1.24-v20230513 +* amazon-eks-gpu-node-1.23-v20230513 +* amazon-eks-gpu-node-1.22-v20230513 +* amazon-eks-arm64-node-1.27-v20230513 +* amazon-eks-arm64-node-1.26-v20230513 +* amazon-eks-arm64-node-1.25-v20230513 +* amazon-eks-arm64-node-1.24-v20230513 +* amazon-eks-arm64-node-1.23-v20230513 +* amazon-eks-arm64-node-1.22-v20230513 +* amazon-eks-node-1.27-v20230513 +* amazon-eks-node-1.26-v20230513 
+* amazon-eks-node-1.25-v20230513 +* amazon-eks-node-1.24-v20230513 +* amazon-eks-node-1.23-v20230513 +* amazon-eks-node-1.22-v20230513 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.27.1-20230513` +* `1.26.4-20230513` +* `1.25.9-20230513` +* `1.24.13-20230513` +* `1.23.17-20230513` +* `1.22.17-20230513` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.1/2023-04-19/ +* s3://amazon-eks/1.26.4/2023-05-11/ +* s3://amazon-eks/1.25.9/2023-05-11/ +* s3://amazon-eks/1.24.13/2023-05-11/ +* s3://amazon-eks/1.23.17/2023-05-11/ +* s3://amazon-eks/1.22.17/2023-05-11/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.241-150.347.amzn2 + * Kubernetes 1.24 and above: 5.10.178-162.673.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: + - Add support for Kubernetes 1.27 ([#1300](https://github.com/awslabs/amazon-eks-ami/pull/1300)) + +Other changes: + - Updated max pods for i4g instance types ([#1296](https://github.com/awslabs/amazon-eks-ami/commit/0de475c5f802acd470d9a2f1fdd521b7949a25ec)) + ### AMI Release v20230509 * amazon-eks-gpu-node-1.26-v20230509 * amazon-eks-gpu-node-1.25-v20230509 From 63334e86e02ade2140c740689ead89dacf055d89 Mon Sep 17 00:00:00 2001 From: Kulwant Singh Date: Fri, 19 May 2023 19:18:31 +0000 Subject: [PATCH 220/232] Collect vpc-bridge CNI conf on Windows (#1306) --- log-collector-script/windows/eks-log-collector.ps1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/log-collector-script/windows/eks-log-collector.ps1 b/log-collector-script/windows/eks-log-collector.ps1 index f96916e7e..31fa84ba2 100644 --- a/log-collector-script/windows/eks-log-collector.ps1 +++ 
b/log-collector-script/windows/eks-log-collector.ps1 @@ -289,7 +289,7 @@ Function get_k8s_info{ Write-Host "Collecting kubelet information" copy C:\ProgramData\kubernetes\kubeconfig $info_system\kubelet\ copy C:\ProgramData\kubernetes\kubelet-config.json $info_system\kubelet\ - copy C:\ProgramData\Amazon\EKS\cni\config\vpc-shared-eni.conf $info_system\cni\ + copy C:\ProgramData\Amazon\EKS\cni\config\* $info_system\cni\ Write-Host "OK" -foregroundcolor "green" } catch { From 43cc4599720511700fe732b67938a0146ce7119c Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 24 May 2023 09:18:53 -0700 Subject: [PATCH 221/232] Collect Karpenter user-data.log (#1310) --- log-collector-script/linux/eks-log-collector.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/log-collector-script/linux/eks-log-collector.sh b/log-collector-script/linux/eks-log-collector.sh index dee5654a6..75eada625 100644 --- a/log-collector-script/linux/eks-log-collector.sh +++ b/log-collector-script/linux/eks-log-collector.sh @@ -71,6 +71,7 @@ COMMON_LOGS=( pods # eks cloud-init.log cloud-init-output.log + user-data.log kube-proxy.log ) From 8369d06e5ae2a9c4c8cb99531ce525a767080019 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 26 May 2023 15:43:07 -0700 Subject: [PATCH 222/232] Update managed policy name for SSM (#1311) --- log-collector-script/linux/README.md | 2 +- log-collector-script/windows/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/log-collector-script/linux/README.md b/log-collector-script/linux/README.md index 69bc088b3..4119e4410 100644 --- a/log-collector-script/linux/README.md +++ b/log-collector-script/linux/README.md @@ -91,7 +91,7 @@ Trying to archive gathered information... * SSM agent should be installed and running on Worker Node(s). [How to Install SSM Agent link](https://docs.aws.amazon.com/systems-manager/latest/userguide/sysman-manual-agent-install.html) -* Worker Node(s) should have required permissions to communicate with SSM service. 
IAM managed role `AmazonEC2RoleforSSM` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonEC2RoleforSSM` has `S3:PutObject` permission to all S3 resources. +* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonSSMManagedInstanceCore` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonSSMManagedInstanceCore` has `S3:PutObject` permission to all S3 resources. *Note:* For more granular control of the IAM permission check [Actions defined by AWS Systems Manager](https://docs.aws.amazon.com/IAM/latest/UserGuide/list_awssystemsmanager.html%23awssystemsmanager-actions-as-permissions) diff --git a/log-collector-script/windows/README.md b/log-collector-script/windows/README.md index 374a4053b..945211c14 100644 --- a/log-collector-script/windows/README.md +++ b/log-collector-script/windows/README.md @@ -84,7 +84,7 @@ Done... your bundled logs are located in C:\log-collector\eks_i-0b318f704c74b6a * SSM agent should be installed and running on Worker Node(s). [How to Install SSM Agent link](https://docs.aws.amazon.com/systems-manager/latest/userguide/sysman-manual-agent-install.html) -* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonEC2RoleforSSM` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonEC2RoleforSSM` has `S3:PutObject` permission to all S3 resources. +* Worker Node(s) should have required permissions to communicate with SSM service. IAM managed role `AmazonSSMManagedInstanceCore` will have all the required permission for SSM agent to run on EC2 instances. The IAM managed role `AmazonSSMManagedInstanceCore` has `S3:PutObject` permission to all S3 resources. 
*Note:* For more granular control of the IAM permission check [Actions defined by AWS Systems Manager](https://docs.aws.amazon.com/IAM/latest/UserGuide/list_awssystemsmanager.html%23awssystemsmanager-actions-as-permissions) From cee3aadb6dad8e5c4a89b690600ac846a77c136c Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 1 Jun 2023 08:38:30 -0700 Subject: [PATCH 223/232] Update CHANGELOG.md `v20230526` release (#1314) Update changelog for `v20230526` release --- CHANGELOG.md | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3200e89eb..419f1240f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,58 @@ # Changelog +### AMI Release v20230526 +* amazon-eks-gpu-node-1.27-v20230526 +* amazon-eks-gpu-node-1.26-v20230526 +* amazon-eks-gpu-node-1.25-v20230526 +* amazon-eks-gpu-node-1.24-v20230526 +* amazon-eks-gpu-node-1.23-v20230526 +* amazon-eks-gpu-node-1.22-v20230526 +* amazon-eks-arm64-node-1.27-v20230526 +* amazon-eks-arm64-node-1.26-v20230526 +* amazon-eks-arm64-node-1.25-v20230526 +* amazon-eks-arm64-node-1.24-v20230526 +* amazon-eks-arm64-node-1.23-v20230526 +* amazon-eks-arm64-node-1.22-v20230526 +* amazon-eks-node-1.27-v20230526 +* amazon-eks-node-1.26-v20230526 +* amazon-eks-node-1.25-v20230526 +* amazon-eks-node-1.24-v20230526 +* amazon-eks-node-1.23-v20230526 +* amazon-eks-node-1.22-v20230526 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.27.1-20230526` +* `1.26.4-20230526` +* `1.25.9-20230526` +* `1.24.13-20230526` +* `1.23.17-20230526` +* `1.22.17-20230526` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.1/2023-04-19/ +* s3://amazon-eks/1.26.4/2023-05-11/ +* s3://amazon-eks/1.25.9/2023-05-11/ +* s3://amazon-eks/1.24.13/2023-05-11/ +* s3://amazon-eks/1.23.17/2023-05-11/ +* s3://amazon-eks/1.22.17/2023-05-11/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 
5.4.242-155.348.amzn2 + * Kubernetes 1.24 and above: 5.10.179-166.674.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. +* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.4-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +* `5.4` kernel update to `5.4.242-155.348.amzn2` addresses CVE [ALAS2KERNEL-5.4-2023-045](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.4-2023-045.html) +* `5.10` kernel update to `5.10.179-166.674.amzn2` addresses [ALAS2KERNEL-5.10-2023-032](https://alas.aws.amazon.com/AL2/ALASKERNEL-5.10-2023-032.html) +* `Glib` update to `glib2-2.56.1-9.amzn2` addresses [ALAS-2023-2049](https://alas.aws.amazon.com/AL2/ALAS-2023-2049.html) + ### AMI Release v20230513 * amazon-eks-gpu-node-1.27-v20230513 * amazon-eks-gpu-node-1.26-v20230513 From 487830668b1f556e530fd1d6310763b337cfb74e Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 6 Jun 2023 18:42:07 -0700 Subject: [PATCH 224/232] Updates runc to 1.1.5 (#1319) --- eks-worker-al2-variables.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index 075702c65..f5b3e616b 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -23,7 +23,7 @@ "pause_container_version": "3.5", "pull_cni_from_github": "true", "remote_folder": "", - "runc_version": "1.1.4-1.amzn2", + "runc_version": "1.1.5-1.amzn2", "security_group_id": "", "sonobuoy_e2e_registry": "", "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", From 3cd35c7833c2e79a310470fa18b7ae3bcffc28aa Mon Sep 17 00:00:00 2001 From: ravisinha0506 Date: Sat, 10 Jun 2023 00:30:23 -0700 Subject: [PATCH 225/232] Updating changelog for release (#1323) Co-authored-by: Ravi Sinha --- CHANGELOG.md | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 419f1240f..9532c7419 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,57 @@ # Changelog +### AMI Release v20230607 +* amazon-eks-gpu-node-1.27-v20230607 +* amazon-eks-gpu-node-1.26-v20230607 +* amazon-eks-gpu-node-1.25-v20230607 +* amazon-eks-gpu-node-1.24-v20230607 +* amazon-eks-gpu-node-1.23-v20230607 +* amazon-eks-gpu-node-1.22-v20230607 +* amazon-eks-arm64-node-1.27-v20230607 +* amazon-eks-arm64-node-1.26-v20230607 +* amazon-eks-arm64-node-1.25-v20230607 +* amazon-eks-arm64-node-1.24-v20230607 +* amazon-eks-arm64-node-1.23-v20230607 +* amazon-eks-arm64-node-1.22-v20230607 +* amazon-eks-node-1.27-v20230607 +* amazon-eks-node-1.26-v20230607 +* amazon-eks-node-1.25-v20230607 +* amazon-eks-node-1.24-v20230607 +* amazon-eks-node-1.23-v20230607 +* amazon-eks-node-1.22-v20230607 + +[Release versions](https://docs.aws.amazon.com/eks/latest/userguide/eks-linux-ami-versions.html) for these AMIs: +* `1.27.1-20230607` +* `1.26.4-20230607` +* `1.25.9-20230607` +* `1.24.13-20230607` +* `1.23.17-20230607` +* `1.22.17-20230607` + +Binaries used to build these AMIs are published: +* s3://amazon-eks/1.27.1/2023-04-19/ +* s3://amazon-eks/1.26.4/2023-05-11/ +* s3://amazon-eks/1.25.9/2023-05-11/ +* s3://amazon-eks/1.24.13/2023-05-11/ +* s3://amazon-eks/1.23.17/2023-05-11/ +* s3://amazon-eks/1.22.17/2023-05-11/ + +AMI details: +* `kernel`: + * Kubernetes 1.23 and below: 5.4.242-156.349.amzn2 + * Kubernetes 1.24 and above: 5.10.179-168.710.amzn2 +* `dockerd`: 20.10.23-1.amzn2.0.1 + * **Note** that Docker is not installed on AMI's with Kubernetes 1.25+. 
+* `containerd`: 1.6.19-1.amzn2.0.1 +* `runc`: 1.1.5-1.amzn2 +* `cuda`: 11.4.0-1 +* `nvidia-container-runtime-hook`: 1.4.0-1.amzn2 +* `amazon-ssm-agent`: 3.1.1732.0-1.amzn2 + +Notable changes: +* `5.4` kernel update to `5.4.242-156.349.amzn2` and `5.10` kernel update to `5.10.179-168.710.amzn2` address [CVE-2023-32233](https://alas.aws.amazon.com/cve/html/CVE-2023-32233.html) +* Updating `runc` version to `1.1.5-1.amzn2` which contains fixes for [CVE-2023-28642](https://explore.alas.aws.amazon.com/CVE-2023-27561.html) and [CVE-2023-27561](https://explore.alas.aws.amazon.com/CVE-2023-28642.html). + ### AMI Release v20230526 * amazon-eks-gpu-node-1.27-v20230526 * amazon-eks-gpu-node-1.26-v20230526 From 05f1146017927a043e12bbc369eeab28f96bcdde Mon Sep 17 00:00:00 2001 From: Carter Date: Wed, 14 Jun 2023 10:26:07 -0700 Subject: [PATCH 226/232] Mount bpffs by default on 1.25+ (#1320) --- files/bootstrap.sh | 6 +++--- test/cases/mount-bpf-fs.sh | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 8178fcb06..3729dad2c 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -33,7 +33,7 @@ function print_help { echo "--ip-family Specify ip family of the cluster" echo "--kubelet-extra-args Extra arguments to add to the kubelet. Useful for adding labels or taints." echo "--local-disks Setup instance storage NVMe disks in raid0 or mount the individual disks for use by pods [mount | raid0]" - echo "--mount-bfs-fs Mount a bpffs at /sys/fs/bpf (default: true, for Kubernetes 1.27+; false otherwise)" + echo "--mount-bpf-fs Mount a bpffs at /sys/fs/bpf (default: true, for Kubernetes 1.25+; false otherwise)" echo "--pause-container-account The AWS account (number) to pull the pause container from" echo "--pause-container-version The tag of the pause container" echo "--service-ipv6-cidr ipv6 cidr range of the cluster" @@ -225,7 +225,7 @@ if [[ ! 
-z ${LOCAL_DISKS} ]]; then fi DEFAULT_MOUNT_BPF_FS="true" -if vercmp "$KUBELET_VERSION" lt "1.27.0"; then +if vercmp "$KUBELET_VERSION" lt "1.25.0"; then DEFAULT_MOUNT_BPF_FS="false" fi MOUNT_BPF_FS="${MOUNT_BPF_FS:-$DEFAULT_MOUNT_BPF_FS}" @@ -322,7 +322,7 @@ if [[ "$MACHINE" != "x86_64" && "$MACHINE" != "aarch64" ]]; then fi if [ "$MOUNT_BPF_FS" = "true" ]; then - sudo mount-bpf-fs + mount-bpf-fs fi ECR_URI=$(/etc/eks/get-ecr-uri.sh "${AWS_DEFAULT_REGION}" "${AWS_SERVICES_DOMAIN}" "${PAUSE_CONTAINER_ACCOUNT:-}") diff --git a/test/cases/mount-bpf-fs.sh b/test/cases/mount-bpf-fs.sh index 61b2f3844..c5281d4e2 100755 --- a/test/cases/mount-bpf-fs.sh +++ b/test/cases/mount-bpf-fs.sh @@ -66,14 +66,14 @@ if [[ ${EXIT_CODE} -ne 0 ]]; then echo "❌ Test Failed: expected a zero exit code but got '${EXIT_CODE}'" exit 1 fi -if [ "$(cat $MOUNT_BPF_FS_MOCK)" = "called" ]; then +if [ ! "$(cat $MOUNT_BPF_FS_MOCK)" = "called" ]; then echo "❌ Test Failed: expected mount-bpf-fs to be called once but it was not!" 
exit 1 fi export -nf mount-bpf-fs -echo "--> Should default to false on 1.26-" -export KUBELET_VERSION=v1.26.0-eks-ba74326 +echo "--> Should default to false on 1.24-" +export KUBELET_VERSION=v1.24.0-eks-ba74326 MOUNT_BPF_FS_MOCK=$(mktemp) function mount-bpf-fs() { echo "called" >> $MOUNT_BPF_FS_MOCK From 6412beeede80cdd09c9a0437a7a8856e062777f7 Mon Sep 17 00:00:00 2001 From: Carter Date: Fri, 16 Jun 2023 10:26:05 -0700 Subject: [PATCH 227/232] Add configurable working directory (#1231) --- doc/USER_GUIDE.md | 53 +++++++++++++++++++++- eks-worker-al2-variables.json | 5 ++- eks-worker-al2.json | 36 +++++++++------ hack/generate-template-variable-doc.py | 62 ++++++++++++++++++++++++++ scripts/cleanup.sh | 3 -- scripts/install-worker.sh | 54 +++++++++++----------- 6 files changed, 165 insertions(+), 48 deletions(-) create mode 100755 hack/generate-template-variable-doc.py diff --git a/doc/USER_GUIDE.md b/doc/USER_GUIDE.md index b2271c9a9..c8f79a5bf 100644 --- a/doc/USER_GUIDE.md +++ b/doc/USER_GUIDE.md @@ -23,9 +23,58 @@ Default values for most variables are defined in [a default variable file](eks-w Users have the following options for specifying their own values: 1. Provide a variable file with the `PACKER_VARIABLE_FILE` argument to `make`. Values in this file will override values in the default variable file. Your variable file does not need to include all possible variables, as it will be merged with the default variable file. -2. Pass a key-value pair for any template variable to `make`. These values will override any values that were specified with the first method. +2. Pass a key-value pair for any template variable to `make`. These values will override any values that were specified with the first method. In the table below, these variables have a default value of "None". -**Note** that some variables (such as `arch` and `kubernetes_version`) do not have a sensible, static default, and are satisfied by the Makefile. 
Such variables do not appear in the default variable file, and must be overridden (if necessary) by the second method described above. +> **Note** +> Some variables (such as `arch` and `kubernetes_version`) do not have a sensible, static default, and are satisfied by the Makefile. +> Such variables do not appear in the default variable file, and must be overridden (if necessary) by the second method described above. + + + +| Variable | Default value | Description | +| - | - | - | +| `additional_yum_repos` | `""` | | +| `ami_component_description` | ```{{user `remote_folder`}}/worker``` | | +| `ami_description` | ```{{user `remote_folder`}}/worker``` | | +| `ami_name` | None | | +| `ami_regions` | `""` | | +| `ami_users` | `""` | | +| `arch` | None | | +| `associate_public_ip_address` | `""` | | +| `aws_access_key_id` | ```{{user `remote_folder`}}/worker``` | | +| `aws_region` | ```{{user `remote_folder`}}/worker``` | | +| `aws_secret_access_key` | ```{{user `remote_folder`}}/worker``` | | +| `aws_session_token` | ```{{user `remote_folder`}}/worker``` | | +| `binary_bucket_name` | ```{{user `remote_folder`}}/worker``` | | +| `binary_bucket_region` | ```{{user `remote_folder`}}/worker``` | | +| `cache_container_images` | ```{{user `remote_folder`}}/worker``` | | +| `cni_plugin_version` | ```{{user `remote_folder`}}/worker``` | | +| `containerd_version` | ```{{user `remote_folder`}}/worker``` | | +| `creator` | ```{{user `remote_folder`}}/worker``` | | +| `docker_version` | ```{{user `remote_folder`}}/worker``` | | +| `encrypted` | ```{{user `remote_folder`}}/worker``` | | +| `instance_type` | None | | +| `kernel_version` | `""` | | +| `kms_key_id` | `""` | | +| `kubernetes_build_date` | None | | +| `kubernetes_version` | None | | +| `launch_block_device_mappings_volume_size` | ```{{user `remote_folder`}}/worker``` | | +| `pause_container_version` | ```{{user `remote_folder`}}/worker``` | | +| `pull_cni_from_github` | ```{{user `remote_folder`}}/worker``` | | +| 
`remote_folder` | ```{{user `remote_folder`}}/worker``` | Directory path for shell provisioner scripts on the builder instance | +| `runc_version` | ```{{user `remote_folder`}}/worker``` | | +| `security_group_id` | `""` | | +| `sonobuoy_e2e_registry` | `""` | | +| `source_ami_filter_name` | ```{{user `remote_folder`}}/worker``` | | +| `source_ami_id` | `""` | | +| `source_ami_owners` | ```{{user `remote_folder`}}/worker``` | | +| `ssh_interface` | `""` | | +| `ssh_username` | ```{{user `remote_folder`}}/worker``` | | +| `subnet_id` | `""` | | +| `temporary_security_group_source_cidrs` | `""` | | +| `volume_type` | ```{{user `remote_folder`}}/worker``` | | +| `working_dir` | ```{{user `remote_folder`}}/worker``` | Directory path for ephemeral resources on the builder instance | + --- diff --git a/eks-worker-al2-variables.json b/eks-worker-al2-variables.json index f5b3e616b..1f30250c4 100644 --- a/eks-worker-al2-variables.json +++ b/eks-worker-al2-variables.json @@ -22,7 +22,7 @@ "launch_block_device_mappings_volume_size": "4", "pause_container_version": "3.5", "pull_cni_from_github": "true", - "remote_folder": "", + "remote_folder": "/tmp", "runc_version": "1.1.5-1.amzn2", "security_group_id": "", "sonobuoy_e2e_registry": "", @@ -33,5 +33,6 @@ "ssh_username": "ec2-user", "subnet_id": "", "temporary_security_group_source_cidrs": "", - "volume_type": "gp2" + "volume_type": "gp2", + "working_dir": "{{user `remote_folder`}}/worker" } diff --git a/eks-worker-al2.json b/eks-worker-al2.json index 1f805c85b..c9385ddc6 100644 --- a/eks-worker-al2.json +++ b/eks-worker-al2.json @@ -40,7 +40,8 @@ "ssh_username": null, "subnet_id": null, "temporary_security_group_source_cidrs": null, - "volume_type": null + "volume_type": null, + "working_dir": null }, "builders": [ { @@ -114,34 +115,34 @@ "provisioners": [ { "type": "shell", - "remote_folder": "{{ user `remote_folder`}}", - "script": "{{template_dir}}/scripts/install_additional_repos.sh", - "environment_vars": [ - 
"ADDITIONAL_YUM_REPOS={{user `additional_yum_repos`}}" + "inline": [ + "mkdir -p {{user `working_dir`}}", + "mkdir -p {{user `working_dir`}}/log-collector-script" ] }, { "type": "shell", "remote_folder": "{{ user `remote_folder`}}", - "inline": [ - "mkdir -p /tmp/worker/log-collector-script/" + "script": "{{template_dir}}/scripts/install_additional_repos.sh", + "environment_vars": [ + "ADDITIONAL_YUM_REPOS={{user `additional_yum_repos`}}" ] }, { "type": "file", "source": "{{template_dir}}/files/", - "destination": "/tmp/worker/" + "destination": "{{user `working_dir`}}" }, { "type": "file", "source": "{{template_dir}}/log-collector-script/linux/", - "destination": "/tmp/worker/log-collector-script/" + "destination": "{{user `working_dir`}}/log-collector-script/" }, { "type": "shell", "inline": [ - "sudo chmod -R a+x /tmp/worker/bin/", - "sudo mv /tmp/worker/bin/* /usr/bin/" + "sudo chmod -R a+x {{user `working_dir`}}/bin/", + "sudo mv {{user `working_dir`}}/bin/* /usr/bin/" ] }, { @@ -174,7 +175,8 @@ "AWS_SESSION_TOKEN={{user `aws_session_token`}}", "SONOBUOY_E2E_REGISTRY={{user `sonobuoy_e2e_registry`}}", "PAUSE_CONTAINER_VERSION={{user `pause_container_version`}}", - "CACHE_CONTAINER_IMAGES={{user `cache_container_images`}}" + "CACHE_CONTAINER_IMAGES={{user `cache_container_images`}}", + "WORKING_DIR={{user `working_dir`}}" ] }, { @@ -202,13 +204,19 @@ "type": "shell", "remote_folder": "{{ user `remote_folder`}}", "script": "{{template_dir}}/scripts/generate-version-info.sh", - "execute_command": "chmod +x {{ .Path }}; {{ .Path }} /tmp/version-info.json" + "execute_command": "chmod +x {{ .Path }}; {{ .Path }} {{user `working_dir`}}/version-info.json" }, { "type": "file", "direction": "download", - "source": "/tmp/version-info.json", + "source": "{{user `working_dir`}}/version-info.json", "destination": "{{ user `ami_name` }}-version-info.json" + }, + { + "type": "shell", + "inline": [ + "rm -rf {{user `working_dir`}}" + ] } ], "post-processors": [ diff --git 
a/hack/generate-template-variable-doc.py b/hack/generate-template-variable-doc.py new file mode 100755 index 000000000..35cdde476 --- /dev/null +++ b/hack/generate-template-variable-doc.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 + +import json +import os +import re + +whereami = os.path.abspath(__file__) +os.chdir(os.path.dirname(whereami)) + +template = {} +with open('../eks-worker-al2.json') as template_file: + template = json.load(template_file) + +default_vars = {} +with open('../eks-worker-al2-variables.json') as default_var_file: + default_vars = json.load(default_var_file) + +all_vars = {} + +for var in template['variables']: + all_vars[var] = None +for var, default_val in default_vars.items(): + all_vars[var] = default_val + +doc_file_name = '../doc/USER_GUIDE.md' +doc = None +with open(doc_file_name) as doc_file: + doc = doc_file.read() + +table_boundary = '' +existing_table_pattern = f"{table_boundary}([\S\s]*){table_boundary}" +existing_table_matches = re.search(existing_table_pattern, doc) +existing_table_lines = existing_table_matches.group(1).splitlines() + +new_table = f"{table_boundary}\n" +new_table += f"{existing_table_lines[1]}\n" +new_table += f"{existing_table_lines[2]}\n" + +existing_descriptions = {} +for line in existing_table_lines[3:]: + columns = line.split('|') + var = columns[1].strip(" `") + existing_descriptions[var] = columns[3].strip(" `") + +for var, val in all_vars.items(): + if val is not None: + if val == "": + val = f"`\"\"`" + else: + val = f"```{val}```" # format this variable's own default, not the stale default_val left over from the loop above + description = "" + if var in existing_descriptions: + description = existing_descriptions[var] + new_table += f"| `{var}` | {val} | {description} |\n" + +new_table += table_boundary + +replace_doc_pattern = f"{table_boundary}[\S\s]*{table_boundary}" +new_doc = re.sub(replace_doc_pattern, new_table, doc) + +with open(doc_file_name, 'w') as doc_file: + doc_file.write(new_doc) diff 
--git a/scripts/cleanup.sh b/scripts/cleanup.sh index 24861c3e9..f99893412 100644 ---
a/scripts/cleanup.sh +++ b/scripts/cleanup.sh @@ -4,9 +4,6 @@ sudo yum clean all sudo rm -rf /var/cache/yum -# Clean up build artifacts -sudo rm -rf /tmp/worker - # Clean up files to reduce confusion during debug sudo rm -rf \ /etc/hostname \ diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index d8bdafdcd..6255069ef 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -6,8 +6,6 @@ set -o errexit IFS=$'\n\t' export AWS_DEFAULT_OUTPUT="json" -TEMPLATE_DIR=${TEMPLATE_DIR:-/tmp/worker} - ################################################################################ ### Validate Required Arguments ################################################ ################################################################################ @@ -33,6 +31,7 @@ validate_env_set KUBERNETES_BUILD_DATE validate_env_set PULL_CNI_FROM_GITHUB validate_env_set PAUSE_CONTAINER_VERSION validate_env_set CACHE_CONTAINER_IMAGES +validate_env_set WORKING_DIR ################################################################################ ### Machine Architecture ####################################################### @@ -116,7 +115,7 @@ sudo systemctl restart sshd.service ### iptables ################################################################### ################################################################################ sudo mkdir -p /etc/eks -sudo mv $TEMPLATE_DIR/iptables-restore.service /etc/eks/iptables-restore.service +sudo mv $WORKING_DIR/iptables-restore.service /etc/eks/iptables-restore.service ################################################################################ ### awscli ##################################################### @@ -127,7 +126,8 @@ ISOLATED_REGIONS="${ISOLATED_REGIONS:-us-iso-east-1 us-iso-west-1 us-isob-east-1 if ! 
[[ ${ISOLATED_REGIONS} =~ $BINARY_BUCKET_REGION ]]; then # https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html echo "Installing awscli v2 bundle" - AWSCLI_DIR=$(mktemp -d) + AWSCLI_DIR="${WORKING_DIR}/awscli-install" + mkdir "${AWSCLI_DIR}" curl \ --silent \ --show-error \ @@ -145,7 +145,7 @@ fi ### systemd #################################################################### ################################################################################ -sudo mv "${TEMPLATE_DIR}/runtime.slice" /etc/systemd/system/runtime.slice +sudo mv "${WORKING_DIR}/runtime.slice" /etc/systemd/system/runtime.slice ############################################################################### ### Containerd setup ########################################################## @@ -164,13 +164,13 @@ if [ -f "/etc/eks/containerd/containerd-config.toml" ]; then ## this means we are building a gpu ami and have already placed a containerd configuration file in /etc/eks echo "containerd config is already present" else - sudo mv $TEMPLATE_DIR/containerd-config.toml /etc/eks/containerd/containerd-config.toml + sudo mv $WORKING_DIR/containerd-config.toml /etc/eks/containerd/containerd-config.toml fi -sudo mv $TEMPLATE_DIR/kubelet-containerd.service /etc/eks/containerd/kubelet-containerd.service -sudo mv $TEMPLATE_DIR/sandbox-image.service /etc/eks/containerd/sandbox-image.service -sudo mv $TEMPLATE_DIR/pull-sandbox-image.sh /etc/eks/containerd/pull-sandbox-image.sh -sudo mv $TEMPLATE_DIR/pull-image.sh /etc/eks/containerd/pull-image.sh +sudo mv $WORKING_DIR/kubelet-containerd.service /etc/eks/containerd/kubelet-containerd.service +sudo mv $WORKING_DIR/sandbox-image.service /etc/eks/containerd/sandbox-image.service +sudo mv $WORKING_DIR/pull-sandbox-image.sh /etc/eks/containerd/pull-sandbox-image.sh +sudo mv $WORKING_DIR/pull-image.sh /etc/eks/containerd/pull-image.sh sudo chmod +x /etc/eks/containerd/pull-sandbox-image.sh sudo chmod +x /etc/eks/containerd/pull-image.sh 
@@ -217,7 +217,7 @@ if [[ "$INSTALL_DOCKER" == "true" ]]; then sudo sed -i '/OPTIONS/d' /etc/sysconfig/docker sudo mkdir -p /etc/docker - sudo mv $TEMPLATE_DIR/docker-daemon.json /etc/docker/daemon.json + sudo mv $WORKING_DIR/docker-daemon.json /etc/docker/daemon.json sudo chown root:root /etc/docker/daemon.json # Enable docker daemon to start on boot. @@ -230,8 +230,8 @@ fi # kubelet uses journald which has built-in rotation and capped size. # See man 5 journald.conf -sudo mv $TEMPLATE_DIR/logrotate-kube-proxy /etc/logrotate.d/kube-proxy -sudo mv $TEMPLATE_DIR/logrotate.conf /etc/logrotate.conf +sudo mv $WORKING_DIR/logrotate-kube-proxy /etc/logrotate.d/kube-proxy +sudo mv $WORKING_DIR/logrotate.conf /etc/logrotate.conf sudo chown root:root /etc/logrotate.d/kube-proxy sudo chown root:root /etc/logrotate.conf sudo mkdir -p /var/log/journal @@ -314,19 +314,19 @@ sudo rm ./*.sha256 sudo mkdir -p /etc/kubernetes/kubelet sudo mkdir -p /etc/systemd/system/kubelet.service.d -sudo mv $TEMPLATE_DIR/kubelet-kubeconfig /var/lib/kubelet/kubeconfig +sudo mv $WORKING_DIR/kubelet-kubeconfig /var/lib/kubelet/kubeconfig sudo chown root:root /var/lib/kubelet/kubeconfig # Inject CSIServiceAccountToken feature gate to kubelet config if kubernetes version starts with 1.20. # This is only injected for 1.20 since CSIServiceAccountToken will be moved to beta starting 1.21. 
if [[ $KUBERNETES_VERSION == "1.20"* ]]; then - KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED=$(cat $TEMPLATE_DIR/kubelet-config.json | jq '.featureGates += {CSIServiceAccountToken: true}') - echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $TEMPLATE_DIR/kubelet-config.json + KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED=$(cat $WORKING_DIR/kubelet-config.json | jq '.featureGates += {CSIServiceAccountToken: true}') + echo $KUBELET_CONFIG_WITH_CSI_SERVICE_ACCOUNT_TOKEN_ENABLED > $WORKING_DIR/kubelet-config.json fi -sudo mv $TEMPLATE_DIR/kubelet.service /etc/systemd/system/kubelet.service +sudo mv $WORKING_DIR/kubelet.service /etc/systemd/system/kubelet.service sudo chown root:root /etc/systemd/system/kubelet.service -sudo mv $TEMPLATE_DIR/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json +sudo mv $WORKING_DIR/kubelet-config.json /etc/kubernetes/kubelet/kubelet-config.json sudo chown root:root /etc/kubernetes/kubelet/kubelet-config.json sudo systemctl daemon-reload @@ -338,17 +338,17 @@ sudo systemctl disable kubelet ################################################################################ sudo mkdir -p /etc/eks -sudo mv $TEMPLATE_DIR/get-ecr-uri.sh /etc/eks/get-ecr-uri.sh +sudo mv $WORKING_DIR/get-ecr-uri.sh /etc/eks/get-ecr-uri.sh sudo chmod +x /etc/eks/get-ecr-uri.sh -sudo mv $TEMPLATE_DIR/eni-max-pods.txt /etc/eks/eni-max-pods.txt -sudo mv $TEMPLATE_DIR/bootstrap.sh /etc/eks/bootstrap.sh +sudo mv $WORKING_DIR/eni-max-pods.txt /etc/eks/eni-max-pods.txt +sudo mv $WORKING_DIR/bootstrap.sh /etc/eks/bootstrap.sh sudo chmod +x /etc/eks/bootstrap.sh -sudo mv $TEMPLATE_DIR/max-pods-calculator.sh /etc/eks/max-pods-calculator.sh +sudo mv $WORKING_DIR/max-pods-calculator.sh /etc/eks/max-pods-calculator.sh sudo chmod +x /etc/eks/max-pods-calculator.sh SONOBUOY_E2E_REGISTRY="${SONOBUOY_E2E_REGISTRY:-}" if [[ -n "$SONOBUOY_E2E_REGISTRY" ]]; then - sudo mv $TEMPLATE_DIR/sonobuoy-e2e-registry-config 
/etc/eks/sonobuoy-e2e-registry-config + sudo mv $WORKING_DIR/sonobuoy-e2e-registry-config /etc/eks/sonobuoy-e2e-registry-config sudo sed -i s,SONOBUOY_E2E_REGISTRY,$SONOBUOY_E2E_REGISTRY,g /etc/eks/sonobuoy-e2e-registry-config fi @@ -366,7 +366,7 @@ fi sudo chmod +x $ECR_CREDENTIAL_PROVIDER_BINARY sudo mkdir -p /etc/eks/image-credential-provider sudo mv $ECR_CREDENTIAL_PROVIDER_BINARY /etc/eks/image-credential-provider/ -sudo mv $TEMPLATE_DIR/ecr-credential-provider-config.json /etc/eks/image-credential-provider/config.json +sudo mv $WORKING_DIR/ecr-credential-provider-config.json /etc/eks/image-credential-provider/config.json ################################################################################ ### Cache Images ############################################################### @@ -489,13 +489,13 @@ sudo yum install -y amazon-ssm-agent ################################################################################ BASE_AMI_ID=$(imds /latest/meta-data/ami-id) -cat << EOF > /tmp/release +cat << EOF > "${WORKING_DIR}/release" BASE_AMI_ID="$BASE_AMI_ID" BUILD_TIME="$(date)" BUILD_KERNEL="$(uname -r)" ARCH="$(uname -m)" EOF -sudo mv /tmp/release /etc/eks/release +sudo mv "${WORKING_DIR}/release" /etc/eks/release sudo chown -R root:root /etc/eks ################################################################################ @@ -520,7 +520,7 @@ echo vm.max_map_count=524288 | sudo tee -a /etc/sysctl.conf ### adding log-collector-script ################################################ ################################################################################ sudo mkdir -p /etc/eks/log-collector-script/ -sudo cp $TEMPLATE_DIR/log-collector-script/eks-log-collector.sh /etc/eks/log-collector-script/ +sudo cp $WORKING_DIR/log-collector-script/eks-log-collector.sh /etc/eks/log-collector-script/ ################################################################################ ### Remove Yum Update from cloud-init config ################################### From 
7c45ddef58bbb50c869095eeb2185e41a745db6f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 22 Jun 2023 17:06:29 -0700 Subject: [PATCH 228/232] Update eni-max-pods.txt (#1330) --- files/eni-max-pods.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/files/eni-max-pods.txt b/files/eni-max-pods.txt index d24d8d28f..f82b87d9f 100644 --- a/files/eni-max-pods.txt +++ b/files/eni-max-pods.txt @@ -176,6 +176,14 @@ c7g.large 29 c7g.medium 8 c7g.metal 737 c7g.xlarge 58 +c7gn.12xlarge 234 +c7gn.16xlarge 737 +c7gn.2xlarge 58 +c7gn.4xlarge 234 +c7gn.8xlarge 234 +c7gn.large 29 +c7gn.medium 8 +c7gn.xlarge 58 cr1.8xlarge 234 d2.2xlarge 58 d2.4xlarge 234 @@ -233,6 +241,9 @@ h1.4xlarge 234 h1.8xlarge 234 hpc6a.48xlarge 100 hpc6id.32xlarge 51 +hpc7g.16xlarge 198 +hpc7g.4xlarge 198 +hpc7g.8xlarge 198 hs1.8xlarge 234 i2.2xlarge 58 i2.4xlarge 234 From 056e31f8c7477e893424abce468cb32bbcd1f079 Mon Sep 17 00:00:00 2001 From: Carter Date: Thu, 29 Jun 2023 09:15:21 -0700 Subject: [PATCH 229/232] Use recommended clocksources (#1328) --- files/bin/configure-clocksource | 46 +++++++++++++++++++++++++++++ files/bootstrap.sh | 5 ++++ files/configure-clocksource.service | 8 +++++ scripts/install-worker.sh | 22 +++----------- scripts/upgrade_kernel.sh | 6 ++++ test/Dockerfile | 1 + 6 files changed, 70 insertions(+), 18 deletions(-) create mode 100755 files/bin/configure-clocksource create mode 100644 files/configure-clocksource.service diff --git a/files/bin/configure-clocksource b/files/bin/configure-clocksource new file mode 100755 index 000000000..9815401f8 --- /dev/null +++ b/files/bin/configure-clocksource @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail +set -o nounset + +CLOCK_PATH="/sys/devices/system/clocksource/clocksource0" + +function log() { + echo >&2 "$@" +} + +function current-clocksource() { + cat "${CLOCK_PATH}/current_clocksource" +} + +function 
check-available-clocksource() { + grep --quiet "${1}" "${CLOCK_PATH}/available_clocksource" +} + +function try-set-clocksource() { + if check-available-clocksource "${1}"; then + echo "${1}" > "${CLOCK_PATH}/current_clocksource" + log "configured clocksource: ${1}" + else + log "clocksource not available: ${1}" + fi +} + +case "$(imds /latest/meta-data/system)" in + nitro) + CLOCKSOURCE="kvm-clock" + ;; + + **) + CLOCKSOURCE="tsc" + ;; +esac + +log "desired clocksource: ${CLOCKSOURCE}" + +if [ ! "$(current-clocksource)" = "${CLOCKSOURCE}" ]; then + try-set-clocksource "${CLOCKSOURCE}" +fi + +log "final clocksource: $(current-clocksource)" diff --git a/files/bootstrap.sh b/files/bootstrap.sh index 3729dad2c..8937784bb 100755 --- a/files/bootstrap.sh +++ b/files/bootstrap.sh @@ -325,6 +325,11 @@ if [ "$MOUNT_BPF_FS" = "true" ]; then mount-bpf-fs fi +cp -v /etc/eks/configure-clocksource.service /etc/systemd/system/configure-clocksource.service +chown root:root /etc/systemd/system/configure-clocksource.service +systemctl daemon-reload +systemctl enable --now configure-clocksource + ECR_URI=$(/etc/eks/get-ecr-uri.sh "${AWS_DEFAULT_REGION}" "${AWS_SERVICES_DOMAIN}" "${PAUSE_CONTAINER_ACCOUNT:-}") PAUSE_CONTAINER_IMAGE=${PAUSE_CONTAINER_IMAGE:-$ECR_URI/eks/pause} PAUSE_CONTAINER="$PAUSE_CONTAINER_IMAGE:$PAUSE_CONTAINER_VERSION" diff --git a/files/configure-clocksource.service b/files/configure-clocksource.service new file mode 100644 index 000000000..5274ca041 --- /dev/null +++ b/files/configure-clocksource.service @@ -0,0 +1,8 @@ +[Unit] +Description=Configure kernel clocksource + +[Service] +ExecStart=/usr/bin/configure-clocksource + +[Install] +WantedBy=multi-user.target diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 6255069ef..b62f81394 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -81,27 +81,13 @@ sudo yum versionlock kernel-$(uname -r) # Remove the ec2-net-utils package, if it's installed. 
This package interferes with the route setup on the instance. if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils -y -q; fi +sudo mkdir -p /etc/eks/ + ################################################################################ ### Time ####################################################################### ################################################################################ -# Make sure Amazon Time Sync Service starts on boot. -sudo chkconfig chronyd on - -# Make sure that chronyd syncs RTC clock to the kernel. -cat << EOF | sudo tee -a /etc/chrony.conf -# This directive enables kernel synchronisation (every 11 minutes) of the -# real-time clock. Note that it can’t be used along with the 'rtcfile' directive. -rtcsync -EOF - -# If current clocksource is xen, switch to tsc -if grep --quiet xen /sys/devices/system/clocksource/clocksource0/current_clocksource \ - && grep --quiet tsc /sys/devices/system/clocksource/clocksource0/available_clocksource; then - echo "tsc" | sudo tee /sys/devices/system/clocksource/clocksource0/current_clocksource -else - echo "tsc as a clock source is not applicable, skipping." 
-fi +sudo mv $WORKING_DIR/configure-clocksource.service /etc/eks/configure-clocksource.service ################################################################################ ### SSH ######################################################################## @@ -114,7 +100,7 @@ sudo systemctl restart sshd.service ################################################################################ ### iptables ################################################################### ################################################################################ -sudo mkdir -p /etc/eks + sudo mv $WORKING_DIR/iptables-restore.service /etc/eks/iptables-restore.service ################################################################################ diff --git a/scripts/upgrade_kernel.sh b/scripts/upgrade_kernel.sh index 67e509caa..52d696056 100755 --- a/scripts/upgrade_kernel.sh +++ b/scripts/upgrade_kernel.sh @@ -24,4 +24,10 @@ sudo grubby \ --update-kernel=ALL \ --args="psi=1" +# use the tsc clocksource by default +# https://repost.aws/knowledge-center/manage-ec2-linux-clock-source +sudo grubby \ + --update-kernel=ALL \ + --args="clocksource=tsc tsc=reliable" + sudo reboot diff --git a/test/Dockerfile b/test/Dockerfile index 5470ad965..d00837c3e 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -7,6 +7,7 @@ RUN amazon-linux-extras enable docker && \ ENV IMDS_ENDPOINT=127.0.0.1:1338 COPY --from=aemm /ec2-metadata-mock /sbin/ec2-metadata-mock +RUN mkdir -p /etc/systemd/system RUN mkdir -p /etc/eks/containerd COPY files/ /etc/eks/ COPY files/containerd-config.toml files/kubelet-containerd.service files/pull-sandbox-image.sh files/sandbox-image.service /etc/eks/containerd/ From ce86185017f4b5de53fe9803875fc26bfddeef13 Mon Sep 17 00:00:00 2001 From: Filip Honckiewicz Date: Mon, 17 Jul 2023 18:11:47 +0200 Subject: [PATCH 230/232] Test Packer 1.9.1 --- .circleci/config.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml 
b/.circleci/config.yml index e12a3f87c..ee62dbe36 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -5,7 +5,7 @@ jobs: prepare_env: # job to prepare build environment docker: - - image: 411466821576.dkr.ecr.us-west-2.amazonaws.com/devops/ci/packer:0.0.1 + - image: 411466821576.dkr.ecr.us-west-2.amazonaws.com/devops/ci/packer:1.9.1 working_directory: ~/aws_ecr_ami steps: - checkout: @@ -16,7 +16,7 @@ jobs: - aws_ecr_ami validate: docker: - - image: 411466821576.dkr.ecr.us-west-2.amazonaws.com/devops/ci/packer:0.0.1 + - image: 411466821576.dkr.ecr.us-west-2.amazonaws.com/devops/ci/packer:1.9.1 steps: - attach_workspace: at: . @@ -25,7 +25,7 @@ jobs: command: cd aws_ecr_ami && make -e -j2 all-validate build: docker: - - image: 411466821576.dkr.ecr.us-west-2.amazonaws.com/devops/ci/packer:0.0.1 + - image: 411466821576.dkr.ecr.us-west-2.amazonaws.com/devops/ci/packer:1.9.1 steps: - attach_workspace: at: . @@ -66,6 +66,7 @@ workflows: branches: only: - dev + - test_packer_1_9_1 requires: - prepare_env - build: @@ -90,3 +91,4 @@ workflows: branches: only: - dev + - test_packer_1_9_1 From 4b31498ab333b07abaa608454561c9c0cb5b11d0 Mon Sep 17 00:00:00 2001 From: Filip Honckiewicz Date: Tue, 1 Aug 2023 18:15:50 +0200 Subject: [PATCH 231/232] Changes after merge with upstream: * Simplify CircleCI pipeline * Use upstream Makefile * Use fluence-eks-worker-al2-variable.json to set variables * Cleanups. 
Delete healthchecks and service configurations not existing in the upstream repo --- .circleci/config.yml | 41 +------- CHANGELOG_AMS.md => CHANGELOG_FLUENCE.md | 4 + Makefile | 63 +------------ eks-worker-bionic.json | 98 -------------------- files/health-monitor.sh | 96 ------------------- files/iptables-restore.service | 2 +- files/kube-container-runtime-monitor.service | 14 --- files/kubelet-monitor.service | 14 --- fluence-eks-worker-al2-variables.json | 38 ++++++++ 9 files changed, 51 insertions(+), 319 deletions(-) rename CHANGELOG_AMS.md => CHANGELOG_FLUENCE.md (84%) delete mode 100644 eks-worker-bionic.json delete mode 100644 files/health-monitor.sh delete mode 100644 files/kube-container-runtime-monitor.service delete mode 100644 files/kubelet-monitor.service create mode 100644 fluence-eks-worker-al2-variables.json diff --git a/.circleci/config.yml b/.circleci/config.yml index ee62dbe36..fb9711121 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -14,15 +14,6 @@ jobs: root: "~" paths: - aws_ecr_ami - validate: - docker: - - image: 411466821576.dkr.ecr.us-west-2.amazonaws.com/devops/ci/packer:1.9.1 - steps: - - attach_workspace: - at: . - - run: - name: Packer Validate - command: cd aws_ecr_ami && make -e -j2 all-validate build: docker: - image: 411466821576.dkr.ecr.us-west-2.amazonaws.com/devops/ci/packer:1.9.1 @@ -35,47 +26,24 @@ jobs: NORMALIZED_BRANCH_NAME=$(echo ${CIRCLE_BRANCH} | sed 's/[^a-zA-Z0-9]/-/g' | cut -c 1-50) echo "export BUILD_TAG=${NORMALIZED_BRANCH_NAME}-${CIRCLE_BUILD_NUM}-${CIRCLE_SHA1::7}" >> ${BASH_ENV} - run: - name: Packer build + name: Packer validate and build AMI # AWS sometimes take really long to finish ami build. 
# By default, Circle CI have 10 minutes timeout without response, # so we must extend that timeout limit to be sure that ami build can pass no_output_timeout: 60m - command: cd aws_ecr_ami && make -e -j2 all + command: cd aws_ecr_ami && PACKER_VARIABLE_FILE=fluence-eks-worker-al2-variables.json make -e -j2 1.23 workflows: aws_eks_ami: jobs: - prepare_env: context: AWS_OPERATIONS_DEV_0 - - validate: - name: validate main - context: - - AWS_OPERATIONS_0 - - PACKER_OPERATIONS_0 - filters: - branches: - only: - - main - requires: - - prepare_env - - validate: - name: validate dev - context: - - AWS_OPERATIONS_DEV_0 - - PACKER_OPERATIONS_DEV_0 - filters: - branches: - only: - - dev - - test_packer_1_9_1 - requires: - - prepare_env - build: name: build main context: - AWS_OPERATIONS_0 - PACKER_OPERATIONS_0 requires: - - validate main + - prepare_env filters: branches: only: @@ -86,9 +54,8 @@ workflows: - AWS_OPERATIONS_DEV_0 - PACKER_OPERATIONS_DEV_0 requires: - - validate dev + - prepare_env filters: branches: only: - dev - - test_packer_1_9_1 diff --git a/CHANGELOG_AMS.md b/CHANGELOG_FLUENCE.md similarity index 84% rename from CHANGELOG_AMS.md rename to CHANGELOG_FLUENCE.md index acfed4077..6499c5eb2 100644 --- a/CHANGELOG_AMS.md +++ b/CHANGELOG_FLUENCE.md @@ -1,5 +1,9 @@ # Changelog +### 2023-08-07 + +* Upgrade repository with upstream repo: `awslabs/amazon-eks-ami`, with tag `v20230607` + ### 2020-05-07 * Switch to Docker 19.03.6 diff --git a/Makefile b/Makefile index 0818b0412..d2b9d92f2 100644 --- a/Makefile +++ b/Makefile @@ -1,34 +1,3 @@ -# commands: -P := packer - -#Fluence Edited Variables -AWS_DEFAULT_REGION = us-west-2 -build_tag := $(or $(BUILD_TAG), $(shell date +%s)) -encrypted := true -PACKER_BINARY = docker run -v /mnt/credentials:/root/.aws/credentials \ - -e AWS_SHARED_CREDENTIALS_FILE=/root/.aws/credentials \ - -v `pwd`/:/workspace -w /workspace \ - 876270261134.dkr.ecr.us-west-2.amazonaws.com/devops/packer:1.6.1 -PACKER_VARIABLES := aws_region 
ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date kernel_version docker_version containerd_version runc_version cni_plugin_version source_ami_id source_ami_owners source_ami_filter_name arch instance_type security_group_id additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry build_tag encrypted - -ifndef VPC_ID - $(error VPC_ID is undefined) -endif - -ifndef SUBNET_ID - $(error SUBNET_ID is undefined) -endif - -ifndef AMI_USERS - $(error AMI_USERS is undefined) -endif - -ifndef KMS_KEY_ID - $(error KMS_KEY_ID is undefined) -endif - -#PACKER_BINARY ?= packer -#PACKER_VARIABLES := aws_region ami_name binary_bucket_name binary_bucket_region kubernetes_version kubernetes_build_date kernel_version docker_version containerd_version runc_version cni_plugin_version source_ami_id source_ami_owners source_ami_filter_name arch instance_type security_group_id additional_yum_repos pull_cni_from_github sonobuoy_e2e_registry MAKEFILE_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) PACKER_DEFAULT_VARIABLE_FILE ?= $(MAKEFILE_DIR)/eks-worker-al2-variables.json @@ -59,11 +28,11 @@ endif arch ?= x86_64 ifeq ($(arch), arm64) -instance_type ?= m6g.large -ami_name ?= amazon-eks-arm64-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d%H%M%S') + instance_type ?= m6g.large + ami_name ?= amazon-eks-arm64-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d%H%M%S') else -instance_type ?= m4.large -ami_name ?= amazon-eks-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d%H%M%S') + instance_type ?= m5.large + ami_name ?= amazon-eks-node-$(K8S_VERSION_MINOR)-v$(shell date +'%Y%m%d%H%M%S') endif ifeq ($(aws_region), cn-northwest-1) @@ -120,8 +89,6 @@ PACKER_VAR_FLAGS := -var-file $(PACKER_DEFAULT_VARIABLE_FILE) \ $(if $(PACKER_VARIABLE_FILE),-var-file=$(PACKER_VARIABLE_FILE),) \ $(foreach packerVar,$(PACKER_VARIABLES),-var $(packerVar)='$($(packerVar))') -all-validate: 1.22-validate - .PHONY: validate validate: ## Validate packer config 
$(PACKER_BINARY) validate $(PACKER_VAR_FLAGS) $(PACKER_TEMPLATE_FILE) @@ -133,7 +100,6 @@ k8s: validate ## Build default K8s version of EKS Optimized AL2 AMI # Build dates and versions taken from https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html - .PHONY: 1.22 1.22: ## Build EKS Optimized AL2 AMI - K8s 1.22 $(MAKE) k8s kubernetes_version=1.22.17 kubernetes_build_date=2023-05-11 pull_cni_from_github=true @@ -166,24 +132,3 @@ clean: .PHONY: help help: ## Display help @awk 'BEGIN {FS = ":.*##"; printf "Usage:\n make \033[36m\033[0m\n"} /^[\.a-zA-Z_0-9\-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) -.PHONY: 1.21-build -1.21-build: - $(MAKE) ci-build kubernetes_version=1.21.14 kubernetes_build_date=2022-10-31 pull_cni_from_github=true - -.PHONY: 1.22-validate -1.22-validate: - $(MAKE) ci-validate kubernetes_version=1.22.17 kubernetes_build_date=2023-01-30 pull_cni_from_github=true - -.PHONY: 1.22-build -1.22-build: - $(MAKE) ci-build kubernetes_version=1.22.17 kubernetes_build_date=2023-01-30 pull_cni_from_github=true - -# Circle CI pipeline -.PHONY: ci-validate -ci-validate: - $(P) validate $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json - -.PHONY: ci-build -ci-build: - @echo "$(T_GREEN)Building AMI for version $(T_YELLOW)$(kubernetes_version)$(T_GREEN) on $(T_YELLOW)$(arch)$(T_RESET)" - $(P) build $(foreach packerVar,$(PACKER_VARIABLES), $(if $($(packerVar)),--var $(packerVar)='$($(packerVar))',)) eks-worker-al2.json \ No newline at end of file diff --git a/eks-worker-bionic.json b/eks-worker-bionic.json deleted file mode 100644 index f6283d84b..000000000 --- a/eks-worker-bionic.json +++ /dev/null @@ -1,98 +0,0 @@ -{ - "variables": { - "kubernetes_version": "1.12", - "aws_region": "us-west-2", - "aws_region_az": "us-west-2a", - "build_tag": "", - "ami_name": "bionic-eks-node", - 
"binary_bucket_name": "amazon-eks", - "binary_bucket_region": "us-west-2", - "binary_bucket_path": "", - "docker_version": "20.10.7-0ubuntu5~18.04.3", - "creator": "{{env `USER`}}", - "instance_type": "m5.large", - "source_ami_id": "", - "source_ami_owners": "099720109477", - "source_ami_filter_name": "ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server-*", - "encrypted": "false", - "kms_key_id": "", - "cni_version": "v0.6.0", - "cni_plugin_version": "v0.7.1", - "aws_access_key_id": "{{env `AWS_ACCESS_KEY_ID`}}", - "aws_secret_access_key": "{{env `AWS_SECRET_ACCESS_KEY`}}", - "aws_session_token": "{{env `AWS_SESSION_TOKEN`}}" - }, - - "builders": [ - { - "type": "amazon-ebs", - "region": "{{user `aws_region`}}", - "availability_zone": "{{user `aws_region_az`}}", - "source_ami": "{{user `source_ami_id`}}", - "source_ami_filter": { - "filters": { - "architecture": "x86_64", - "name": "{{user `source_ami_filter_name`}}", - "root-device-type": "ebs", - "state": "available", - "virtualization-type": "hvm" - }, - "owners": [ "{{user `source_ami_owners`}}" ], - "most_recent": true - }, - "instance_type": "{{user `instance_type`}}", - "ssh_username": "ubuntu", - "ssh_pty": true, - "encrypt_boot": "{{user `encrypted`}}", - "kms_key_id": "{{user `kms_key_id`}}", - "run_tags": { - "creator": "{{user `creator`}}" - }, - "tags": { - "created": "{{timestamp}}", - "docker_version": "{{ user `docker_version`}}", - "source_ami_id": "{{ user `source_ami_id`}}", - "kubernetes": "{{ user `binary_bucket_path`}}", - "cni_version": "{{ user `cni_version`}}", - "cni_plugin_version": "{{ user `cni_plugin_version`}}" - }, - "ami_name": "{{user `ami_name`}}-{{user `kubernetes_version`}}-{{user `build_tag`}}", - "ami_description": "EKS Kubernetes {{user `kubernetes_version`}} Worker AMI based on Ubuntu 18.04 Bionic" - } - ], - - "provisioners": [ - { - "type": "shell", - "inline": ["mkdir -p /tmp/worker/"] - }, - { - "type": "file", - "source": "./files/", - "destination": "/tmp/worker/" - 
}, - { - "type": "shell", - "script": "install-worker.sh", - "environment_vars": [ - "KUBERNETES_VERSION={{user `kubernetes_version`}}", - "BINARY_BUCKET_NAME={{user `binary_bucket_name`}}", - "BINARY_BUCKET_PATH={{user `binary_bucket_path`}}", - "BINARY_BUCKET_REGION={{user `binary_bucket_region`}}", - "DOCKER_VERSION={{user `docker_version`}}", - "CNI_VERSION={{user `cni_version`}}", - "CNI_PLUGIN_VERSION={{user `cni_plugin_version`}}", - "AWS_ACCESS_KEY_ID={{user `aws_access_key_id`}}", - "AWS_SECRET_ACCESS_KEY={{user `aws_secret_access_key`}}", - "AWS_SESSION_TOKEN={{user `aws_session_token`}}" - ] - } - ], - "post-processors": [ - { - "type": "manifest", - "output": "manifest.json", - "strip_path": true - } - ] -} diff --git a/files/health-monitor.sh b/files/health-monitor.sh deleted file mode 100644 index 2086877b8..000000000 --- a/files/health-monitor.sh +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2016 The Kubernetes Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This script is for master and node instance health monitoring, which is -# packed in kube-manifest tarball. It is executed through a systemd service -# in cluster/gce/gci/.yaml. The env variables come from an env -# file provided by the systemd service. - -set -o nounset -set -o pipefail - -# We simply kill the process when there is a failure. Another systemd service will -# automatically restart the process. 
-function container_runtime_monitoring { - local -r max_attempts=5 - local attempt=1 - local -r container_runtime_name="${CONTAINER_RUNTIME_NAME:-docker}" - local healthcheck_command="docker ps" - # Container runtime startup takes time. Make initial attempts before starting - # killing the container runtime. - until timeout 60 ${healthcheck_command} > /dev/null; do - if (( attempt == max_attempts )); then - echo "Max attempt ${max_attempts} reached! Proceeding to monitor container runtime healthiness." - break - fi - echo "$attempt initial attempt \"${healthcheck_command}\"! Trying again in $attempt seconds..." - sleep "$(( 2 ** attempt++ ))" - done - while true; do - if ! timeout 60 ${healthcheck_command} > /dev/null; then - echo "Container runtime ${container_runtime_name} failed!" - if [[ "$container_runtime_name" == "docker" ]]; then - # Dump stack of docker daemon for investigation. - # Log fle name looks like goroutine-stacks-TIMESTAMP and will be saved to - # the exec root directory, which is /var/run/docker/ on Ubuntu and COS. - pkill -SIGUSR1 dockerd - fi - systemctl kill --kill-who=main "${container_runtime_name}" - # Wait for a while, as we don't want to kill it again before it is really up. - sleep 120 - else - sleep "${SLEEP_SECONDS}" - fi - done -} - -function kubelet_monitoring { - echo "Wait for 2 minutes for kubelet to be functional" - # TODO(andyzheng0831): replace it with a more reliable method if possible. - sleep 120 - local -r max_seconds=10 - local output="" - while [ 1 ]; do - if ! output=$(curl -m "${max_seconds}" -f -s -S http://127.0.0.1:10256/healthz 2>&1); then - # Print the response and/or errors. - echo $output - echo "Kubelet is unhealthy!" - systemctl restart kubelet - # Wait for a while, as we don't want to kill it again before it is really up. 
- sleep 60 - else - sleep "${SLEEP_SECONDS}" - fi - done -} - - -############## Main Function ################ -if [[ "$#" -ne 1 ]]; then - echo "Usage: health-monitor.sh " - exit 1 -fi - -SLEEP_SECONDS=10 -component=$1 -echo "Start kubernetes health monitoring for ${component}" -if [[ "${component}" == "container-runtime" ]]; then - container_runtime_monitoring -elif [[ "${component}" == "kubelet" ]]; then - kubelet_monitoring -else - echo "Health monitoring for component "${component}" is not supported!" -fi diff --git a/files/iptables-restore.service b/files/iptables-restore.service index eb9d0a3ff..6ede7bb67 100644 --- a/files/iptables-restore.service +++ b/files/iptables-restore.service @@ -6,7 +6,7 @@ After=docker.service [Service] Type=oneshot -ExecStart=/bin/bash -c "/sbin/iptables-restore < /etc/iptables.rules" +ExecStart=/bin/bash -c "/sbin/iptables-restore < /etc/sysconfig/iptables" [Install] WantedBy=multi-user.target diff --git a/files/kube-container-runtime-monitor.service b/files/kube-container-runtime-monitor.service deleted file mode 100644 index 2a84a1a56..000000000 --- a/files/kube-container-runtime-monitor.service +++ /dev/null @@ -1,14 +0,0 @@ -[Unit] -Description=Kubernetes health monitoring for container runtime -After=docker.service - -[Service] -Restart=always -RestartSec=10 -RemainAfterExit=yes -RemainAfterExit=yes -ExecStartPre=/bin/chmod 544 /usr/local/bin/health-monitor.sh -ExecStart=/usr/local/bin/health-monitor.sh container-runtime - -[Install] -WantedBy=multi-user.target diff --git a/files/kubelet-monitor.service b/files/kubelet-monitor.service deleted file mode 100644 index 4f857b248..000000000 --- a/files/kubelet-monitor.service +++ /dev/null @@ -1,14 +0,0 @@ -[Unit] -Description=Kubernetes health monitoring for kubelet -After=kubelet.service - -[Service] -Restart=always -RestartSec=10 -RemainAfterExit=yes -RemainAfterExit=yes -ExecStartPre=/bin/chmod 544 /usr/local/bin/health-monitor.sh -ExecStart=/usr/local/bin/health-monitor.sh 
kubelet - -[Install] -WantedBy=multi-user.target diff --git a/fluence-eks-worker-al2-variables.json b/fluence-eks-worker-al2-variables.json new file mode 100644 index 000000000..e23a4824e --- /dev/null +++ b/fluence-eks-worker-al2-variables.json @@ -0,0 +1,38 @@ +{ + "additional_yum_repos": "", + "ami_component_description": "(k8s: {{ user `kubernetes_version` }}, docker: {{ user `docker_version` }}, containerd: {{ user `containerd_version` }})", + "ami_description": "EKS Kubernetes Worker AMI with AmazonLinux2 image", + "ami_regions": "", + "ami_users": "{{env `AMI_USERS`}}", + "associate_public_ip_address": "", + "aws_access_key_id": "{{env `AWS_ACCESS_KEY_ID`}}", + "aws_region": "us-west-2", + "aws_secret_access_key": "{{env `AWS_SECRET_ACCESS_KEY`}}", + "aws_session_token": "{{env `AWS_SESSION_TOKEN`}}", + "binary_bucket_name": "amazon-eks", + "binary_bucket_region": "us-west-2", + "cache_container_images": "false", + "cni_plugin_version": "v0.8.6", + "containerd_version": "1.6.*", + "creator": "{{env `USER`}}", + "docker_version": "20.10.23-1.amzn2.0.1", + "encrypted": "true", + "kernel_version": "", + "kms_key_id": "{{env `KMS_KEY_ID`}}", + "launch_block_device_mappings_volume_size": "4", + "pause_container_version": "3.5", + "pull_cni_from_github": "true", + "remote_folder": "/tmp", + "runc_version": "1.1.5-1.amzn2", + "security_group_id": "", + "sonobuoy_e2e_registry": "", + "source_ami_filter_name": "amzn2-ami-minimal-hvm-*", + "source_ami_id": "", + "source_ami_owners": "137112412989", + "ssh_interface": "", + "ssh_username": "ec2-user", + "subnet_id": "{{env `SUBNET_ID`}}", + "temporary_security_group_source_cidrs": "", + "volume_type": "gp2", + "working_dir": "{{user `remote_folder`}}/worker" +} From 9d5a4c42026eee53c0532be7f1a6c665ae2f7b19 Mon Sep 17 00:00:00 2001 From: Filip Honckiewicz Date: Tue, 8 Aug 2023 18:14:09 +0200 Subject: [PATCH 232/232] Remove Fluence installation of the htop, screen and nmap from install-worker.sh script --- 
scripts/install-worker.sh | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scripts/install-worker.sh b/scripts/install-worker.sh index 01cc43da4..ba5a69422 100644 --- a/scripts/install-worker.sh +++ b/scripts/install-worker.sh @@ -78,12 +78,6 @@ sudo package-cleanup --oldkernels --count=1 -y sudo yum versionlock kernel-$(uname -r) -# Fluence packages -sudo yum install -y \ - htop \ - screen \ - nmap - # Remove the ec2-net-utils package, if it's installed. This package interferes with the route setup on the instance. if yum list installed | grep ec2-net-utils; then sudo yum remove ec2-net-utils -y -q; fi