From cab5114a6054b5f3e5efea07e36d6f0c65ff8d9a Mon Sep 17 00:00:00 2001 From: Nuru Date: Mon, 14 Sep 2020 11:25:43 -0700 Subject: [PATCH] Refactor for clarity (#33) --- README.md | 4 +- docs/terraform.md | 4 +- iam.tf | 85 ++++++++++++ launch-template.tf | 104 +++++++++++++++ main.tf | 266 ++++++------------------------------- sg.tf => security-group.tf | 0 variables.tf | 17 ++- 7 files changed, 253 insertions(+), 227 deletions(-) create mode 100644 iam.tf create mode 100644 launch-template.tf rename sg.tf => security-group.tf (100%) diff --git a/README.md b/README.md index c082c17..b438f4e 100644 --- a/README.md +++ b/README.md @@ -202,6 +202,7 @@ Available targets: | attributes | Additional attributes (e.g. `1`) | `list(string)` | `[]` | no | | before\_cluster\_joining\_userdata | Additional `bash` commands to execute on each worker node before joining the EKS cluster (before executing the `bootstrap.sh` script). For more info, see https://kubedex.com/90-days-of-aws-eks-in-production | `string` | `""` | no | | bootstrap\_additional\_options | Additional options to bootstrap.sh. DO NOT include `--kubelet-additional-args`, use `kubelet_additional_args` var instead. | `string` | `""` | no | +| cluster\_autoscaler\_enabled | Set true to label the node group so that the [Kubernetes Cluster Autoscaler](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md#auto-discovery-setup) will discover and autoscale it | `bool` | `null` | no | | cluster\_name | The name of the EKS cluster | `string` | n/a | yes | | context | Single object for setting entire context at once.
See description of individual variables for details.
Leave string and numeric variables as `null` to use default value.
Individual variable settings (non-null) override settings in context object,
except for attributes, tags, and additional\_tag\_map, which are merged. |
object({
enabled = bool
namespace = string
environment = string
stage = string
name = string
delimiter = string
attributes = list(string)
tags = map(string)
additional_tag_map = map(string)
regex_replace_chars = string
label_order = list(string)
id_length_limit = number
})
|
{
"additional_tag_map": {},
"attributes": [],
"delimiter": null,
"enabled": true,
"environment": null,
"id_length_limit": null,
"label_order": [],
"name": null,
"namespace": null,
"regex_replace_chars": null,
"stage": null,
"tags": {}
}
| no | | create\_before\_destroy | Set true in order to create the new node group before destroying the old one.
If false, the old node group will be destroyed first, causing downtime.
Changing this setting will always cause the node group to be replaced. | `bool` | `false` | no | @@ -209,7 +210,7 @@ Available targets: | desired\_size | Initial desired number of worker nodes (external changes ignored) | `number` | n/a | yes | | disk\_size | Disk size in GiB for worker nodes. Defaults to 20. Ignored if `launch_template_id` is supplied.
Terraform will only perform drift detection if a configuration value is provided. | `number` | `20` | no | | ec2\_ssh\_key | SSH key pair name to use to access the worker nodes | `string` | `null` | no | -| enable\_cluster\_autoscaler | Set true to allow Kubernetes Cluster Auto Scaler to scale the node group | `bool` | `false` | no | +| enable\_cluster\_autoscaler | (Deprecated, use `cluster_autoscaler_enabled`) Set true to allow Kubernetes Cluster Auto Scaler to scale the node group | `bool` | `null` | no | | enabled | Set to false to prevent the module from creating any resources | `bool` | `null` | no | | environment | Environment, e.g. 'uw2', 'us-west-2', OR 'prod', 'staging', 'dev', 'UAT' | `string` | `null` | no | | existing\_workers\_role\_policy\_arns | List of existing policy ARNs that will be attached to the workers default role on creation | `list(string)` | `[]` | no | @@ -235,6 +236,7 @@ Available targets: | subnet\_ids | A list of subnet IDs to launch resources in | `list(string)` | n/a | yes | | tags | Additional tags (e.g. `map('BusinessUnit','XYZ')`) | `map(string)` | `{}` | no | | userdata\_override\_base64 | Many features of this module rely on the `bootstrap.sh` provided with Amazon Linux, and this module
may generate "user data" that expects to find that script. If you want to use an AMI that is not
compatible with the Amazon Linux `bootstrap.sh` initialization, then use `userdata_override_base64` to provide
your own (Base64 encoded) user data. Use "" to prevent any user data from being set.

Setting `userdata_override_base64` disables `kubernetes_taints`, `kubelet_additional_options`,
`before_cluster_joining_userdata`, `after_cluster_joining_userdata`, and `bootstrap_additional_options`. | `string` | `null` | no | +| worker\_role\_autoscale\_iam\_enabled | If true, the worker IAM role will be authorized to perform autoscaling operations. Not recommended.
Use [EKS IAM role for cluster autoscaler service account](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html) instead. | `bool` | `false` | no | ## Outputs diff --git a/docs/terraform.md b/docs/terraform.md index 7598155..4f30392 100644 --- a/docs/terraform.md +++ b/docs/terraform.md @@ -28,6 +28,7 @@ | attributes | Additional attributes (e.g. `1`) | `list(string)` | `[]` | no | | before\_cluster\_joining\_userdata | Additional `bash` commands to execute on each worker node before joining the EKS cluster (before executing the `bootstrap.sh` script). For more info, see https://kubedex.com/90-days-of-aws-eks-in-production | `string` | `""` | no | | bootstrap\_additional\_options | Additional options to bootstrap.sh. DO NOT include `--kubelet-additional-args`, use `kubelet_additional_args` var instead. | `string` | `""` | no | +| cluster\_autoscaler\_enabled | Set true to label the node group so that the [Kubernetes Cluster Autoscaler](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md#auto-discovery-setup) will discover and autoscale it | `bool` | `null` | no | | cluster\_name | The name of the EKS cluster | `string` | n/a | yes | | context | Single object for setting entire context at once.
See description of individual variables for details.
Leave string and numeric variables as `null` to use default value.
Individual variable settings (non-null) override settings in context object,
except for attributes, tags, and additional\_tag\_map, which are merged. |
object({
enabled = bool
namespace = string
environment = string
stage = string
name = string
delimiter = string
attributes = list(string)
tags = map(string)
additional_tag_map = map(string)
regex_replace_chars = string
label_order = list(string)
id_length_limit = number
})
|
{
"additional_tag_map": {},
"attributes": [],
"delimiter": null,
"enabled": true,
"environment": null,
"id_length_limit": null,
"label_order": [],
"name": null,
"namespace": null,
"regex_replace_chars": null,
"stage": null,
"tags": {}
}
| no | | create\_before\_destroy | Set true in order to create the new node group before destroying the old one.
If false, the old node group will be destroyed first, causing downtime.
Changing this setting will always cause the node group to be replaced. | `bool` | `false` | no | @@ -35,7 +36,7 @@ | desired\_size | Initial desired number of worker nodes (external changes ignored) | `number` | n/a | yes | | disk\_size | Disk size in GiB for worker nodes. Defaults to 20. Ignored if `launch_template_id` is supplied.
Terraform will only perform drift detection if a configuration value is provided. | `number` | `20` | no | | ec2\_ssh\_key | SSH key pair name to use to access the worker nodes | `string` | `null` | no | -| enable\_cluster\_autoscaler | Set true to allow Kubernetes Cluster Auto Scaler to scale the node group | `bool` | `false` | no | +| enable\_cluster\_autoscaler | (Deprecated, use `cluster_autoscaler_enabled`) Set true to allow Kubernetes Cluster Auto Scaler to scale the node group | `bool` | `null` | no | | enabled | Set to false to prevent the module from creating any resources | `bool` | `null` | no | | environment | Environment, e.g. 'uw2', 'us-west-2', OR 'prod', 'staging', 'dev', 'UAT' | `string` | `null` | no | | existing\_workers\_role\_policy\_arns | List of existing policy ARNs that will be attached to the workers default role on creation | `list(string)` | `[]` | no | @@ -61,6 +62,7 @@ | subnet\_ids | A list of subnet IDs to launch resources in | `list(string)` | n/a | yes | | tags | Additional tags (e.g. `map('BusinessUnit','XYZ')`) | `map(string)` | `{}` | no | | userdata\_override\_base64 | Many features of this module rely on the `bootstrap.sh` provided with Amazon Linux, and this module
may generate "user data" that expects to find that script. If you want to use an AMI that is not
compatible with the Amazon Linux `bootstrap.sh` initialization, then use `userdata_override_base64` to provide
your own (Base64 encoded) user data. Use "" to prevent any user data from being set.

Setting `userdata_override_base64` disables `kubernetes_taints`, `kubelet_additional_options`,
`before_cluster_joining_userdata`, `after_cluster_joining_userdata`, and `bootstrap_additional_options`. | `string` | `null` | no | +| worker\_role\_autoscale\_iam\_enabled | If true, the worker IAM role will be authorized to perform autoscaling operations. Not recommended.
Use [EKS IAM role for cluster autoscaler service account](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html) instead. | `bool` | `false` | no |

## Outputs

diff --git a/iam.tf b/iam.tf
new file mode 100644
index 0000000..b6c6947
--- /dev/null
+++ b/iam.tf
@@ -0,0 +1,85 @@
+locals {
+  aws_policy_prefix = format("arn:%s:iam::aws:policy", join("", data.aws_partition.current.*.partition))
+}
+
+data "aws_partition" "current" {
+  count = local.enabled ? 1 : 0
+}
+
+data "aws_iam_policy_document" "assume_role" {
+  count = local.enabled ? 1 : 0
+
+  statement {
+    effect = "Allow"
+    actions = ["sts:AssumeRole"]
+
+    principals {
+      type = "Service"
+      identifiers = ["ec2.amazonaws.com"]
+    }
+  }
+}
+
+data "aws_iam_policy_document" "amazon_eks_worker_node_autoscale_policy" {
+  count = (local.enabled && var.worker_role_autoscale_iam_enabled) ? 1 : 0
+  statement {
+    sid = "AllowToScaleEKSNodeGroupAutoScalingGroup"
+
+    actions = [
+      "autoscaling:DescribeAutoScalingGroups",
+      "autoscaling:DescribeAutoScalingInstances",
+      "autoscaling:DescribeLaunchConfigurations",
+      "autoscaling:DescribeTags",
+      "autoscaling:SetDesiredCapacity",
+      "autoscaling:TerminateInstanceInAutoScalingGroup",
+      "ec2:DescribeLaunchTemplateVersions"
+    ]
+
+    resources = [
+      "*"
+    ]
+  }
+}
+
+resource "aws_iam_policy" "amazon_eks_worker_node_autoscale_policy" {
+  count = (local.enabled && var.worker_role_autoscale_iam_enabled) ? 1 : 0
+  name = "${module.label.id}-autoscale"
+  policy = join("", data.aws_iam_policy_document.amazon_eks_worker_node_autoscale_policy.*.json)
+}
+
+resource "aws_iam_role" "default" {
+  count = local.enabled ? 1 : 0
+  name = module.label.id
+  assume_role_policy = join("", data.aws_iam_policy_document.assume_role.*.json)
+  tags = module.label.tags
+}
+
+resource "aws_iam_role_policy_attachment" "amazon_eks_worker_node_policy" {
+  count = local.enabled ? 1 : 0
+  policy_arn = format("%s/%s", local.aws_policy_prefix, "AmazonEKSWorkerNodePolicy")
+  role = join("", aws_iam_role.default.*.name)
+}
+
+resource "aws_iam_role_policy_attachment" "amazon_eks_worker_node_autoscale_policy" {
+  count = (local.enabled && var.worker_role_autoscale_iam_enabled) ? 1 : 0
+  policy_arn = join("", aws_iam_policy.amazon_eks_worker_node_autoscale_policy.*.arn)
+  role = join("", aws_iam_role.default.*.name)
+}
+
+resource "aws_iam_role_policy_attachment" "amazon_eks_cni_policy" {
+  count = local.enabled ? 1 : 0
+  policy_arn = format("%s/%s", local.aws_policy_prefix, "AmazonEKS_CNI_Policy")
+  role = join("", aws_iam_role.default.*.name)
+}
+
+resource "aws_iam_role_policy_attachment" "amazon_ec2_container_registry_read_only" {
+  count = local.enabled ? 1 : 0
+  policy_arn = format("%s/%s", local.aws_policy_prefix, "AmazonEC2ContainerRegistryReadOnly")
+  role = join("", aws_iam_role.default.*.name)
+}
+
+resource "aws_iam_role_policy_attachment" "existing_policies_for_eks_workers_role" {
+  for_each = local.enabled ? toset(var.existing_workers_role_policy_arns) : []
+  policy_arn = each.value
+  role = join("", aws_iam_role.default.*.name)
+}
diff --git a/launch-template.tf b/launch-template.tf
new file mode 100644
index 0000000..0e01979
--- /dev/null
+++ b/launch-template.tf
@@ -0,0 +1,104 @@
+locals {
+  # The heavy use of the ternary operator `? :` is because it is one of the few ways to avoid
+  # evaluating expressions. The unused expression is not evaluated and so it does not have to be valid.
+  # This allows us to refer to resources that are only conditionally created and avoid creating
+  # dependencies on them that would not be avoided by using expressions like `join("",expr)`.
+  #
+  # We use this pattern with enabled for every boolean that begins with `need_` even though
+  # it is sometimes redundant, to ensure that every `need_` is false and every dependent
+  # expression is not evaluated when enabled is false. Avoiding expression evaluations
+  # is also why, even for boolean expressions, we use
+  #   local.enabled ? expression : false
+  # rather than
+  #   local.enabled && expression
+  #
+  # The expression
+  #   length(compact([var.launch_template_version])) > 0
+  # is a shorter way of accomplishing the same test as
+  #   var.launch_template_version != null && var.launch_template_version != ""
+  # and as an idiom has the added benefit of being extensible:
+  #   length(compact([x, y])) > 0
+  # is the same as
+  #   x != null && x != "" && y != null && y != ""
+
+  configured_launch_template_name = var.launch_template_name == null ? "" : var.launch_template_name
+  configured_launch_template_version = length(local.configured_launch_template_name) > 0 && length(compact([var.launch_template_version])) > 0 ? var.launch_template_version : ""
+
+  generate_launch_template = local.enabled ? local.features_require_launch_template && length(local.configured_launch_template_name) == 0 : false
+  use_launch_template = local.enabled ? local.features_require_launch_template || length(local.configured_launch_template_name) > 0 : false
+
+  launch_template_id = local.use_launch_template ? (length(local.configured_launch_template_name) > 0 ? data.aws_launch_template.this[0].id : aws_launch_template.default[0].id) : ""
+  launch_template_version = local.use_launch_template ? (
+    length(local.configured_launch_template_version) > 0 ? local.configured_launch_template_version :
+    (
+      length(local.configured_launch_template_name) > 0 ? data.aws_launch_template.this[0].latest_version : aws_launch_template.default[0].latest_version
+    )
+  ) : ""
+
+  launch_template_ami = length(local.configured_ami_image_id) == 0 ? (local.features_require_ami ? data.aws_ami.selected[0].image_id : "") : local.configured_ami_image_id
+
+  launch_template_vpc_security_group_ids = (
+    local.need_remote_access_sg ?
+    concat(data.aws_eks_cluster.this[0].vpc_config[*].cluster_security_group_id, aws_security_group.remote_access.*.id) : []
+  )
+
+  # launch_template_key = join(":", coalescelist(local.launch_template_vpc_security_group_ids, ["closed"]))
+}
+
+resource "aws_launch_template" "default" {
+  # We'll use this default if we aren't provided with a launch template during invocation.
+  # We need to generate a new launch template every time the security group list changes
+  # so that we can detach the network interfaces from the security groups we no longer
+  # need, allowing those security groups to be deleted.
+
+  # As a workaround for https://github.com/hashicorp/terraform/issues/26166 we
+  # always create a launch template. Commented out code will be restored when the bug is fixed.
+  count = local.enabled ? 1 : 0
+  #count = (local.enabled && local.generate_launch_template) ? 1 : 0
+  #for_each = (local.enabled && local.generate_launch_template) ? toset([local.launch_template_key]) : toset([])
+
+  block_device_mappings {
+    device_name = "/dev/xvda"
+
+    ebs {
+      volume_size = var.disk_size
+    }
+  }
+
+  name_prefix = module.label.id
+  update_default_version = true
+
+  instance_type = var.instance_types[0]
+  image_id = local.launch_template_ami == "" ? null : local.launch_template_ami
+  key_name = local.have_ssh_key ? var.ec2_ssh_key : null
+
+  dynamic "tag_specifications" {
+    for_each = var.resources_to_tag
+    content {
+      resource_type = tag_specifications.value
+      tags = local.node_tags
+    }
+  }
+
+  # See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html
+  # and https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html
+  # Note in particular:
+  #   If any containers that you deploy to the node group use the Instance Metadata Service Version 2,
+  #   then make sure to set the Metadata response hop limit to 2 in your launch template.
+  metadata_options {
+    http_put_response_hop_limit = 2
+    # Despite being documented as "Optional", `http_endpoint` is required when `http_put_response_hop_limit` is set.
+    # We set it to the default setting of "enabled".
+    http_endpoint = "enabled"
+  }
+
+  vpc_security_group_ids = local.launch_template_vpc_security_group_ids
+  user_data = local.userdata
+  tags = local.node_group_tags
+}
+
+data "aws_launch_template" "this" {
+  count = local.enabled && length(local.configured_launch_template_name) > 0 ? 1 : 0
+
+  name = local.configured_launch_template_name
+}
diff --git a/main.tf b/main.tf
index 1a9a4e9..69e24c1 100644
--- a/main.tf
+++ b/main.tf
@@ -1,58 +1,23 @@
 locals {
   enabled = module.this.enabled
 
-  # The heavy use of the ternary operator `? :` is because it is one of the few ways to avoid
-  # evaluating expressions. The unused expression is not evaluated and so it does not have to be valid.
-  # This allows us to refer to resources that are only conditionally created and avoid creating
-  # dependencies on them that would not be avoided by using expressions like `join("",expr)`.
-  #
-  # We use this pattern with enabled for every boolean that begins with `need_` even though
-  # it is sometimes redundant, to ensure that ever `need_` is false and every dependent
-  # expression is not evaluated when enabled is false. Avoiding expression evaluations
-  # is also why, even for boolean expressions, we use
-  # local.enabled ? expression : false
-  # rather than
-  # local.enabled && expression
-  #
-  # The expression
-  # length(compact([var.launch_template_version])) > 0
-  # is a shorter way of accomplishing the same test as
-  # var.launch_template_version != null && var.launch_template_version != ""
-  # and as an idiom has the added benefit of being extensible:
-  # length(compact([x, y])) > 0
-  # is the same as
-  # x != null && x != "" && y != null && y != ""
-
-  configured_launch_template_name = var.launch_template_name == null ? "" : var.launch_template_name
-  configured_launch_template_version = length(local.configured_launch_template_name) > 0 && length(compact([var.launch_template_version])) > 0 ? var.launch_template_version : ""
-
-  configured_ami_image_id = var.ami_image_id == null ? "" : var.ami_image_id
-  have_ssh_key = var.ec2_ssh_key != null && var.ec2_ssh_key != ""
   # See https://aws.amazon.com/blogs/containers/introducing-launch-template-and-custom-ami-support-in-amazon-eks-managed-node-groups/
-  features_require_ami = local.enabled && local.need_bootstrap
-  need_ami_id = local.enabled ? local.features_require_ami && length(local.configured_ami_image_id) == 0 : false
+  features_require_ami = local.enabled && local.need_bootstrap
+  configured_ami_image_id = var.ami_image_id == null ? "" : var.ami_image_id
+  need_ami_id = local.enabled ? local.features_require_ami && length(local.configured_ami_image_id) == 0 : false
 
   features_require_launch_template = local.enabled ? length(var.resources_to_tag) > 0 || local.need_userdata || local.features_require_ami : false
-  generate_launch_template = local.enabled ? local.features_require_launch_template && length(local.configured_launch_template_name) == 0 : false
-  use_launch_template = local.enabled ? local.features_require_launch_template || length(local.configured_launch_template_name) > 0 : false
-  launch_template_id = local.use_launch_template ? (length(local.configured_launch_template_name) > 0 ? data.aws_launch_template.this[0].id : aws_launch_template.default[0].id) : ""
-  launch_template_version = local.use_launch_template ? (
-    length(local.configured_launch_template_version) > 0 ? local.configured_launch_template_version :
-    (
-      length(local.configured_launch_template_name) > 0 ? data.aws_launch_template.this[0].latest_version : aws_launch_template.default[0].latest_version
-    )
-  ) : ""
-
-  launch_template_ami = length(local.configured_ami_image_id) == 0 ? (local.features_require_ami ? data.aws_ami.selected[0].image_id : "") : local.configured_ami_image_id
+  have_ssh_key = var.ec2_ssh_key != null && var.ec2_ssh_key != ""
   need_remote_access_sg = local.enabled && local.have_ssh_key && local.generate_launch_template
-  launch_template_vpc_security_group_ids = (
-    local.need_remote_access_sg ?
-    concat(data.aws_eks_cluster.this[0].vpc_config[*].cluster_security_group_id, aws_security_group.remote_access.*.id) : null
-  )
+  get_cluster_data = local.enabled ? (local.need_cluster_kubernetes_version || local.need_bootstrap || local.need_remote_access_sg) : false
+
+  autoscaler_enabled = var.enable_cluster_autoscaler != null ? var.enable_cluster_autoscaler : var.cluster_autoscaler_enabled == true
+  #
+  # Set up tags for autoscaler and other resources
+  #
   autoscaler_enabled_tags = {
     "k8s.io/cluster-autoscaler/${var.cluster_name}" = "owned"
     "k8s.io/cluster-autoscaler/enabled" = "true"
@@ -71,16 +36,7 @@ locals {
     "kubernetes.io/cluster/${var.cluster_name}" = "owned"
   }
   )
-  node_group_tags = merge(local.node_tags, var.enable_cluster_autoscaler ? local.autoscaler_tags : {})
-
-  aws_policy_prefix = format("arn:%s:iam::aws:policy", join("", data.aws_partition.current.*.partition))
-
-  get_cluster_data = local.enabled ? (local.need_cluster_kubernetes_version || local.need_bootstrap || local.need_remote_access_sg) : false
-}
-
-data "aws_eks_cluster" "this" {
-  count = local.get_cluster_data ? 1 : 0
-  name = var.cluster_name
+  node_group_tags = merge(local.node_tags, local.autoscaler_enabled ? local.autoscaler_tags : {})
 }
 
 module "label" {
@@ -95,172 +51,11 @@ module "label" {
   context = module.this.context
 }
 
-data "aws_partition" "current" {
-  count = local.enabled ? 1 : 0
-}
-
-data "aws_iam_policy_document" "assume_role" {
-  count = local.enabled ? 1 : 0
-
-  statement {
-    effect = "Allow"
-    actions = ["sts:AssumeRole"]
-
-    principals {
-      type = "Service"
-      identifiers = ["ec2.amazonaws.com"]
-    }
-  }
-}
-
-data "aws_iam_policy_document" "amazon_eks_worker_node_autoscaler_policy" {
-  count = (local.enabled && var.enable_cluster_autoscaler) ? 1 : 0
-  statement {
-    sid = "AllowToScaleEKSNodeGroupAutoScalingGroup"
-
-    actions = [
-      "autoscaling:DescribeAutoScalingGroups",
-      "autoscaling:DescribeAutoScalingInstances",
-      "autoscaling:DescribeLaunchConfigurations",
-      "autoscaling:DescribeTags",
-      "autoscaling:SetDesiredCapacity",
-      "autoscaling:TerminateInstanceInAutoScalingGroup",
-      "ec2:DescribeLaunchTemplateVersions"
-    ]
-
-    resources = [
-      "*"
-    ]
-  }
-}
-
-resource "aws_iam_policy" "amazon_eks_worker_node_autoscaler_policy" {
-  count = (local.enabled && var.enable_cluster_autoscaler) ? 1 : 0
-  name = "${module.label.id}-autoscaler"
-  path = "/"
-  policy = join("", data.aws_iam_policy_document.amazon_eks_worker_node_autoscaler_policy.*.json)
-}
-
-resource "aws_iam_role" "default" {
-  count = local.enabled ? 1 : 0
-  name = module.label.id
-  assume_role_policy = join("", data.aws_iam_policy_document.assume_role.*.json)
-  tags = module.label.tags
-}
-
-resource "aws_iam_role_policy_attachment" "amazon_eks_worker_node_policy" {
-  count = local.enabled ? 1 : 0
-  policy_arn = format("%s/%s", local.aws_policy_prefix, "AmazonEKSWorkerNodePolicy")
-  role = join("", aws_iam_role.default.*.name)
-}
-
-resource "aws_iam_role_policy_attachment" "amazon_eks_worker_node_autoscaler_policy" {
-  count = (local.enabled && var.enable_cluster_autoscaler) ? 1 : 0
-  policy_arn = join("", aws_iam_policy.amazon_eks_worker_node_autoscaler_policy.*.arn)
-  role = join("", aws_iam_role.default.*.name)
-}
-
-resource "aws_iam_role_policy_attachment" "amazon_eks_cni_policy" {
-  count = local.enabled ? 1 : 0
-  policy_arn = format("%s/%s", local.aws_policy_prefix, "AmazonEKS_CNI_Policy")
-  role = join("", aws_iam_role.default.*.name)
-}
-
-resource "aws_iam_role_policy_attachment" "amazon_ec2_container_registry_read_only" {
-  count = local.enabled ? 1 : 0
-  policy_arn = format("%s/%s", local.aws_policy_prefix, "AmazonEC2ContainerRegistryReadOnly")
-  role = join("", aws_iam_role.default.*.name)
-}
-
-resource "aws_iam_role_policy_attachment" "existing_policies_for_eks_workers_role" {
-  for_each = local.enabled ? toset(var.existing_workers_role_policy_arns) : []
-  policy_arn = each.value
-  role = join("", aws_iam_role.default.*.name)
-}
-
-resource "aws_launch_template" "default" {
-  # We'll use this default if we aren't provided with a launch template during invocation
-  count = (local.enabled && local.generate_launch_template) ? 1 : 0
-
-  block_device_mappings {
-    device_name = "/dev/xvda"
-
-    ebs {
-      volume_size = var.disk_size
-    }
-  }
-
-  name_prefix = module.label.id
-  update_default_version = true
-
-  instance_type = var.instance_types[0]
-  image_id = local.launch_template_ami == "" ? null : local.launch_template_ami
-  key_name = local.have_ssh_key ? var.ec2_ssh_key : null
-
-  dynamic "tag_specifications" {
-    for_each = var.resources_to_tag
-    content {
-      resource_type = tag_specifications.value
-      tags = local.node_tags
-    }
-  }
-
-  # See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html
-  # and https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html
-  # Note in particular:
-  # If any containers that you deploy to the node group use the Instance Metadata Service Version 2,
-  # then make sure to set the Metadata response hop limit to 2 in your launch template.
-  metadata_options {
-    http_put_response_hop_limit = 2
-    # Despite being documented as "Optional", `http_endpoint` is required when `http_put_response_hop_limit` is set.
-    # We set it to the default setting of "enabled".
-    http_endpoint = "enabled"
-  }
-
-  vpc_security_group_ids = local.launch_template_vpc_security_group_ids
-  user_data = local.userdata
-  tags = local.node_group_tags
-
-  lifecycle {
-    create_before_destroy = true
-  }
-}
-
-data "aws_launch_template" "this" {
-  count = local.enabled && length(local.configured_launch_template_name) > 0 ? 1 : 0
-
-  name = local.configured_launch_template_name
-}
-
-resource "random_pet" "cbd" {
-  count = local.enabled && var.create_before_destroy ? 1 : 0
-
-  separator = module.label.delimiter
-  length = 1
-
-  keepers = {
-    ami_type = var.ami_type
-    ami_release_version = var.ami_release_version
-    kubernetes_version = var.kubernetes_version
-    disk_size = local.use_launch_template ? null : var.disk_size
-    instance_types = join(",", local.use_launch_template ? [] : var.instance_types)
-    node_role_arn = join("", aws_iam_role.default.*.arn)
-
-    ec2_ssh_key = var.ec2_ssh_key == null ? "" : var.ec2_ssh_key
-    source_security_group_ids = join(",", var.source_security_group_ids)
-    subnet_ids = join(",", var.subnet_ids)
-
-    launch_template_id = local.use_launch_template ? local.launch_template_id : null
-    launch_template_ami = local.use_launch_template ? local.launch_template_ami : null
-  }
-
-  depends_on = [var.module_depends_on]
-  lifecycle {
-    create_before_destroy = true
-  }
+data "aws_eks_cluster" "this" {
+  count = local.get_cluster_data ? 1 : 0
+  name = var.cluster_name
 }
 
-
 # Support keeping 2 node groups in sync by extracting common variable settings
 locals {
   ng = {
@@ -270,7 +65,7 @@ locals {
     disk_size = local.use_launch_template ? null : var.disk_size
     instance_types = local.use_launch_template ? null : var.instance_types
    ami_type = local.launch_template_ami == "" ? var.ami_type : null
-    labels = var.kubernetes_labels
+    labels = var.kubernetes_labels == null ? {} : var.kubernetes_labels
     release_version = local.launch_template_ami == "" ? var.ami_release_version : null
     version = length(compact([local.launch_template_ami, var.ami_release_version])) == 0 ? var.kubernetes_version : null
@@ -289,6 +84,31 @@ locals {
   }
 }
 
+resource "random_pet" "cbd" {
+  count = local.enabled && var.create_before_destroy ? 1 : 0
+
+  separator = module.label.delimiter
+  length = 1
+
+  keepers = {
+    node_role_arn = local.ng.node_role_arn
+    subnet_ids = join(",", local.ng.subnet_ids)
+    disk_size = local.ng.disk_size
+    instance_types = local.ng.instance_types == null ? "" : local.ng.instance_types[0]
+    ami_type = local.ng.ami_type
+    release_version = local.ng.release_version
+    version = local.ng.version
+
+    need_remote_access = local.ng.need_remote_access
+    ec2_ssh_key = local.ng.need_remote_access ? local.ng.ec2_ssh_key : "handled by launch template"
+    # Any change in security groups requires a new node group, because you cannot delete a security group while it is in use
+    # and it will not automatically disassociate itself from instances or network interfaces.
+    source_security_group_ids = join(",", local.ng.source_security_group_ids, local.launch_template_vpc_security_group_ids)
+
+    launch_template_id = local.use_launch_template ? local.launch_template_id : "none"
+  }
+}
+
 # Because create_before_destroy is such a dramatic change, we want to make it optional.
 # Because lifecycle must be static, the only way to make it optional is to create
 # two nearly identical resources and only enable the correct one.
@@ -344,10 +164,9 @@ resource "aws_eks_node_group" "default" {
   # Otherwise, EKS will not be able to properly delete EC2 Instances and Elastic Network Interfaces.
depends_on = [ aws_iam_role_policy_attachment.amazon_eks_worker_node_policy, - aws_iam_role_policy_attachment.amazon_eks_worker_node_autoscaler_policy, + aws_iam_role_policy_attachment.amazon_eks_worker_node_autoscale_policy, aws_iam_role_policy_attachment.amazon_eks_cni_policy, aws_iam_role_policy_attachment.amazon_ec2_container_registry_read_only, - aws_launch_template.default, # Also allow calling module to create an explicit dependency # This is useful in conjunction with terraform-aws-eks-cluster to ensure # the cluster is fully created and configured before creating any node groups @@ -405,10 +224,9 @@ resource "aws_eks_node_group" "cbd" { # Otherwise, EKS will not be able to properly delete EC2 Instances and Elastic Network Interfaces. depends_on = [ aws_iam_role_policy_attachment.amazon_eks_worker_node_policy, - aws_iam_role_policy_attachment.amazon_eks_worker_node_autoscaler_policy, + aws_iam_role_policy_attachment.amazon_eks_worker_node_autoscale_policy, aws_iam_role_policy_attachment.amazon_eks_cni_policy, aws_iam_role_policy_attachment.amazon_ec2_container_registry_read_only, - aws_launch_template.default, # Also allow calling module to create an explicit dependency # This is useful in conjunction with terraform-aws-eks-cluster to ensure # the cluster is fully created and configured before creating any node groups diff --git a/sg.tf b/security-group.tf similarity index 100% rename from sg.tf rename to security-group.tf diff --git a/variables.tf b/variables.tf index f577fa2..d46e5ca 100644 --- a/variables.tf +++ b/variables.tf @@ -1,7 +1,22 @@ variable "enable_cluster_autoscaler" { type = bool - description = "Set true to allow Kubernetes Cluster Auto Scaler to scale the node group" + description = "(Deprecated, use `cluster_autoscaler_enabled`) Set true to allow Kubernetes Cluster Auto Scaler to scale the node group" + default = null +} + +variable "cluster_autoscaler_enabled" { + type = bool + description = "Set true to label the node group so that the [Kubernetes Cluster Autoscaler](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md#auto-discovery-setup) will discover and autoscale it" + default = null +} + +variable "worker_role_autoscale_iam_enabled" { + type = bool default = false + description = <<-EOT + If true, the worker IAM role will be authorized to perform autoscaling operations. Not recommended. + Use [EKS IAM role for cluster autoscaler service account](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html) instead. + EOT } variable "cluster_name" {
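
Reviewer note: the `length(compact([...]))` idiom documented in `launch-template.tf` above is easy to verify in isolation. Below is a minimal sketch, not part of this patch, with hypothetical names and values; it can be dropped into a scratch root module and applied, or the locals inspected with `terraform console`.

```hcl
# Standalone demonstration of the null-or-empty test idiom from launch-template.tf.
# All names and values here are hypothetical, chosen only to exercise the test.
locals {
  # Pretend the caller left the version unset ("" stands in for "not provided").
  example_launch_template_version = ""

  # Idiomatic form: compact() drops null and empty-string elements,
  # so a non-zero length means the value was actually provided.
  version_provided_idiom = length(compact([local.example_launch_template_version])) > 0

  # Equivalent long form, as spelled out in the launch-template.tf comment.
  version_provided_long = local.example_launch_template_version != null && local.example_launch_template_version != ""
}

output "forms_agree" {
  # Should be true whichever example value is set above.
  value = local.version_provided_idiom == local.version_provided_long
}
```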
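
Finally, a sketch of how a caller might adopt the variables this patch introduces. The module source, version, and surrounding argument values are assumptions for illustration, not part of the diff; only the last two arguments come from this change, and the full variable list should be consulted for required inputs.

```hcl
module "eks_node_group" {
  # Hypothetical source/version; pin to whichever release includes this change.
  source  = "cloudposse/eks-node-group/aws"
  version = "x.y.z" # placeholder

  cluster_name   = "example-cluster"              # assumed existing EKS cluster
  subnet_ids     = ["subnet-aaaa", "subnet-bbbb"] # placeholder subnet IDs
  desired_size   = 2
  min_size       = 2 # assumed variable, not shown in this diff
  max_size       = 4 # assumed variable, not shown in this diff
  instance_types = ["t3.medium"]

  # New: label the node group for Cluster Autoscaler auto-discovery.
  # If the deprecated `enable_cluster_autoscaler` is set (non-null), it takes
  # precedence -- see the `autoscaler_enabled` local in main.tf.
  cluster_autoscaler_enabled = true

  # Leave false (the default): grant the autoscaler its IAM permissions through
  # an IAM role for its service account rather than through the worker node role.
  worker_role_autoscale_iam_enabled = false
}
```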