Skip to content

Commit

Permalink
fix(eks): cannot update cluster configuration (#4696)
Browse files Browse the repository at this point in the history
* fix(eks): cannot update cluster configuration

Our custom resource naively tried to call the UpdateClusterConfig API for updates, but this is in fact not in line with how AWS::EKS::Cluster is implemented. This change modifies the custom resource handler to handle updates based on the same specification as the official CloudFormation resource:

- Changes to the cluster name, VPC or role will cause a replacement (creation of a cluster with a new name and removal of the old cluster).
- Changes to the version will use the UpdateClusterVersion API to update the version in-place.

This fixes #4311.

This commit also fixes #4310, which caused cluster deletions when updates failed. The root cause was that when errors were reported to CFN we always used the log stream name as the physical resource ID, and CFN thought we wanted to replace the resource. Ouch.

This change was tested manually, since we still don't have a good unit test harness for this resource: we exercised all types of updates and observed that the appropriate behaviour was taken (replacement or in-place update).

* handle cluster name updates

* add provisional unit tests for the cluster resource (just a test plan)

* update expectations
  • Loading branch information
Elad Ben-Israel authored and mergify[bot] committed Oct 28, 2019
1 parent 9a96c37 commit e17ba55
Show file tree
Hide file tree
Showing 9 changed files with 338 additions and 152 deletions.
2 changes: 1 addition & 1 deletion packages/@aws-cdk/aws-eks/lib/cluster-resource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ export class ClusterResource extends Construct {

// since we don't know the cluster name at this point, we must give this role star resource permissions
handler.addToRolePolicy(new PolicyStatement({
actions: [ 'eks:CreateCluster', 'eks:DescribeCluster', 'eks:DeleteCluster' ],
actions: [ 'eks:CreateCluster', 'eks:DescribeCluster', 'eks:DeleteCluster', 'eks:UpdateClusterVersion' ],
resources: [ '*' ]
}));

Expand Down
75 changes: 67 additions & 8 deletions packages/@aws-cdk/aws-eks/lib/cluster-resource/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ def cfn_error(message=None):
old_props = event.get('OldResourceProperties', {})
physical_id = event.get('PhysicalResourceId', None)
config = props['Config']
old_config = old_props.get('Config', {})

def new_cluster_name():
return "cluster-%s" % request_id

logger.info(json.dumps(config))

Expand All @@ -49,11 +53,36 @@ def cfn_error(message=None):
cluster_name=config.get('name', None)
if cluster_name is None:
if physical_id: cluster_name = physical_id
elif request_type == 'Create': cluster_name = "cluster-%s" % request_id
elif request_type == 'Create': cluster_name = new_cluster_name()
else: raise Exception("unexpected error. cannot determine cluster name")
config['name'] = cluster_name
logger.info("request: %s" % config)

# extract additional options
resourcesVpcConfig = config.get('resourcesVpcConfig', None)
roleArn = config.get('roleArn', None)
version = config.get('version', None)

def should_replace_cluster():
logger.info("old config: %s" % json.dumps(old_config))

old_name = physical_id
if old_name != cluster_name:
logger.info("'name' change requires replacement (old=%s, new=%s)" % (old_name, cluster_name))
return True

old_resourcesVpcConfig = old_config.get('resourcesVpcConfig', None)
if old_resourcesVpcConfig != resourcesVpcConfig:
logger.info("'resourcesVpcConfig' change requires replacement (old=%s, new=%s)" % (old_resourcesVpcConfig, resourcesVpcConfig))
return True

old_roleArn = old_config.get('roleArn', None)
if old_roleArn != roleArn:
logger.info("'roleArn' change requires replacement (old=%s, new=%s)" % (old_roleArn, roleArn))
return True

return False

# delete is a special case
if request_type == 'Delete':
logger.info('deleting cluster')
Expand All @@ -69,9 +98,36 @@ def cfn_error(message=None):
resp = eks.create_cluster(**config)
logger.info("create response: %s" % resp)
elif request_type == 'Update':
logger.info("updating cluster %s" % cluster_name)
resp = eks.update_cluster_config(**config)
logger.info("update response: %s" % resp)
# physical_id is always defined for "update"
logger.info("updating cluster %s" % physical_id)
current_state = eks.describe_cluster(name=physical_id)['cluster']

# changes to "name", "resourcesVpcConfig" and "roleArn" all require replacement
# according to the cloudformation spec, so if one of these change, we basically need to create
# a new cluster with the new configuration (in this case, if "version" has been changed, the
# new version will be used by the new cluster).
if should_replace_cluster():
# unless we are renaming the cluster, allocate a new cluster name
if cluster_name == physical_id:
cluster_name = new_cluster_name()
config['name'] = cluster_name

logger.info("replacing cluster %s with a new cluster %s" % (physical_id, cluster_name))
resp = eks.create_cluster(**config)
logger.info("create (replacement) response: %s" % resp)
else:
# version change - we can do that without replacement
old_version = old_config.get('version', None)
if (old_version is None) and (version is None):
logger.info("no version change")
else:
old_version_actual = current_state['version']
if version != old_version_actual:
if version is None:
raise Exception("Version cannot be changed from a specific value (%s) to undefined" % old_version)

resp = eks.update_cluster_version(name=cluster_name,version=version)
logger.info("update response: %s" % resp)
else:
raise Exception("Invalid request type %s" % request_type)

Expand All @@ -94,9 +150,8 @@ def cfn_error(message=None):
logger.info("attributes: %s" % attrs)
cfn_send(event, context, CFN_SUCCESS, responseData=attrs, physicalResourceId=cluster_name)

except KeyError as e:
cfn_error("invalid request. Missing '%s'" % str(e))
except Exception as e:
except:
e = sys.exc_info()[1]
logger.exception(e)
cfn_error(str(e))

Expand All @@ -111,10 +166,14 @@ def cfn_send(event, context, responseStatus, responseData={}, physicalResourceId
responseUrl = event['ResponseURL']
logger.info(responseUrl)

# use previous PhysicalResourceId if physical resource ID is not specified, otherwise update failures
# will result in resource replacement
physicalResourceId = physicalResourceId or event.get('PhysicalResourceId', context.log_stream_name)

responseBody = {}
responseBody['Status'] = responseStatus
responseBody['Reason'] = reason or ('See the details in CloudWatch Log Stream: ' + context.log_stream_name)
responseBody['PhysicalResourceId'] = physicalResourceId or context.log_stream_name
responseBody['PhysicalResourceId'] = physicalResourceId
responseBody['StackId'] = event['StackId']
responseBody['RequestId'] = event['RequestId']
responseBody['LogicalResourceId'] = event['LogicalResourceId']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,8 @@
"Action": [
"eks:CreateCluster",
"eks:DescribeCluster",
"eks:DeleteCluster"
"eks:DeleteCluster",
"eks:UpdateClusterVersion"
],
"Effect": "Allow",
"Resource": "*"
Expand Down Expand Up @@ -672,7 +673,7 @@
"Properties": {
"Code": {
"S3Bucket": {
"Ref": "AssetParametersc58ce740cd907e33a1af503069821ee4befab4fec4075707d67e86d2a0dbf128S3BucketA2C12383"
"Ref": "AssetParametersea4957b16062595851e7d293ee45835db05c5693669a729cc02944b6ad19a204S3Bucket371D99F8"
},
"S3Key": {
"Fn::Join": [
Expand All @@ -685,7 +686,7 @@
"Fn::Split": [
"||",
{
"Ref": "AssetParametersc58ce740cd907e33a1af503069821ee4befab4fec4075707d67e86d2a0dbf128S3VersionKeyB5F0BEF8"
"Ref": "AssetParametersea4957b16062595851e7d293ee45835db05c5693669a729cc02944b6ad19a204S3VersionKeyFDCB25DD"
}
]
}
Expand All @@ -698,7 +699,7 @@
"Fn::Split": [
"||",
{
"Ref": "AssetParametersc58ce740cd907e33a1af503069821ee4befab4fec4075707d67e86d2a0dbf128S3VersionKeyB5F0BEF8"
"Ref": "AssetParametersea4957b16062595851e7d293ee45835db05c5693669a729cc02944b6ad19a204S3VersionKeyFDCB25DD"
}
]
}
Expand Down Expand Up @@ -1017,8 +1018,8 @@
],
"Tags": [
{
"Key":"Name",
"Value":"eks-integ-defaults/Cluster/DefaultCapacity"
"Key": "Name",
"Value": "eks-integ-defaults/Cluster/DefaultCapacity"
},
{
"Key": {
Expand Down Expand Up @@ -1185,18 +1186,6 @@
}
}
},
"Parameters": {
"AssetParametersc58ce740cd907e33a1af503069821ee4befab4fec4075707d67e86d2a0dbf128S3BucketA2C12383": {"Type":"String","Description":"S3 bucket for asset \"c58ce740cd907e33a1af503069821ee4befab4fec4075707d67e86d2a0dbf128\""},
"AssetParametersc58ce740cd907e33a1af503069821ee4befab4fec4075707d67e86d2a0dbf128S3VersionKeyB5F0BEF8": {"Type":"String","Description":"S3 key for asset version \"c58ce740cd907e33a1af503069821ee4befab4fec4075707d67e86d2a0dbf128\""},
"AssetParametersc58ce740cd907e33a1af503069821ee4befab4fec4075707d67e86d2a0dbf128ArtifactHash8E5121DE": {"Type":"String","Description":"Artifact hash for asset \"c58ce740cd907e33a1af503069821ee4befab4fec4075707d67e86d2a0dbf128\""},
"AssetParameters640847533c8a00b3133aeb128edcac41fb7b60349c9e18764fcf7ea4af14d444S3Bucket919126CB": {"Type":"String","Description":"S3 bucket for asset \"640847533c8a00b3133aeb128edcac41fb7b60349c9e18764fcf7ea4af14d444\""},
"AssetParameters640847533c8a00b3133aeb128edcac41fb7b60349c9e18764fcf7ea4af14d444S3VersionKey529BEF54": {"Type":"String","Description":"S3 key for asset version \"640847533c8a00b3133aeb128edcac41fb7b60349c9e18764fcf7ea4af14d444\""},
"AssetParameters640847533c8a00b3133aeb128edcac41fb7b60349c9e18764fcf7ea4af14d444ArtifactHash606C8127": {"Type":"String","Description":"Artifact hash for asset \"640847533c8a00b3133aeb128edcac41fb7b60349c9e18764fcf7ea4af14d444\""},
"SsmParameterValueawsserviceeksoptimizedami114amazonlinux2recommendedimageidC96584B6F00A464EAD1953AFF4B05118Parameter": {
"Type": "AWS::SSM::Parameter::Value<String>",
"Default": "/aws/service/eks/optimized-ami/1.14/amazon-linux-2/recommended/image_id"
}
},
"Outputs": {
"ClusterConfigCommand43AAE40F": {
"Value": {
Expand Down Expand Up @@ -1226,5 +1215,35 @@
]
}
}
},
"Parameters": {
"AssetParametersea4957b16062595851e7d293ee45835db05c5693669a729cc02944b6ad19a204S3Bucket371D99F8": {
"Type": "String",
"Description": "S3 bucket for asset \"ea4957b16062595851e7d293ee45835db05c5693669a729cc02944b6ad19a204\""
},
"AssetParametersea4957b16062595851e7d293ee45835db05c5693669a729cc02944b6ad19a204S3VersionKeyFDCB25DD": {
"Type": "String",
"Description": "S3 key for asset version \"ea4957b16062595851e7d293ee45835db05c5693669a729cc02944b6ad19a204\""
},
"AssetParametersea4957b16062595851e7d293ee45835db05c5693669a729cc02944b6ad19a204ArtifactHashB80B497F": {
"Type": "String",
"Description": "Artifact hash for asset \"ea4957b16062595851e7d293ee45835db05c5693669a729cc02944b6ad19a204\""
},
"AssetParameters640847533c8a00b3133aeb128edcac41fb7b60349c9e18764fcf7ea4af14d444S3Bucket919126CB": {
"Type": "String",
"Description": "S3 bucket for asset \"640847533c8a00b3133aeb128edcac41fb7b60349c9e18764fcf7ea4af14d444\""
},
"AssetParameters640847533c8a00b3133aeb128edcac41fb7b60349c9e18764fcf7ea4af14d444S3VersionKey529BEF54": {
"Type": "String",
"Description": "S3 key for asset version \"640847533c8a00b3133aeb128edcac41fb7b60349c9e18764fcf7ea4af14d444\""
},
"AssetParameters640847533c8a00b3133aeb128edcac41fb7b60349c9e18764fcf7ea4af14d444ArtifactHash606C8127": {
"Type": "String",
"Description": "Artifact hash for asset \"640847533c8a00b3133aeb128edcac41fb7b60349c9e18764fcf7ea4af14d444\""
},
"SsmParameterValueawsserviceeksoptimizedami114amazonlinux2recommendedimageidC96584B6F00A464EAD1953AFF4B05118Parameter": {
"Type": "AWS::SSM::Parameter::Value<String>",
"Default": "/aws/service/eks/optimized-ami/1.14/amazon-linux-2/recommended/image_id"
}
}
}
}
Loading

0 comments on commit e17ba55

Please sign in to comment.