Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Final ACL retries #9830

Merged
merged 1 commit into from
Aug 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 82 additions & 61 deletions aws/resource_aws_network_acl.go
Original file line number Diff line number Diff line change
Expand Up @@ -437,79 +437,100 @@ func resourceAwsNetworkAclDelete(d *schema.ResourceData, meta interface{}) error
conn := meta.(*AWSClient).ec2conn

log.Printf("[INFO] Deleting Network Acl: %s", d.Id())
retryErr := resource.Retry(5*time.Minute, func() *resource.RetryError {
_, err := conn.DeleteNetworkAcl(&ec2.DeleteNetworkAclInput{
NetworkAclId: aws.String(d.Id()),
})
input := &ec2.DeleteNetworkAclInput{
NetworkAclId: aws.String(d.Id()),
}
err := resource.Retry(5*time.Minute, func() *resource.RetryError {
_, err := conn.DeleteNetworkAcl(input)
if err != nil {
ec2err := err.(awserr.Error)
switch ec2err.Code() {
case "InvalidNetworkAclID.NotFound":
if isAWSErr(err, "InvalidNetworkAclID.NotFound", "") {
return nil
case "DependencyViolation":
// In case of dependency violation, we remove the association between subnet and network acl.
// This means the subnet is attached to default acl of vpc.
var associations []*ec2.NetworkAclAssociation
if v, ok := d.GetOk("subnet_ids"); ok {
ids := v.(*schema.Set).List()
for _, i := range ids {
a, err := findNetworkAclAssociation(i.(string), conn)
if err != nil {
if isResourceNotFoundError(err) {
continue
}
return resource.NonRetryableError(err)
}
associations = append(associations, a)
}
}

log.Printf("[DEBUG] Replacing network associations for Network ACL (%s): %s", d.Id(), associations)
defaultAcl, err := getDefaultNetworkAcl(d.Get("vpc_id").(string), conn)
}
if isAWSErr(err, "DependencyViolation", "") {
err = cleanUpDependencyViolations(d, conn)
if err != nil {
return resource.NonRetryableError(err)
}

for _, a := range associations {
log.Printf("DEBUG] Replacing Network Acl Association (%s) with Default Network ACL ID (%s)", *a.NetworkAclAssociationId, *defaultAcl.NetworkAclId)
_, replaceErr := conn.ReplaceNetworkAclAssociation(&ec2.ReplaceNetworkAclAssociationInput{
AssociationId: a.NetworkAclAssociationId,
NetworkAclId: defaultAcl.NetworkAclId,
})
if replaceErr != nil {
if replaceEc2err, ok := replaceErr.(awserr.Error); ok {
// It's possible that during an attempt to replace this
// association, the Subnet in question has already been moved to
// another ACL. This can happen if you're destroying a network acl
// and simultaneously re-associating it's subnet(s) with another
// ACL; Terraform may have already re-associated the subnet(s) by
// the time we attempt to destroy them, even between the time we
// list them and then try to destroy them. In this case, the
// association we're trying to replace will no longer exist and
// this call will fail. Here we trap that error and fail
// gracefully; the association we tried to replace gone, we trust
// someone else has taken ownership.
if replaceEc2err.Code() == "InvalidAssociationID.NotFound" {
log.Printf("[WARN] Network Association (%s) no longer found; Network Association likely updated or removed externally, removing from state", *a.NetworkAclAssociationId)
continue
}
}
log.Printf("[ERR] Non retry-able error in replacing associations for Network ACL (%s): %s", d.Id(), replaceErr)
return resource.NonRetryableError(replaceErr)
}
}
return resource.RetryableError(fmt.Errorf("Dependencies found and cleaned up, retrying"))
default:
// Any other error, we want to quit the retry loop immediately
return resource.NonRetryableError(err)
}

return resource.NonRetryableError(err)

}
log.Printf("[Info] Deleted network ACL %s successfully", d.Id())
return nil
})
if isResourceTimeoutError(err) {
_, err = conn.DeleteNetworkAcl(input)
if err != nil && isAWSErr(err, "InvalidNetworkAclID.NotFound", "") {
return nil
}
err = cleanUpDependencyViolations(d, conn)
if err != nil {
// This seems excessive but is probably the best way to make sure it's actually deleted
_, err = conn.DeleteNetworkAcl(input)
if err != nil && isAWSErr(err, "InvalidNetworkAclID.NotFound", "") {
return nil
}
}
}
if err != nil {
return fmt.Errorf("Error destroying Network ACL (%s): %s", d.Id(), err)
}
return nil
}

if retryErr != nil {
return fmt.Errorf("Error destroying Network ACL (%s): %s", d.Id(), retryErr)
func cleanUpDependencyViolations(d *schema.ResourceData, conn *ec2.EC2) error {
// In case of dependency violation, we remove the association between subnet and network acl.
// This means the subnet is attached to default acl of vpc.
var associations []*ec2.NetworkAclAssociation
if v, ok := d.GetOk("subnet_ids"); ok {
ids := v.(*schema.Set).List()
for _, i := range ids {
a, err := findNetworkAclAssociation(i.(string), conn)
if err != nil {
if isResourceNotFoundError(err) {
continue
}
return fmt.Errorf("Error finding network ACL association: %s", err)
}
associations = append(associations, a)
}
}

log.Printf("[DEBUG] Replacing network associations for Network ACL (%s): %s", d.Id(), associations)
defaultAcl, err := getDefaultNetworkAcl(d.Get("vpc_id").(string), conn)
if err != nil {
return fmt.Errorf("Error getting default network ACL: %s", err)
}

for _, a := range associations {
log.Printf("DEBUG] Replacing Network Acl Association (%s) with Default Network ACL ID (%s)", *a.NetworkAclAssociationId, *defaultAcl.NetworkAclId)
_, replaceErr := conn.ReplaceNetworkAclAssociation(&ec2.ReplaceNetworkAclAssociationInput{
AssociationId: a.NetworkAclAssociationId,
NetworkAclId: defaultAcl.NetworkAclId,
})
if replaceErr != nil {
if replaceEc2err, ok := replaceErr.(awserr.Error); ok {
// It's possible that during an attempt to replace this
// association, the Subnet in question has already been moved to
// another ACL. This can happen if you're destroying a network acl
// and simultaneously re-associating it's subnet(s) with another
// ACL; Terraform may have already re-associated the subnet(s) by
// the time we attempt to destroy them, even between the time we
// list them and then try to destroy them. In this case, the
// association we're trying to replace will no longer exist and
// this call will fail. Here we trap that error and fail
// gracefully; the association we tried to replace gone, we trust
// someone else has taken ownership.
if replaceEc2err.Code() == "InvalidAssociationID.NotFound" {
log.Printf("[WARN] Network Association (%s) no longer found; Network Association likely updated or removed externally, removing from state", *a.NetworkAclAssociationId)
continue
}
}
log.Printf("[ERR] Non retry-able error in replacing associations for Network ACL (%s): %s", d.Id(), replaceErr)
return fmt.Errorf("Error replacing network ACL associations: %s", err)
}
}
return nil
}
Expand Down
16 changes: 11 additions & 5 deletions aws/resource_aws_network_acl_rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,18 +169,24 @@ func resourceAwsNetworkAclRuleCreate(d *schema.ResourceData, meta interface{}) e
// It appears it might be a while until the newly created rule is visible via the
// API (see issue GH-4721). Retry the `findNetworkAclRule` function until it is
// visible (which in most cases is likely immediately).
var r *ec2.NetworkAclEntry
err = resource.Retry(3*time.Minute, func() *resource.RetryError {
r, findErr := findNetworkAclRule(d, meta)
if findErr != nil {
return resource.RetryableError(findErr)
r, err = findNetworkAclRule(d, meta)
if err != nil {
return resource.RetryableError(err)
}
if r == nil {
err := fmt.Errorf("Network ACL rule (%s) not found", d.Id())
return resource.RetryableError(err)
return resource.RetryableError(fmt.Errorf("Network ACL rule (%s) not found", d.Id()))
}

return nil
})
if isResourceTimeoutError(err) {
r, err = findNetworkAclRule(d, meta)
if r == nil {
return fmt.Errorf("Network ACL rule (%s) not found", d.Id())
}
}
if err != nil {
return fmt.Errorf("Created Network ACL Rule was not visible in API within 3 minute period. Running 'terraform apply' again will resume infrastructure creation.")
}
Expand Down