Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

provider/aws: Handle all kinds of CloudFormation stack failures #5606

Merged
merged 2 commits into from
Sep 3, 2016
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
248 changes: 183 additions & 65 deletions builtin/providers/aws/resource_aws_cloudformation_stack.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,29 +145,50 @@ func resourceAwsCloudFormationStackCreate(d *schema.ResourceData, meta interface
}

d.SetId(*resp.StackId)
var lastStatus string

wait := resource.StateChangeConf{
Pending: []string{"CREATE_IN_PROGRESS", "ROLLBACK_IN_PROGRESS", "ROLLBACK_COMPLETE"},
Target: []string{"CREATE_COMPLETE"},
Pending: []string{
"CREATE_IN_PROGRESS",
"DELETE_IN_PROGRESS",
"ROLLBACK_IN_PROGRESS",
},
Target: []string{
"CREATE_COMPLETE",
"CREATE_FAILED",
"DELETE_COMPLETE",
"DELETE_FAILED",
"ROLLBACK_COMPLETE",
"ROLLBACK_FAILED",
},
Timeout: time.Duration(retryTimeout) * time.Minute,
MinTimeout: 5 * time.Second,
MinTimeout: 1 * time.Second,
Refresh: func() (interface{}, string, error) {
resp, err := conn.DescribeStacks(&cloudformation.DescribeStacksInput{
StackName: aws.String(d.Get("name").(string)),
StackName: aws.String(d.Id()),
})
if err != nil {
log.Printf("[ERROR] Failed to describe stacks: %s", err)
return nil, "", err
}
if len(resp.Stacks) == 0 {
// This shouldn't happen unless CloudFormation is inconsistent
// See https://github.com/hashicorp/terraform/issues/5487
log.Printf("[WARN] CloudFormation stack %q not found.\nresponse: %q",
d.Id(), resp)
return resp, "", fmt.Errorf(
"CloudFormation stack %q vanished unexpectedly during creation.\n"+
"Unless you knowingly manually deleted the stack "+
"please report this as bug at https://github.com/hashicorp/terraform/issues\n"+
"along with the config & Terraform version & the details below:\n"+
"Full API response: %s\n",
d.Id(), resp)
}

status := *resp.Stacks[0].StackStatus
lastStatus = status
log.Printf("[DEBUG] Current CloudFormation stack status: %q", status)

if status == "ROLLBACK_COMPLETE" {
stack := resp.Stacks[0]
failures, err := getCloudFormationFailures(stack.StackName, *stack.CreationTime, conn)
if err != nil {
return resp, "", fmt.Errorf(
"Failed getting details about rollback: %q", err.Error())
}

return resp, "", fmt.Errorf("ROLLBACK_COMPLETE:\n%q", failures)
}
return resp, status, err
},
}
Expand All @@ -177,26 +198,58 @@ func resourceAwsCloudFormationStackCreate(d *schema.ResourceData, meta interface
return err
}

log.Printf("[INFO] CloudFormation Stack %q created", d.Get("name").(string))
if lastStatus == "ROLLBACK_COMPLETE" || lastStatus == "ROLLBACK_FAILED" {
reasons, err := getCloudFormationRollbackReasons(d.Id(), nil, conn)
if err != nil {
return fmt.Errorf("Failed getting rollback reasons: %q", err.Error())
}

return fmt.Errorf("%s: %q", lastStatus, reasons)
}
if lastStatus == "DELETE_COMPLETE" || lastStatus == "DELETE_FAILED" {
reasons, err := getCloudFormationDeletionReasons(d.Id(), conn)
if err != nil {
return fmt.Errorf("Failed getting deletion reasons: %q", err.Error())
}

d.SetId("")
return fmt.Errorf("%s: %q", lastStatus, reasons)
}
if lastStatus == "CREATE_FAILED" {
reasons, err := getCloudFormationFailures(d.Id(), conn)
if err != nil {
return fmt.Errorf("Failed getting failure reasons: %q", err.Error())
}
return fmt.Errorf("%s: %q", lastStatus, reasons)
}

log.Printf("[INFO] CloudFormation Stack %q created", d.Id())

return resourceAwsCloudFormationStackRead(d, meta)
}

func resourceAwsCloudFormationStackRead(d *schema.ResourceData, meta interface{}) error {
conn := meta.(*AWSClient).cfconn
stackName := d.Get("name").(string)

input := &cloudformation.DescribeStacksInput{
StackName: aws.String(stackName),
StackName: aws.String(d.Id()),
}
resp, err := conn.DescribeStacks(input)
if err != nil {
awsErr, ok := err.(awserr.Error)
// ValidationError: Stack with id % does not exist
if ok && awsErr.Code() == "ValidationError" {
log.Printf("[WARN] Removing CloudFormation stack %s as it's already gone", d.Id())
d.SetId("")
return nil
}

return err
}

stacks := resp.Stacks
if len(stacks) < 1 {
log.Printf("[DEBUG] Removing CloudFormation stack %s as it's already gone", d.Id())
log.Printf("[WARN] Removing CloudFormation stack %s as it's already gone", d.Id())
d.SetId("")
return nil
}
Expand All @@ -210,7 +263,7 @@ func resourceAwsCloudFormationStackRead(d *schema.ResourceData, meta interface{}
}

tInput := cloudformation.GetTemplateInput{
StackName: aws.String(stackName),
StackName: aws.String(d.Id()),
}
out, err := conn.GetTemplate(&tInput)
if err != nil {
Expand Down Expand Up @@ -272,7 +325,7 @@ func resourceAwsCloudFormationStackUpdate(d *schema.ResourceData, meta interface
conn := meta.(*AWSClient).cfconn

input := &cloudformation.UpdateStackInput{
StackName: aws.String(d.Get("name").(string)),
StackName: aws.String(d.Id()),
}

// Either TemplateBody, TemplateURL or UsePreviousTemplate are required
Expand Down Expand Up @@ -310,7 +363,7 @@ func resourceAwsCloudFormationStackUpdate(d *schema.ResourceData, meta interface
return err
}

lastUpdatedTime, err := getLastCfEventTimestamp(d.Get("name").(string), conn)
lastUpdatedTime, err := getLastCfEventTimestamp(d.Id(), conn)
if err != nil {
return err
}
Expand All @@ -322,36 +375,34 @@ func resourceAwsCloudFormationStackUpdate(d *schema.ResourceData, meta interface
log.Printf("[DEBUG] CloudFormation timeout: %d", retryTimeout)
}
}
var lastStatus string
wait := resource.StateChangeConf{
Pending: []string{
"UPDATE_COMPLETE_CLEANUP_IN_PROGRESS",
"UPDATE_IN_PROGRESS",
"UPDATE_ROLLBACK_IN_PROGRESS",
"UPDATE_ROLLBACK_COMPLETE_CLEANUP_IN_PROGRESS",
},
Target: []string{
"UPDATE_COMPLETE",
"UPDATE_ROLLBACK_COMPLETE",
"UPDATE_ROLLBACK_FAILED",
},
Target: []string{"UPDATE_COMPLETE"},
Timeout: time.Duration(retryTimeout) * time.Minute,
MinTimeout: 5 * time.Second,
Refresh: func() (interface{}, string, error) {
resp, err := conn.DescribeStacks(&cloudformation.DescribeStacksInput{
StackName: aws.String(d.Get("name").(string)),
StackName: aws.String(d.Id()),
})
stack := resp.Stacks[0]
status := *stack.StackStatus
log.Printf("[DEBUG] Current CloudFormation stack status: %q", status)

if status == "UPDATE_ROLLBACK_COMPLETE" {
failures, err := getCloudFormationFailures(stack.StackName, *lastUpdatedTime, conn)
if err != nil {
return resp, "", fmt.Errorf(
"Failed getting details about rollback: %q", err.Error())
}

return resp, "", fmt.Errorf(
"UPDATE_ROLLBACK_COMPLETE:\n%q", failures)
if err != nil {
log.Printf("[ERROR] Failed to describe stacks: %s", err)
return nil, "", err
}

status := *resp.Stacks[0].StackStatus
lastStatus = status
log.Printf("[DEBUG] Current CloudFormation stack status: %q", status)

return resp, status, err
},
}
Expand All @@ -361,6 +412,15 @@ func resourceAwsCloudFormationStackUpdate(d *schema.ResourceData, meta interface
return err
}

if lastStatus == "UPDATE_ROLLBACK_COMPLETE" || lastStatus == "UPDATE_ROLLBACK_FAILED" {
reasons, err := getCloudFormationRollbackReasons(*stack.StackId, lastUpdatedTime, conn)
if err != nil {
return fmt.Errorf("Failed getting details about rollback: %q", err.Error())
}

return fmt.Errorf("%s: %q", lastStatus, reasons)
}

log.Printf("[DEBUG] CloudFormation stack %q has been updated", *stack.StackId)

return resourceAwsCloudFormationStackRead(d, meta)
Expand All @@ -370,7 +430,7 @@ func resourceAwsCloudFormationStackDelete(d *schema.ResourceData, meta interface
conn := meta.(*AWSClient).cfconn

input := &cloudformation.DeleteStackInput{
StackName: aws.String(d.Get("name").(string)),
StackName: aws.String(d.Id()),
}
log.Printf("[DEBUG] Deleting CloudFormation stack %s", input)
_, err := conn.DeleteStack(input)
Expand All @@ -386,37 +446,45 @@ func resourceAwsCloudFormationStackDelete(d *schema.ResourceData, meta interface
}
return err
}

var lastStatus string
wait := resource.StateChangeConf{
Pending: []string{"DELETE_IN_PROGRESS", "ROLLBACK_IN_PROGRESS"},
Target: []string{"DELETE_COMPLETE"},
Pending: []string{
"DELETE_IN_PROGRESS",
"ROLLBACK_IN_PROGRESS",
},
Target: []string{
"DELETE_COMPLETE",
"DELETE_FAILED",
},
Timeout: 30 * time.Minute,
MinTimeout: 5 * time.Second,
Refresh: func() (interface{}, string, error) {
resp, err := conn.DescribeStacks(&cloudformation.DescribeStacksInput{
StackName: aws.String(d.Get("name").(string)),
StackName: aws.String(d.Id()),
})

if err != nil {
awsErr, ok := err.(awserr.Error)
if !ok {
return resp, "DELETE_FAILED", err
return nil, "", err
}

log.Printf("[DEBUG] Error when deleting CloudFormation stack: %s: %s",
awsErr.Code(), awsErr.Message())

// ValidationError: Stack with id % does not exist
if awsErr.Code() == "ValidationError" {
return resp, "DELETE_COMPLETE", nil
}
return nil, "", err
}

if len(resp.Stacks) == 0 {
log.Printf("[DEBUG] CloudFormation stack %q is already gone", d.Get("name"))
log.Printf("[DEBUG] CloudFormation stack %q is already gone", d.Id())
return resp, "DELETE_COMPLETE", nil
}

status := *resp.Stacks[0].StackStatus
lastStatus = status
log.Printf("[DEBUG] Current CloudFormation stack status: %q", status)

return resp, status, err
Expand All @@ -428,6 +496,15 @@ func resourceAwsCloudFormationStackDelete(d *schema.ResourceData, meta interface
return err
}

if lastStatus == "DELETE_FAILED" {
reasons, err := getCloudFormationFailures(d.Id(), conn)
if err != nil {
return fmt.Errorf("Failed getting reasons of failure: %q", err.Error())
}

return fmt.Errorf("%s: %q", lastStatus, reasons)
}

log.Printf("[DEBUG] CloudFormation stack %q has been deleted", d.Id())

d.SetId("")
Expand All @@ -451,32 +528,73 @@ func getLastCfEventTimestamp(stackName string, conn *cloudformation.CloudFormati
return output.StackEvents[0].Timestamp, nil
}

// getCloudFormationFailures returns ResourceStatusReason(s)
// of events that should be failures based on regexp match of status
func getCloudFormationFailures(stackName *string, afterTime time.Time,
conn *cloudformation.CloudFormation) ([]string, error) {
func getCloudFormationRollbackReasons(stackId string, afterTime *time.Time, conn *cloudformation.CloudFormation) ([]string, error) {
var failures []string
// Only catching failures from last 100 events
// Some extra iteration logic via NextToken could be added
// but in reality it's nearly impossible to generate >100
// events by a single stack update
events, err := conn.DescribeStackEvents(&cloudformation.DescribeStackEventsInput{
StackName: stackName,

err := conn.DescribeStackEventsPages(&cloudformation.DescribeStackEventsInput{
StackName: aws.String(stackId),
}, func(page *cloudformation.DescribeStackEventsOutput, lastPage bool) bool {
for _, e := range page.StackEvents {
if afterTime != nil && !e.Timestamp.After(*afterTime) {
continue
}

if cfStackEventIsFailure(e) || cfStackEventIsRollback(e) {
failures = append(failures, *e.ResourceStatusReason)
}
}
return !lastPage
})

if err != nil {
return nil, err
}
return failures, err
}

failRe := regexp.MustCompile("_FAILED$")
rollbackRe := regexp.MustCompile("^ROLLBACK_")
func getCloudFormationDeletionReasons(stackId string, conn *cloudformation.CloudFormation) ([]string, error) {
var failures []string

for _, e := range events.StackEvents {
if (failRe.MatchString(*e.ResourceStatus) || rollbackRe.MatchString(*e.ResourceStatus)) &&
e.Timestamp.After(afterTime) && e.ResourceStatusReason != nil {
failures = append(failures, *e.ResourceStatusReason)
err := conn.DescribeStackEventsPages(&cloudformation.DescribeStackEventsInput{
StackName: aws.String(stackId),
}, func(page *cloudformation.DescribeStackEventsOutput, lastPage bool) bool {
for _, e := range page.StackEvents {
if cfStackEventIsFailure(e) || cfStackEventIsStackDeletion(e) {
failures = append(failures, *e.ResourceStatusReason)
}
}
}
return !lastPage
})

return failures, err
}

// getCloudFormationFailures walks every page of stack events for the given
// stack and collects the ResourceStatusReason of each event that
// cfStackEventIsFailure classifies as a failure.
//
// stackId is sent to the API as the StackName field. The collected reasons
// are returned together with whatever error DescribeStackEventsPages
// reported, so callers should check the error before using the slice.
func getCloudFormationFailures(stackId string, conn *cloudformation.CloudFormation) ([]string, error) {
	var reasons []string

	// collect is invoked once per page of events.
	collect := func(out *cloudformation.DescribeStackEventsOutput, isLast bool) bool {
		for _, event := range out.StackEvents {
			if !cfStackEventIsFailure(event) {
				continue
			}
			// cfStackEventIsFailure only returns true when the reason is
			// non-nil, so the dereference below is safe.
			reasons = append(reasons, *event.ResourceStatusReason)
		}
		// Keep paging until the final page has been processed.
		return !isLast
	}

	input := &cloudformation.DescribeStackEventsInput{
		StackName: aws.String(stackId),
	}
	err := conn.DescribeStackEventsPages(input, collect)

	return reasons, err
}

// cfFailedStatusRe matches resource statuses that end in "_FAILED".
// It is compiled once at package scope rather than on every call.
var cfFailedStatusRe = regexp.MustCompile("_FAILED$")

// cfStackEventIsFailure reports whether the event's ResourceStatus ends in
// "_FAILED" and the event carries a ResourceStatusReason.
//
// A nil event, or an event whose ResourceStatus or ResourceStatusReason is
// nil, is reported as not being a failure instead of triggering a
// nil-pointer dereference.
func cfStackEventIsFailure(event *cloudformation.StackEvent) bool {
	if event == nil || event.ResourceStatus == nil || event.ResourceStatusReason == nil {
		return false
	}
	return cfFailedStatusRe.MatchString(*event.ResourceStatus)
}

// cfRollbackStatusRe matches resource statuses that start with "ROLLBACK_".
// It is compiled once at package scope rather than on every call.
var cfRollbackStatusRe = regexp.MustCompile("^ROLLBACK_")

// cfStackEventIsRollback reports whether the event's ResourceStatus starts
// with "ROLLBACK_" and the event carries a ResourceStatusReason.
//
// A nil event, or an event whose ResourceStatus or ResourceStatusReason is
// nil, is reported as not being a rollback instead of triggering a
// nil-pointer dereference.
func cfStackEventIsRollback(event *cloudformation.StackEvent) bool {
	if event == nil || event.ResourceStatus == nil || event.ResourceStatusReason == nil {
		return false
	}
	return cfRollbackStatusRe.MatchString(*event.ResourceStatus)
}

return failures, nil
// cfStackEventIsStackDeletion reports whether the event is a
// "DELETE_IN_PROGRESS" event whose ResourceType is
// "AWS::CloudFormation::Stack" and which carries a ResourceStatusReason.
//
// A nil event, or an event with a nil ResourceStatus, ResourceType or
// ResourceStatusReason, is reported as not being a stack deletion instead
// of triggering a nil-pointer dereference.
func cfStackEventIsStackDeletion(event *cloudformation.StackEvent) bool {
	if event == nil ||
		event.ResourceStatus == nil ||
		event.ResourceType == nil ||
		event.ResourceStatusReason == nil {
		return false
	}
	return *event.ResourceStatus == "DELETE_IN_PROGRESS" &&
		*event.ResourceType == "AWS::CloudFormation::Stack"
}