From a25165b6d1fe3e4b6541db1d565bc9ec83cbf263 Mon Sep 17 00:00:00 2001 From: Abdul Muqtadir Mohammed Date: Tue, 9 Mar 2021 17:30:55 -0600 Subject: [PATCH] Provide AutoScalingGroupName for webhook notifications (#384) --- pkg/monitor/sqsevent/asg-lifecycle-event.go | 13 ++++---- .../sqsevent/ec2-state-change-event.go | 15 +++++---- .../rebalance-recommendation-event.go | 14 ++++---- pkg/monitor/sqsevent/spot-itn-event.go | 15 +++++---- pkg/monitor/sqsevent/sqs-monitor.go | 33 ++++++++++++------- pkg/monitor/types.go | 26 ++++++++------- 6 files changed, 66 insertions(+), 50 deletions(-) diff --git a/pkg/monitor/sqsevent/asg-lifecycle-event.go b/pkg/monitor/sqsevent/asg-lifecycle-event.go index f4a88598..c3960764 100644 --- a/pkg/monitor/sqsevent/asg-lifecycle-event.go +++ b/pkg/monitor/sqsevent/asg-lifecycle-event.go @@ -70,12 +70,13 @@ func (m SQSMonitor) asgTerminationToInterruptionEvent(event EventBridgeEvent, me } interruptionEvent := monitor.InterruptionEvent{ - EventID: fmt.Sprintf("asg-lifecycle-term-%x", event.ID), - Kind: SQSTerminateKind, - StartTime: event.getTime(), - NodeName: nodeName, - InstanceID: lifecycleDetail.EC2InstanceID, - Description: fmt.Sprintf("ASG Lifecycle Termination event received. Instance will be interrupted at %s \n", event.getTime()), + EventID: fmt.Sprintf("asg-lifecycle-term-%x", event.ID), + Kind: SQSTerminateKind, + AutoScalingGroupName: lifecycleDetail.AutoScalingGroupName, + StartTime: event.getTime(), + NodeName: nodeName, + InstanceID: lifecycleDetail.EC2InstanceID, + Description: fmt.Sprintf("ASG Lifecycle Termination event received. Instance will be interrupted at %s \n", event.getTime()), } interruptionEvent.PostDrainTask = func(interruptionEvent monitor.InterruptionEvent, _ node.Node) error { diff --git a/pkg/monitor/sqsevent/ec2-state-change-event.go b/pkg/monitor/sqsevent/ec2-state-change-event.go index 7db7fff8..9fa37c8d 100644 --- a/pkg/monitor/sqsevent/ec2-state-change-event.go +++ b/pkg/monitor/sqsevent/ec2-state-change-event.go @@ -65,14 +65,15 @@ func (m SQSMonitor) ec2StateChangeToInterruptionEvent(event EventBridgeEvent, me if err != nil { return monitor.InterruptionEvent{}, err } - + asgName, err := m.retrieveAutoScalingGroupName(ec2StateChangeDetail.InstanceID) interruptionEvent := monitor.InterruptionEvent{ - EventID: fmt.Sprintf("ec2-state-change-event-%x", event.ID), - Kind: SQSTerminateKind, - StartTime: event.getTime(), - NodeName: nodeName, - InstanceID: ec2StateChangeDetail.InstanceID, - Description: fmt.Sprintf("EC2 State Change event received. Instance went into %s at %s \n", ec2StateChangeDetail.State, event.getTime()), + EventID: fmt.Sprintf("ec2-state-change-event-%x", event.ID), + Kind: SQSTerminateKind, + StartTime: event.getTime(), + NodeName: nodeName, + AutoScalingGroupName: asgName, + InstanceID: ec2StateChangeDetail.InstanceID, + Description: fmt.Sprintf("EC2 State Change event received. Instance went into %s at %s \n", ec2StateChangeDetail.State, event.getTime()), } interruptionEvent.PostDrainTask = func(interruptionEvent monitor.InterruptionEvent, n node.Node) error { errs := m.deleteMessages([]*sqs.Message{message}) diff --git a/pkg/monitor/sqsevent/rebalance-recommendation-event.go b/pkg/monitor/sqsevent/rebalance-recommendation-event.go index b12d5a40..e6a8876e 100644 --- a/pkg/monitor/sqsevent/rebalance-recommendation-event.go +++ b/pkg/monitor/sqsevent/rebalance-recommendation-event.go @@ -57,14 +57,16 @@ func (m SQSMonitor) rebalanceRecommendationToInterruptionEvent(event EventBridge if err != nil { return monitor.InterruptionEvent{}, err } + asgName, err := m.retrieveAutoScalingGroupName(rebalanceRecDetail.InstanceID) interruptionEvent := monitor.InterruptionEvent{ - EventID: fmt.Sprintf("rebalance-recommendation-event-%x", event.ID), - Kind: SQSTerminateKind, - StartTime: event.getTime(), - NodeName: nodeName, - InstanceID: rebalanceRecDetail.InstanceID, - Description: fmt.Sprintf("Rebalance recommendation event received. Instance will be cordoned at %s \n", event.getTime()), + EventID: fmt.Sprintf("rebalance-recommendation-event-%x", event.ID), + Kind: SQSTerminateKind, + AutoScalingGroupName: asgName, + StartTime: event.getTime(), + NodeName: nodeName, + InstanceID: rebalanceRecDetail.InstanceID, + Description: fmt.Sprintf("Rebalance recommendation event received. Instance will be cordoned at %s \n", event.getTime()), } interruptionEvent.PostDrainTask = func(interruptionEvent monitor.InterruptionEvent, n node.Node) error { errs := m.deleteMessages([]*sqs.Message{message}) diff --git a/pkg/monitor/sqsevent/spot-itn-event.go b/pkg/monitor/sqsevent/spot-itn-event.go index 6b578619..e77ae9a5 100644 --- a/pkg/monitor/sqsevent/spot-itn-event.go +++ b/pkg/monitor/sqsevent/spot-itn-event.go @@ -59,14 +59,15 @@ func (m SQSMonitor) spotITNTerminationToInterruptionEvent(event EventBridgeEvent if err != nil { return monitor.InterruptionEvent{}, err } - + asgName, err := m.retrieveAutoScalingGroupName(spotInterruptionDetail.InstanceID) interruptionEvent := monitor.InterruptionEvent{ - EventID: fmt.Sprintf("spot-itn-event-%x", event.ID), - Kind: SQSTerminateKind, - StartTime: event.getTime(), - NodeName: nodeName, - InstanceID: spotInterruptionDetail.InstanceID, - Description: fmt.Sprintf("Spot Interruption event received. Instance will be interrupted at %s \n", event.getTime()), + EventID: fmt.Sprintf("spot-itn-event-%x", event.ID), + Kind: SQSTerminateKind, + AutoScalingGroupName: asgName, + StartTime: event.getTime(), + NodeName: nodeName, + InstanceID: spotInterruptionDetail.InstanceID, + Description: fmt.Sprintf("Spot Interruption event received. Instance will be interrupted at %s \n", event.getTime()), } interruptionEvent.PostDrainTask = func(interruptionEvent monitor.InterruptionEvent, n node.Node) error { errs := m.deleteMessages([]*sqs.Message{message}) diff --git a/pkg/monitor/sqsevent/sqs-monitor.go b/pkg/monitor/sqsevent/sqs-monitor.go index 470553cd..6f1b66df 100644 --- a/pkg/monitor/sqsevent/sqs-monitor.go +++ b/pkg/monitor/sqsevent/sqs-monitor.go @@ -222,20 +222,11 @@ func (m SQSMonitor) isInstanceManaged(instanceID string) (bool, error) { if instanceID == "" { return false, fmt.Errorf("Instance ID was empty when calling isInstanceManaged") } - asgDescribeInstanceInput := autoscaling.DescribeAutoScalingInstancesInput{ - InstanceIds: []*string{&instanceID}, - MaxRecords: aws.Int64(50), - } - asgs, err := m.ASG.DescribeAutoScalingInstances(&asgDescribeInstanceInput) - if err != nil { + asgName, err := m.retrieveAutoScalingGroupName(instanceID) + if asgName == "" { return false, err } - if len(asgs.AutoScalingInstances) == 0 { - log.Debug().Str("instance_id", instanceID).Msg("Did not find an Auto Scaling Group for the given instance id") - return false, nil - } - asgName := asgs.AutoScalingInstances[0].AutoScalingGroupName - asgFilter := autoscaling.Filter{Name: aws.String("auto-scaling-group"), Values: []*string{asgName}} + asgFilter := autoscaling.Filter{Name: aws.String("auto-scaling-group"), Values: []*string{aws.String(asgName)}} asgDescribeTagsInput := autoscaling.DescribeTagsInput{ Filters: []*autoscaling.Filter{&asgFilter}, } @@ -259,3 +250,21 @@ func (m SQSMonitor) isInstanceManaged(instanceID string) (bool, error) { } return isManaged, err } + +// retrieveAutoScalingGroupName returns the autoscaling group name for a given instanceID +func (m SQSMonitor) retrieveAutoScalingGroupName(instanceID string) (string, error) { + asgDescribeInstanceInput := autoscaling.DescribeAutoScalingInstancesInput{ + InstanceIds: []*string{&instanceID}, + MaxRecords: aws.Int64(50), + } + asgs, err := m.ASG.DescribeAutoScalingInstances(&asgDescribeInstanceInput) + if err != nil { + return "", err + } + if len(asgs.AutoScalingInstances) == 0 { + log.Debug().Str("instance_id", instanceID).Msg("Did not find an Auto Scaling Group for the given instance id") + return "", nil + } + asgName := asgs.AutoScalingInstances[0].AutoScalingGroupName + return *asgName, err +} diff --git a/pkg/monitor/types.go b/pkg/monitor/types.go index a237088a..e84b8c07 100644 --- a/pkg/monitor/types.go +++ b/pkg/monitor/types.go @@ -24,18 +24,20 @@ type DrainTask func(InterruptionEvent, node.Node) error // InterruptionEvent gives more context of the interruption event type InterruptionEvent struct { - EventID string - Kind string - Description string - State string - NodeName string - InstanceID string - StartTime time.Time - EndTime time.Time - Drained bool - InProgress bool - PreDrainTask DrainTask `json:"-"` - PostDrainTask DrainTask `json:"-"` + EventID string + Kind string + Description string + State string + AutoScalingGroupName string + NodeName string + NodeLabels map[string]string + InstanceID string + StartTime time.Time + EndTime time.Time + Drained bool + InProgress bool + PreDrainTask DrainTask `json:"-"` + PostDrainTask DrainTask `json:"-"` } // TimeUntilEvent returns the duration until the event start time