Skip to content

Commit

Permalink
Tm 570 automate disabling of cloud watch alarms for non prod environm…
Browse files Browse the repository at this point in the history
…ents mp (#8231)
  • Loading branch information
andrewmooreio authored Oct 14, 2024
1 parent b2a1f09 commit 4083aac
Show file tree
Hide file tree
Showing 7 changed files with 261 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Deploys the Lambda that toggles CloudWatch alarm actions for this environment.
module "cloudwatch_alarms_disable" {
  source = "../../../../modules/disable_alarms_lambda"

  # Function name is "<application>-<environment>-disable-alarms".
  lambda_function_name = format(
    "%s-%s-disable-alarms",
    var.account_info.application_name,
    var.env_name,
  )

  tags = local.tags
}
54 changes: 54 additions & 0 deletions terraform/modules/disable_alarms_lambda/lambda/disable_alarms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import logging
import os

import boto3

# Configure the root logger; the level name is read from LOG_LEVEL and
# defaults to INFO when the variable is not set.
logger = logging.getLogger()
log_level = os.getenv("LOG_LEVEL", "INFO").upper()
logger.setLevel(log_level)


def lambda_handler(event, context):
    """Enable or disable CloudWatch alarm actions.

    The alarms to process come from the comma-separated ``SPECIFIC_ALARMS``
    environment variable. When it is unset or empty, every metric alarm
    returned by ``describe_alarms`` is processed instead.

    Args:
        event: Invocation payload. Must contain an ``"ACTION"`` key whose
            value is ``"ENABLE"`` or ``"DISABLE"`` (matched
            case-insensitively).
        context: Lambda context object (unused).

    Returns:
        A dict with ``"statusCode"`` (200 on success, 500 on any failure)
        and a human-readable ``"body"``. Errors are reported through the
        return value rather than raised.
    """
    try:
        raw_action = event.get("ACTION")
        if not raw_action:
            raise ValueError("ACTION not provided in the event")

        # Normalise the case so "disable" and "DISABLE" are both accepted, and
        # validate up front so a bad action fails even when there are no
        # alarms to iterate over.
        action = str(raw_action).strip().upper()
        if action not in ("DISABLE", "ENABLE"):
            raise ValueError(f"Invalid action: {raw_action}")

        # Drop blank entries and surrounding whitespace ("a, b," -> ["a", "b"]).
        specific_alarms = [
            name.strip()
            for name in os.environ.get("SPECIFIC_ALARMS", "").split(",")
            if name.strip()
        ]

        # Create CloudWatch client
        cloudwatch = boto3.client("cloudwatch")

        # No explicit list: collect every metric alarm. describe_alarms is a
        # paginated API, so walk all pages instead of reading only the first.
        if not specific_alarms:
            paginator = cloudwatch.get_paginator("describe_alarms")
            for page in paginator.paginate():
                specific_alarms.extend(
                    alarm["AlarmName"] for alarm in page.get("MetricAlarms", [])
                )

        if action == "DISABLE":
            toggle, verb = cloudwatch.disable_alarm_actions, "Disabled"
        else:
            toggle, verb = cloudwatch.enable_alarm_actions, "Enabled"

        # One call per alarm keeps a per-alarm log line for auditing.
        for alarm_name in specific_alarms:
            toggle(AlarmNames=[alarm_name])
            logger.info("%s alarm: %s", verb, alarm_name)

        logger.info("Processed %d alarms", len(specific_alarms))

        return {
            "statusCode": 200,
            "body": f"Alarms {action.lower()}d successfully",
        }
    except Exception as e:
        # Top-level boundary: log with traceback and report the failure in
        # the response body instead of propagating.
        logger.exception("Error processing alarms: %s", e)
        return {"statusCode": 500, "body": f"Error processing alarms: {str(e)}"}
33 changes: 33 additions & 0 deletions terraform/modules/disable_alarms_lambda/locals.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
locals {
  # Split the validated "HH:MM" inputs once so every cron expression below
  # reuses the same parts.
  start_hour   = split(":", var.start_time)[0]
  start_minute = split(":", var.start_time)[1]
  end_hour     = split(":", var.end_time)[0]
  end_minute   = split(":", var.end_time)[1]

  # Cron layout: cron(minutes hours day-of-month month day-of-week year).
  # Disable at start_time and enable at end_time, Monday to Friday. On their
  # own these rules leave alarms disabled from Friday's start_time until
  # Monday's end_time.
  weekday_schedule = {
    "disable_alarms_weekday" = {
      name        = "disable-alarms-weekday"
      description = "Disable alarms on weekdays"
      schedule    = "cron(${local.start_minute} ${local.start_hour} ? * MON-FRI *)"
      action      = "disable"
    },
    "enable_alarms_weekday" = {
      name        = "enable-alarms-weekday"
      description = "Enable alarms on weekdays"
      schedule    = "cron(${local.end_minute} ${local.end_hour} ? * MON-FRI *)"
      action      = "enable"
    }
  }

  # disable_weekend = true: keep the explicit Friday-disable / Monday-enable
  # rules (alarms stay off for the whole weekend).
  weekend_schedule = var.disable_weekend ? {
    "disable_alarms_weekend" = {
      name        = "disable-alarms-weekend"
      description = "Disable alarms on weekends"
      schedule    = "cron(${local.start_minute} ${local.start_hour} ? * FRI *)"
      action      = "disable"
    },
    "enable_alarms_monday" = {
      name        = "enable-alarms-monday"
      description = "Enable alarms on Monday"
      schedule    = "cron(${local.end_minute} ${local.end_hour} ? * MON *)"
      action      = "enable"
    }
  } : {}

  # disable_weekend = false: the weekday rules alone would still leave alarms
  # off all weekend, so add Saturday/Sunday rules that mirror the weekday
  # enable/disable cycle.
  weekend_active_schedule = var.disable_weekend ? {} : {
    "enable_alarms_weekend_day" = {
      name        = "enable-alarms-weekend-day"
      description = "Enable alarms on weekend days"
      schedule    = "cron(${local.end_minute} ${local.end_hour} ? * SAT,SUN *)"
      action      = "enable"
    },
    "disable_alarms_weekend_night" = {
      name        = "disable-alarms-weekend-night"
      description = "Disable alarms on weekend nights"
      schedule    = "cron(${local.start_minute} ${local.start_hour} ? * SAT,SUN *)"
      action      = "disable"
    }
  }

  schedule_rules = merge(
    local.weekday_schedule,
    local.weekend_schedule,
    local.weekend_active_schedule,
  )
}
85 changes: 85 additions & 0 deletions terraform/modules/disable_alarms_lambda/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Zips the lambda/ directory into the deployment package. The archive is
# written inside that same directory, so it is excluded from its own contents.
data "archive_file" "lambda_function_payload" {
  type        = "zip"
  source_dir  = "${path.module}/lambda/"
  excludes    = ["disable_alarms.zip"]
  output_path = "${path.module}/lambda/disable_alarms.zip"
}

# Lambda function that enables/disables CloudWatch alarm actions.
resource "aws_lambda_function" "disable_alarms" {
  function_name = var.lambda_function_name
  role          = aws_iam_role.lambda_exec.arn
  runtime       = "python3.12"
  architectures = ["arm64"]
  handler       = "disable_alarms.lambda_handler"

  # Take the path from the archive data source instead of repeating it, so
  # the package path and its hash always come from the same place.
  filename         = data.archive_file.lambda_function_payload.output_path
  source_code_hash = data.archive_file.lambda_function_payload.output_base64sha256

  # The handler makes one API call per alarm; the 3-second default timeout
  # is too short once more than a handful of alarms are processed.
  timeout = 60

  environment {
    variables = {
      LOG_LEVEL = var.lambda_log_level
      # Comma-separated alarm names; an empty string means "all alarms".
      SPECIFIC_ALARMS = join(",", var.alarm_list)
    }
  }

  tags = var.tags

  # Create the managed log group first so the function's first invocation
  # does not create an unmanaged one with the same name.
  depends_on = [aws_cloudwatch_log_group.execution_logs]
}

# Log group for the function's execution logs, kept for one week.
resource "aws_cloudwatch_log_group" "execution_logs" {
  name              = "/aws/lambda/${var.lambda_function_name}"
  retention_in_days = 7

  tags = var.tags
}

# Execution role assumed by the Lambda function.
resource "aws_iam_role" "lambda_exec" {
  name               = format("%s-role", var.lambda_function_name)
  assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json

  tags = var.tags
}

# Trust policy: lets the Lambda service assume the execution role.
data "aws_iam_policy_document" "lambda_assume_role" {
  statement {
    actions = ["sts:AssumeRole"]

    principals {
      identifiers = ["lambda.amazonaws.com"]
      type        = "Service"
    }
  }
}

# Inline policy attaching the logging permissions to the execution role.
resource "aws_iam_role_policy" "lambda_logging" {
  role   = aws_iam_role.lambda_exec.id
  name   = format("%s-logging-policy", var.lambda_function_name)
  policy = data.aws_iam_policy_document.lambda_logging.json
}

# Permissions the function needs to write its logs.
# NOTE(review): the resource pattern covers every log group in every
# account/region; consider narrowing it to this function's log group.
data "aws_iam_policy_document" "lambda_logging" {
  statement {
    effect    = "Allow"
    resources = ["arn:aws:logs:*:*:*"]
    actions = [
      "logs:CreateLogGroup",
      "logs:CreateLogStream",
      "logs:PutLogEvents",
    ]
  }
}

# Inline policy attaching the alarm-management permissions to the execution role.
resource "aws_iam_role_policy" "lambda_cloudwatch" {
  role   = aws_iam_role.lambda_exec.id
  name   = format("%s-cloudwatch-policy", var.lambda_function_name)
  policy = data.aws_iam_policy_document.lambda_cloudwatch.json
}

# Permissions to list alarms and to switch their actions on or off.
data "aws_iam_policy_document" "lambda_cloudwatch" {
  statement {
    effect    = "Allow"
    resources = ["arn:aws:cloudwatch:*:*:alarm:*"]
    actions = [
      "cloudwatch:DescribeAlarms",
      "cloudwatch:DisableAlarmActions",
      "cloudwatch:EnableAlarmActions",
    ]
  }
}
14 changes: 14 additions & 0 deletions terraform/modules/disable_alarms_lambda/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# ARN of the deployed function.
output "lambda_function_arn" {
  value       = aws_lambda_function.disable_alarms.arn
  description = "The ARN of the Lambda function"
}

# Name of the deployed function, as reported by the created resource.
output "lambda_function_name" {
  value       = aws_lambda_function.disable_alarms.function_name
  description = "The name of the Lambda function"
}

# output "cloudwatch_event_rule_arns" {
# description = "The ARNs of the CloudWatch Event Rules"
# value = { for k, v in aws_cloudwatch_event_rule.alarm_scheduler : k => v.arn }
# }
55 changes: 55 additions & 0 deletions terraform/modules/disable_alarms_lambda/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Required: no default is provided.
variable "lambda_function_name" {
  type        = string
  description = "Name of the Lambda function"
}

# Optional tags passed to the taggable resources in this module.
variable "tags" {
  type        = map(string)
  default     = {}
  description = "Tags to apply to resources"
}

# Time of day at which the "disable" rules fire.
variable "start_time" {
  type        = string
  default     = "22:45"
  description = "Start time for disabling alarms (HH:MM)"

  # Accept only zero-padded 24-hour times, 00:00 through 23:59.
  validation {
    error_message = "Start time must be in the format HH:MM (24-hour clock)."
    condition     = can(regex("^([0-1][0-9]|2[0-3]):[0-5][0-9]$", var.start_time))
  }
}

# Time of day at which the "enable" rules fire.
variable "end_time" {
  description = "End time for enabling alarms (HH:MM)"
  type        = string
  default     = "06:15"

  # Accept only zero-padded 24-hour times, 00:00 through 23:59.
  validation {
    condition     = can(regex("^([0-1][0-9]|2[0-3]):[0-5][0-9]$", var.end_time))
    error_message = "End time must be in the format HH:MM (24-hour clock)."
  }
}

# Controls whether the weekend schedule rules are added to the rule set.
variable "disable_weekend" {
  type        = bool
  default     = true
  description = "Whether to disable alarms for the entire weekend"
}

# Passed to the function as its LOG_LEVEL environment variable.
variable "lambda_log_level" {
  type        = string
  default     = "INFO"
  description = "Log level for the Lambda function"

  # Only these four level names are accepted.
  validation {
    error_message = "Log level must be one of DEBUG, INFO, WARNING, or ERROR"
    condition     = contains(["DEBUG", "INFO", "WARNING", "ERROR"], var.lambda_log_level)
  }
}

# Joined with commas into the function's SPECIFIC_ALARMS environment variable.
variable "alarm_list" {
  type        = list(string)
  default     = []
  description = "List of specific alarms to manage (empty list means all alarms)"
}
13 changes: 13 additions & 0 deletions terraform/modules/disable_alarms_lambda/versions.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Terraform and provider version constraints for this module.
terraform {
  required_version = "~> 1.8"

  required_providers {
    # Provides the archive_file data source that builds the Lambda zip.
    archive = {
      source  = "hashicorp/archive"
      version = "~> 2.0"
    }
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
  }
}

0 comments on commit 4083aac

Please sign in to comment.