Skip to content

Commit

Permalink
TM-720: enable scheduled ssm command monitoring (#8785)
Browse files (browse the repository at this point in the history)
* align main.tf across accounts

* enable ssm monitoring and widgets

* fix

* fix

* remove alarm
  • Loading branch information
drobinson-moj authored Nov 25, 2024
1 parent 4a68669 commit 89fb79a
Show file tree
Hide file tree
Showing 22 changed files with 68 additions and 11 deletions.
2 changes: 2 additions & 0 deletions terraform/environments/corporate-staff-rostering/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ locals {
"ec2_linux",
"ec2_instance_linux",
"ec2_instance_oracle_db_with_backup",
"ssm_command",
]
cloudwatch_metric_alarms_default_actions = ["pagerduty"]
cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"]
Expand All @@ -45,6 +46,7 @@ locals {
enable_s3_db_backup_bucket = true
enable_s3_shared_bucket = true
enable_s3_software_bucket = true
enable_ssm_command_monitoring = true
s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"]
software_bucket_name = "csr-software"
}
Expand Down
6 changes: 6 additions & 0 deletions terraform/environments/corporate-staff-rostering/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ module "baseline" {
)

cloudwatch_metric_alarms = merge(
module.baseline_presets.cloudwatch_metric_alarms_baseline,
lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}),
lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}),
)
Expand Down Expand Up @@ -177,6 +178,11 @@ module "baseline" {
lookup(local.baseline_environment_specific, "s3_buckets", {}),
)

schedule_alarms_lambda = merge(
lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}),
lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}),
)

secretsmanager_secrets = merge(
module.baseline_presets.secretsmanager_secrets,
lookup(local.baseline_all_environments, "secretsmanager_secrets", {}),
Expand Down
2 changes: 2 additions & 0 deletions terraform/environments/hmpps-domain-services/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ locals {
"lb",
"ec2",
"ec2_windows",
"ssm_command",
]
cloudwatch_metric_alarms_default_actions = ["pagerduty"]
cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"]
Expand All @@ -38,6 +39,7 @@ locals {
enable_hmpps_domain = true
enable_image_builder = true
enable_s3_bucket = true
enable_ssm_command_monitoring = true
s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"]
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,10 @@ locals {
})
}

- schedule_alarms = {
+ schedule_alarms_lambda = {
+ function_name = "schedule-alarms"
alarm_patterns = [
- "public-https-*-https-unhealthy-load-balancer-host",
+ "public-https-*-unhealthy-load-balancer-host",
]
}

Expand Down
8 changes: 3 additions & 5 deletions terraform/environments/hmpps-domain-services/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ module "baseline" {
)

cloudwatch_metric_alarms = merge(
module.baseline_presets.cloudwatch_metric_alarms_baseline,
lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}),
lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}),
)
Expand Down Expand Up @@ -178,11 +179,8 @@ module "baseline" {
)

schedule_alarms_lambda = merge(
- {
- function_name = "schedule-alarms"
- },
- lookup(local.baseline_all_environments, "schedule_alarms", {}),
- lookup(local.baseline_environment_specific, "schedule_alarms", {}),
+ lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}),
+ lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}),
)

secretsmanager_secrets = merge(
Expand Down
1 change: 1 addition & 0 deletions terraform/environments/hmpps-oem/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ locals {
enable_s3_shared_bucket = true
enable_s3_software_bucket = true
enable_ssm_command_monitoring = true
enable_ssm_missing_metric_monitoring = true
s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"]
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ locals {
csr-r4-pp = ["r4.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"]
csr-r5-pp = ["r5.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"]
csr-r6-pp = ["r6.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"]
- hpa-preprod = ["hpa-preprod.service.hmpps.dsd.io", true, "azure-fixngo-pagerduty"]
nomis-lsast = ["c.lsast-nomis.az.justice.gov.uk", true, "nomis-pagerduty"]
nomis-pp = ["c.pp-nomis.az.justice.gov.uk", true, "nomis-pagerduty"]
nomis-reporting-pp = ["reporting.pp-nomis.az.justice.gov.uk", true, "nomis-combined-reporting-pagerduty"]
Expand Down
4 changes: 2 additions & 2 deletions terraform/environments/hmpps-oem/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,8 @@ module "baseline" {
)

schedule_alarms_lambda = merge(
- lookup(local.baseline_all_environments, "schedule_alarms", {}),
- lookup(local.baseline_environment_specific, "schedule_alarms", {}),
+ lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}),
+ lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}),
)

secretsmanager_secrets = merge(
Expand Down
2 changes: 2 additions & 0 deletions terraform/environments/nomis-combined-reporting/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ locals {
"ec2_instance_linux",
"ec2_instance_oracle_db_with_backup",
"ec2_windows",
"ssm_command",
]
cloudwatch_metric_alarms_default_actions = ["pagerduty"]
cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"]
Expand All @@ -44,6 +45,7 @@ locals {
enable_s3_bucket = true
enable_s3_db_backup_bucket = true
enable_s3_software_bucket = true
enable_ssm_command_monitoring = true
s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"]
}
}
Expand Down
6 changes: 6 additions & 0 deletions terraform/environments/nomis-combined-reporting/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ module "baseline" {
)

cloudwatch_metric_alarms = merge(
module.baseline_presets.cloudwatch_metric_alarms_baseline,
lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}),
lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}),
)
Expand Down Expand Up @@ -177,6 +178,11 @@ module "baseline" {
lookup(local.baseline_environment_specific, "s3_buckets", {}),
)

schedule_alarms_lambda = merge(
lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}),
lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}),
)

secretsmanager_secrets = merge(
module.baseline_presets.secretsmanager_secrets,
lookup(local.baseline_all_environments, "secretsmanager_secrets", {}),
Expand Down
2 changes: 2 additions & 0 deletions terraform/environments/nomis-data-hub/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ locals {
"ec2_instance_linux",
"ec2_instance_textfile_monitoring",
"ec2_windows",
"ssm_command",
]
cloudwatch_metric_alarms_default_actions = ["pagerduty"]
cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"]
Expand All @@ -42,6 +43,7 @@ locals {
enable_image_builder = true
enable_s3_bucket = true
enable_s3_software_bucket = true
enable_ssm_command_monitoring = true
s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"]
}
}
Expand Down
6 changes: 6 additions & 0 deletions terraform/environments/nomis-data-hub/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ module "baseline" {
)

cloudwatch_metric_alarms = merge(
module.baseline_presets.cloudwatch_metric_alarms_baseline,
lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}),
lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}),
)
Expand Down Expand Up @@ -177,6 +178,11 @@ module "baseline" {
lookup(local.baseline_environment_specific, "s3_buckets", {}),
)

schedule_alarms_lambda = merge(
lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}),
lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}),
)

secretsmanager_secrets = merge(
module.baseline_presets.secretsmanager_secrets,
lookup(local.baseline_all_environments, "secretsmanager_secrets", {}),
Expand Down
1 change: 1 addition & 0 deletions terraform/environments/nomis/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ locals {
enable_s3_bucket = true
enable_s3_db_backup_bucket = true
enable_s3_software_bucket = true
enable_ssm_command_monitoring = true
route53_resolver_rules = { outbound-data-and-private-subnets = ["azure-fixngo-domain"] }
s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"]
software_bucket_name = "ec2-image-builder-nomis"
Expand Down
6 changes: 6 additions & 0 deletions terraform/environments/nomis/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ module "baseline" {
)

cloudwatch_metric_alarms = merge(
module.baseline_presets.cloudwatch_metric_alarms_baseline,
lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}),
lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}),
)
Expand Down Expand Up @@ -177,6 +178,11 @@ module "baseline" {
lookup(local.baseline_environment_specific, "s3_buckets", {}),
)

schedule_alarms_lambda = merge(
lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}),
lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}),
)

secretsmanager_secrets = merge(
module.baseline_presets.secretsmanager_secrets,
lookup(local.baseline_all_environments, "secretsmanager_secrets", {}),
Expand Down
2 changes: 2 additions & 0 deletions terraform/environments/oasys-national-reporting/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ locals {
"ec2_linux",
"ec2_instance_linux",
"ec2_windows",
"ssm_command",
]
cloudwatch_metric_alarms_default_actions = ["pagerduty"]
cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"]
Expand All @@ -41,6 +42,7 @@ locals {
enable_image_builder = true
enable_s3_bucket = true
enable_s3_shared_bucket = true
enable_ssm_command_monitoring = true
s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"]
}
}
Expand Down
6 changes: 6 additions & 0 deletions terraform/environments/oasys-national-reporting/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ module "baseline" {
)

cloudwatch_metric_alarms = merge(
module.baseline_presets.cloudwatch_metric_alarms_baseline,
lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}),
lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}),
)
Expand Down Expand Up @@ -177,6 +178,11 @@ module "baseline" {
lookup(local.baseline_environment_specific, "s3_buckets", {}),
)

schedule_alarms_lambda = merge(
lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}),
lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}),
)

secretsmanager_secrets = merge(
module.baseline_presets.secretsmanager_secrets,
lookup(local.baseline_all_environments, "secretsmanager_secrets", {}),
Expand Down
2 changes: 2 additions & 0 deletions terraform/environments/oasys/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ locals {
"ec2_instance_linux",
"ec2_instance_oracle_db_with_backup",
"ec2_instance_textfile_monitoring",
"ssm_command",
]
cloudwatch_metric_alarms_default_actions = ["pagerduty"]
cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"]
Expand All @@ -45,6 +46,7 @@ locals {
enable_s3_bucket = true
enable_s3_db_backup_bucket = true
enable_s3_shared_bucket = true
enable_ssm_command_monitoring = true
enable_vmimport = true
s3_bucket_name = "${local.application_name}-${local.environment}"
s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"]
Expand Down
6 changes: 6 additions & 0 deletions terraform/environments/oasys/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ module "baseline" {
)

cloudwatch_metric_alarms = merge(
module.baseline_presets.cloudwatch_metric_alarms_baseline,
lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}),
lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}),
)
Expand Down Expand Up @@ -177,6 +178,11 @@ module "baseline" {
lookup(local.baseline_environment_specific, "s3_buckets", {}),
)

schedule_alarms_lambda = merge(
lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}),
lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}),
)

secretsmanager_secrets = merge(
module.baseline_presets.secretsmanager_secrets,
lookup(local.baseline_all_environments, "secretsmanager_secrets", {}),
Expand Down
2 changes: 2 additions & 0 deletions terraform/environments/planetfm/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ locals {
"network_lb",
"ec2",
"ec2_windows",
"ssm_command",
]
cloudwatch_metric_alarms_default_actions = ["pagerduty"]
cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"]
Expand All @@ -39,6 +40,7 @@ locals {
enable_image_builder = true
enable_s3_bucket = true
enable_s3_software_bucket = true
enable_ssm_command_monitoring = true
s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"]
}
}
Expand Down
6 changes: 6 additions & 0 deletions terraform/environments/planetfm/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ module "baseline" {
)

cloudwatch_metric_alarms = merge(
module.baseline_presets.cloudwatch_metric_alarms_baseline,
lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}),
lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}),
)
Expand Down Expand Up @@ -177,6 +178,11 @@ module "baseline" {
lookup(local.baseline_environment_specific, "s3_buckets", {}),
)

schedule_alarms_lambda = merge(
lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}),
lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}),
)

secretsmanager_secrets = merge(
module.baseline_presets.secretsmanager_secrets,
lookup(local.baseline_all_environments, "secretsmanager_secrets", {}),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ locals {
var.options.enable_ssm_command_monitoring ? {
"failed-ssm-command-${var.environment.account_name}" = local.cloudwatch_metric_alarms_by_sns_topic["dso-pipelines-pagerduty"].ssm.failed-ssm-command
} : {},
- var.options.enable_ssm_command_monitoring ? {
+ var.options.enable_ssm_missing_metric_monitoring ? {
"ssm-command-metrics-missing-${var.environment.account_name}" = local.cloudwatch_metric_alarms_by_sns_topic["dso-pipelines-pagerduty"].ssm.ssm-command-metrics-missing
} : {},
)
Expand Down
1 change: 1 addition & 0 deletions terraform/modules/baseline_presets/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ variable "options" {
enable_s3_shared_bucket = optional(bool, false) # create devtest and preprodprod S3 bucket for sharing between accounts
enable_s3_software_bucket = optional(bool, false) # create software S3 bucket in test account for image builder/configuration-management
enable_ssm_command_monitoring = optional(bool, false) # create SNS topic and alarms for SSM command monitoring
enable_ssm_missing_metric_monitoring = optional(bool, false) # create alarm if SSM command metrics are missing
enable_vmimport = optional(bool, false) # create role for vm imports
route53_resolver_rules = optional(map(list(string)), {}) # create route53 resolver rules; list of map keys to filter local.route53_resolver_rules_all
iam_service_linked_roles = optional(list(string)) # create iam service linked roles; list of map keys to filter local.iam_service_linked_roles; default is to create all
Expand Down

0 comments on commit 89fb79a

Please sign in to comment.