From db5d1cc2faa405f7dde3135d19511396f7ba6291 Mon Sep 17 00:00:00 2001 From: Seth Vargo Date: Tue, 30 Mar 2021 16:11:33 -0400 Subject: [PATCH] Lower data-layer TTLs (#1962) We currently have two TTLs on many services: the minTTL that is enforced via the database lock, and a minTTL that is enforced by the service being invoked by Cloud Scheduler. There were some mismatches over which was the "authoritative" schedule. This commit lowers the minTTLs on most services to 5min. This will prevent concurrent runs and crazy DoSing. However, the _intended_ execution schedule's source of truth is Cloud Scheduler. I believe this is the root cause of the recent alert, because the "4 hour" windows between the backup job's minTTL and Cloud Scheduler's 4 hours were misaligned. --- pkg/config/backup_config.go | 7 +++---- pkg/config/cleanup_server_config.go | 2 +- pkg/config/rotation_config.go | 2 +- pkg/config/stats_puller_config.go | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pkg/config/backup_config.go b/pkg/config/backup_config.go index a024f37c6..bb5c1db97 100644 --- a/pkg/config/backup_config.go +++ b/pkg/config/backup_config.go @@ -36,10 +36,9 @@ type BackupConfig struct { Port string `env:"PORT, default=8080"` // MinTTL is the minimum amount of time that must elapse between attempting - // backups. This is used to control whether the pull is actually attempted at - // the controller layer, independent of the data layer. In effect, it rate - // limits the number of requests. - MinTTL time.Duration `env:"BACKUP_MIN_PERIOD, default=4h"` + // backups. This is used to control whether the backup is actually executed + // and to prevent concurrent backups. + MinTTL time.Duration `env:"BACKUP_MIN_PERIOD, default=5m"` // Timeout is the maximum amount of time to wait for a backup operation to // complete. 
diff --git a/pkg/config/cleanup_server_config.go b/pkg/config/cleanup_server_config.go index 8688fca93..26135fde8 100644 --- a/pkg/config/cleanup_server_config.go +++ b/pkg/config/cleanup_server_config.go @@ -46,7 +46,7 @@ type CleanupConfig struct { // Cleanup config AuditEntryMaxAge time.Duration `env:"AUDIT_ENTRY_MAX_AGE, default=720h"` AuthorizedAppMaxAge time.Duration `env:"AUTHORIZED_APP_MAX_AGE, default=336h"` - CleanupMinPeriod time.Duration `env:"CLEANUP_MIN_PERIOD, default=15m"` + CleanupMinPeriod time.Duration `env:"CLEANUP_MIN_PERIOD, default=5m"` // KeyServerStatsMaxAge is the maximum amount of time to retain key-server stats. KeyServerStatsMaxAge time.Duration `env:"KEY_SERVER_STATS_MAX_AGE, default=720h"` MobileAppMaxAge time.Duration `env:"MOBILE_APP_MAX_AGE, default=168h"` diff --git a/pkg/config/rotation_config.go b/pkg/config/rotation_config.go index 7e2108597..b39e771b1 100644 --- a/pkg/config/rotation_config.go +++ b/pkg/config/rotation_config.go @@ -43,7 +43,7 @@ type RotationConfig struct { // rotation events. This is used to control whether rotation is actually // attempted at the controller layer, independent of the data layer. In // effect, it rate limits the number of rotation requests. - MinTTL time.Duration `env:"MIN_TTL, default=15m"` + MinTTL time.Duration `env:"MIN_TTL, default=5m"` // TokenSigning is the token signing configuration. This defines the parent // key and common data like issuer, but the individual versions are controlled diff --git a/pkg/config/stats_puller_config.go b/pkg/config/stats_puller_config.go index 526e4f50a..97259f408 100644 --- a/pkg/config/stats_puller_config.go +++ b/pkg/config/stats_puller_config.go @@ -54,7 +54,7 @@ type StatsPullerConfig struct { // stats-pull events. This is used to control whether the pull is actually // attempted at the controller layer, independent of the data layer. In // effect, it rate limits the number of rotation requests. 
- MinTTL time.Duration `env:"MIN_TTL, default=15m"` + MinTTL time.Duration `env:"MIN_TTL, default=5m"` // StatsPullerMinPeriod defines the period for which the stats puller will hold a lock // which prevents other calls from entering.