From df3dc5f6df1d1ea82340b20e097dbc2fbe491a9e Mon Sep 17 00:00:00 2001 From: Thorarinn Sigurdsson Date: Thu, 7 Mar 2019 14:12:30 +0100 Subject: [PATCH] fix: increase init delay for liveness probe The initial delay in the liveness probe config for k8s health checks was too short. This was due to the mistaken assumption that k8s would only start running the liveness probe after the readiness probe succeeded. For services that take a long time to start up, the old config would sometimes lead to the liveness probe failing and thus the container being restarted - repeatedly, in some situations. The initial delay and period of the readiness probe were also reduced (so it now starts sooner and runs more often), and its failure threshold was raised so that slow-starting services get roughly 90 seconds to become ready. Later, we should aim to make initialDelaySeconds configurable for the liveness probe. --- .../plugins/kubernetes/container/deployment.ts | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/garden-service/src/plugins/kubernetes/container/deployment.ts b/garden-service/src/plugins/kubernetes/container/deployment.ts index 8a55e235a09..10a261778c0 100644 --- a/garden-service/src/plugins/kubernetes/container/deployment.ts +++ b/garden-service/src/plugins/kubernetes/container/deployment.ts @@ -269,16 +269,23 @@ function deploymentConfig(service: Service, configuredReplicas: number, namespac function configureHealthCheck(container, spec): void { + const readinessPeriodSeconds = 1 + const readinessFailureThreshold = 90 + container.readinessProbe = { - initialDelaySeconds: 10, - periodSeconds: 5, + initialDelaySeconds: 2, + periodSeconds: readinessPeriodSeconds, timeoutSeconds: 3, successThreshold: 2, - failureThreshold: 5, + failureThreshold: readinessFailureThreshold, } + /* + * We wait for the effective failure duration (period * threshold) of the readiness probe before starting the + * liveness probe. 
+ */ container.livenessProbe = { - initialDelaySeconds: 15, + initialDelaySeconds: readinessPeriodSeconds * readinessFailureThreshold, periodSeconds: 5, timeoutSeconds: 3, successThreshold: 1,