Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(kuma-cp) health check add event log support #1631

Merged
merged 4 commits into from
Mar 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 118 additions & 65 deletions api/mesh/v1alpha1/health_check.pb.go

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions api/mesh/v1alpha1/health_check.proto
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,25 @@ message HealthCheck {
// service.
google.protobuf.BoolValue fail_traffic_on_panic = 11;

// Specifies the path to the file where Envoy can log health check events.
// If empty, no event log will be written.
string event_log_path = 12;

// If set to true, health check failure events will always be logged. If set
// to false, only the initial health check failure event will be logged. The
// default value is false.
google.protobuf.BoolValue always_log_health_check_failures = 13;

// The "no traffic interval" is a special health check interval that is used
// when a cluster has never had traffic routed to it. This lower-frequency
// interval allows cluster information to be kept up to date, without sending
// a potentially large amount of active health checking traffic for no reason.
// Once a cluster has been used for traffic routing, Envoy will shift back
// to using the standard health check interval that is defined. Note that
// this interval takes precedence over any other. If set, it must have a
// positive value. The default value for "no traffic interval" is 60 seconds.
google.protobuf.Duration no_traffic_interval = 14;

// Tcp defines optional configuration for specifying the bytes to send and
// the expected response during the health check.
message Tcp {
Expand Down
3 changes: 3 additions & 0 deletions pkg/core/resources/apis/mesh/healthcheck_validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ func (d *HealthCheckResource) validateConf() (err validators.ValidationError) {
if d.Spec.Conf.IntervalJitter != nil {
err.Add(ValidateDuration(path.Field("intervalJitter"), d.Spec.Conf.IntervalJitter))
}
if d.Spec.Conf.NoTrafficInterval != nil {
err.Add(ValidateDuration(path.Field("noTrafficInterval"), d.Spec.Conf.NoTrafficInterval))
}
err.Add(d.validatePercentage(path.Field("healthyPanicThreshold"), d.Spec.Conf.HealthyPanicThreshold))
if d.Spec.Conf.GetHttp() != nil {
err.Add(d.validateConfHttp(path.Field("http")))
Expand Down
3 changes: 3 additions & 0 deletions pkg/core/resources/apis/mesh/healthcheck_validator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ var _ = Describe("HealthCheck", func() {
initialJitter: 0s
intervalJitter: 0s
healthyPanicThreshold: 101
noTrafficInterval: 0s
http:
path: ""
requestHeadersToAdd:
Expand All @@ -138,6 +139,8 @@ var _ = Describe("HealthCheck", func() {
message: must have a positive value
- field: conf.intervalJitter
message: must have a positive value
- field: conf.noTrafficInterval
message: must have a positive value
- field: conf.healthyPanicThreshold
message: must be in range [0.0 - 100.0]
- field: conf.http.path
Expand Down
17 changes: 10 additions & 7 deletions pkg/xds/envoy/clusters/v2/health_check_configurer.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,13 +139,16 @@ func (e *HealthCheckConfigurer) Configure(cluster *envoy_api.Cluster) error {
HealthChecker: &envoy_core.HealthCheck_TcpHealthCheck_{
TcpHealthCheck: &envoy_core.HealthCheck_TcpHealthCheck{},
},
Interval: activeChecks.Interval,
Timeout: activeChecks.Timeout,
UnhealthyThreshold: &wrappers.UInt32Value{Value: activeChecks.UnhealthyThreshold},
HealthyThreshold: &wrappers.UInt32Value{Value: activeChecks.HealthyThreshold},
InitialJitter: activeChecks.InitialJitter,
IntervalJitter: activeChecks.IntervalJitter,
IntervalJitterPercent: activeChecks.IntervalJitterPercent,
Interval: activeChecks.Interval,
Timeout: activeChecks.Timeout,
UnhealthyThreshold: &wrappers.UInt32Value{Value: activeChecks.UnhealthyThreshold},
HealthyThreshold: &wrappers.UInt32Value{Value: activeChecks.HealthyThreshold},
InitialJitter: activeChecks.InitialJitter,
IntervalJitter: activeChecks.IntervalJitter,
IntervalJitterPercent: activeChecks.IntervalJitterPercent,
EventLogPath: activeChecks.EventLogPath,
AlwaysLogHealthCheckFailures: activeChecks.AlwaysLogHealthCheckFailures.GetValue(),
NoTrafficInterval: activeChecks.NoTrafficInterval,
}

healthPanicThreshold(cluster, activeChecks.GetHealthyPanicThreshold())
Expand Down
38 changes: 38 additions & 0 deletions pkg/xds/envoy/clusters/v2/health_check_configurer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,44 @@ var _ = Describe("HealthCheckConfigurer", func() {
timeout: 4s
unhealthyThreshold: 3
name: testCluster
type: EDS`,
}),
Entry("HealthCheck with event log path", testCase{
clusterName: "testCluster",
healthCheck: &mesh_core.HealthCheckResource{
Spec: &mesh_proto.HealthCheck{
Sources: []*mesh_proto.Selector{
{Match: mesh_proto.TagSelector{"kuma.io/service": "backend"}},
},
Destinations: []*mesh_proto.Selector{
{Match: mesh_proto.TagSelector{"kuma.io/service": "redis"}},
},
Conf: &mesh_proto.HealthCheck_Conf{
Interval: ptypes.DurationProto(5 * time.Second),
Timeout: ptypes.DurationProto(4 * time.Second),
NoTrafficInterval: ptypes.DurationProto(6 * time.Second),
UnhealthyThreshold: 3,
HealthyThreshold: 2,
EventLogPath: "/event/log/path",
AlwaysLogHealthCheckFailures: &wrappers.BoolValue{Value: true},
},
},
},
expected: `
connectTimeout: 5s
edsClusterConfig:
edsConfig:
ads: {}
healthChecks:
- alwaysLogHealthCheckFailures: true
eventLogPath: /event/log/path
healthyThreshold: 2
interval: 5s
noTrafficInterval: 6s
tcpHealthCheck: {}
timeout: 4s
unhealthyThreshold: 3
name: testCluster
type: EDS`,
}),
)
Expand Down
17 changes: 10 additions & 7 deletions pkg/xds/envoy/clusters/v3/health_check_configurer.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,16 @@ func (e *HealthCheckConfigurer) Configure(cluster *envoy_cluster.Cluster) error
HealthChecker: &envoy_core.HealthCheck_TcpHealthCheck_{
TcpHealthCheck: &envoy_core.HealthCheck_TcpHealthCheck{},
},
Interval: activeChecks.Interval,
Timeout: activeChecks.Timeout,
UnhealthyThreshold: &wrappers.UInt32Value{Value: activeChecks.UnhealthyThreshold},
HealthyThreshold: &wrappers.UInt32Value{Value: activeChecks.HealthyThreshold},
InitialJitter: activeChecks.InitialJitter,
IntervalJitter: activeChecks.IntervalJitter,
IntervalJitterPercent: activeChecks.IntervalJitterPercent,
Interval: activeChecks.Interval,
Timeout: activeChecks.Timeout,
UnhealthyThreshold: &wrappers.UInt32Value{Value: activeChecks.UnhealthyThreshold},
HealthyThreshold: &wrappers.UInt32Value{Value: activeChecks.HealthyThreshold},
InitialJitter: activeChecks.InitialJitter,
IntervalJitter: activeChecks.IntervalJitter,
IntervalJitterPercent: activeChecks.IntervalJitterPercent,
EventLogPath: activeChecks.EventLogPath,
AlwaysLogHealthCheckFailures: activeChecks.AlwaysLogHealthCheckFailures.GetValue(),
NoTrafficInterval: activeChecks.NoTrafficInterval,
}

healthPanicThreshold(cluster, activeChecks.GetHealthyPanicThreshold())
Expand Down
39 changes: 39 additions & 0 deletions pkg/xds/envoy/clusters/v3/health_check_configurer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,45 @@ var _ = Describe("HealthCheckConfigurer", func() {
timeout: 4s
unhealthyThreshold: 3
name: testCluster
type: EDS`,
}),
Entry("HealthCheck with event log path", testCase{
clusterName: "testCluster",
healthCheck: &mesh_core.HealthCheckResource{
Spec: &mesh_proto.HealthCheck{
Sources: []*mesh_proto.Selector{
{Match: mesh_proto.TagSelector{"kuma.io/service": "backend"}},
},
Destinations: []*mesh_proto.Selector{
{Match: mesh_proto.TagSelector{"kuma.io/service": "redis"}},
},
Conf: &mesh_proto.HealthCheck_Conf{
Interval: ptypes.DurationProto(5 * time.Second),
Timeout: ptypes.DurationProto(4 * time.Second),
NoTrafficInterval: ptypes.DurationProto(6 * time.Second),
UnhealthyThreshold: 3,
HealthyThreshold: 2,
EventLogPath: "/event/log/path",
AlwaysLogHealthCheckFailures: &wrappers.BoolValue{Value: true},
},
},
},
expected: `
connectTimeout: 5s
edsClusterConfig:
edsConfig:
ads: {}
resourceApiVersion: V3
healthChecks:
- alwaysLogHealthCheckFailures: true
eventLogPath: /event/log/path
healthyThreshold: 2
interval: 5s
noTrafficInterval: 6s
tcpHealthCheck: {}
timeout: 4s
unhealthyThreshold: 3
name: testCluster
type: EDS`,
}),
)
Expand Down