Skip to content

Commit

Permalink
Active and passive health checks (#459)
Browse files Browse the repository at this point in the history
From time to time, destinations can become unhealthy and start failing request processing for various reasons. To prevent request failures and maintain a good quality of service, YARP must monitor the health status of destinations and stop sending traffic to the ones that have become unhealthy until they have recovered.

This PR implements active and passive health check mechanisms where the former periodically probes destinations with dedicated HTTP requests and the latter watches for client request proxying results.

Fixes #228
  • Loading branch information
alnikola authored Oct 27, 2020
1 parent b83f675 commit 05a60d0
Show file tree
Hide file tree
Showing 93 changed files with 3,519 additions and 290 deletions.
12 changes: 9 additions & 3 deletions samples/ReverseProxy.Code.Sample/CustomConfigFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,23 @@ public class CustomConfigFilter : IProxyConfigFilter
{
public Task ConfigureClusterAsync(Cluster cluster, CancellationToken cancel)
{
cluster.HealthCheck ??= new HealthCheckOptions();
// How to use custom metadata to configure clusters
if (cluster.Metadata?.TryGetValue("CustomHealth", out var customHealth) ?? false
&& string.Equals(customHealth, "true", StringComparison.OrdinalIgnoreCase))
{
cluster.HealthCheck.Enabled = true;
cluster.HealthCheck ??= new HealthCheckOptions { Active = new ActiveHealthCheckOptions() };
cluster.HealthCheck.Active.Enabled = true;
cluster.HealthCheck.Active.Policy = HealthCheckConstants.ActivePolicy.ConsecutiveFailures;
}

// Or wrap the metadata in config sugar
var config = new ConfigurationBuilder().AddInMemoryCollection(cluster.Metadata).Build();
cluster.HealthCheck.Enabled = config.GetValue<bool>("CustomHealth");
if (config.GetValue<bool>("CustomHealth"))
{
cluster.HealthCheck ??= new HealthCheckOptions { Active = new ActiveHealthCheckOptions() };
cluster.HealthCheck.Active.Enabled = true;
cluster.HealthCheck.Active.Policy = HealthCheckConstants.ActivePolicy.ConsecutiveFailures;
}

return Task.CompletedTask;
}
Expand Down
1 change: 1 addition & 0 deletions samples/ReverseProxy.Code.Sample/Startup.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ public void ConfigureServices(IServiceCollection services)
new Cluster()
{
Id = "cluster1",
SessionAffinity = new SessionAffinityOptions { Enabled = true, Mode = "Cookie" },
Destinations =
{
{ "destination1", new Destination() { Address = "https://localhost:10000" } }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ public class HealthController : ControllerBase
[Route("/api/health")]
public IActionResult CheckHealth()
{
// TODO: Implement health controller, use guid in route.
return Ok();
}
}
Expand Down
12 changes: 9 additions & 3 deletions samples/ReverseProxy.Config.Sample/CustomConfigFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,23 @@ public class CustomConfigFilter : IProxyConfigFilter
{
public Task ConfigureClusterAsync(Cluster cluster, CancellationToken cancel)
{
cluster.HealthCheck ??= new HealthCheckOptions();
// How to use custom metadata to configure clusters
if (cluster.Metadata?.TryGetValue("CustomHealth", out var customHealth) ?? false
&& string.Equals(customHealth, "true", StringComparison.OrdinalIgnoreCase))
{
cluster.HealthCheck.Enabled = true;
cluster.HealthCheck ??= new HealthCheckOptions { Active = new ActiveHealthCheckOptions() };
cluster.HealthCheck.Active.Enabled = true;
cluster.HealthCheck.Active.Policy = HealthCheckConstants.ActivePolicy.ConsecutiveFailures;
}

// Or wrap the metadata in config sugar
var config = new ConfigurationBuilder().AddInMemoryCollection(cluster.Metadata).Build();
cluster.HealthCheck.Enabled = config.GetValue<bool>("CustomHealth");
if (config.GetValue<bool>("CustomHealth"))
{
cluster.HealthCheck ??= new HealthCheckOptions { Active = new ActiveHealthCheckOptions() };
cluster.HealthCheck.Active.Enabled = true;
cluster.HealthCheck.Active.Policy = HealthCheckConstants.ActivePolicy.ConsecutiveFailures;
}

return Task.CompletedTask;
}
Expand Down
1 change: 1 addition & 0 deletions samples/ReverseProxy.Config.Sample/Startup.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ public void Configure(IApplicationBuilder app)
proxyPipeline.UseAffinitizedDestinationLookup();
proxyPipeline.UseProxyLoadBalancing();
proxyPipeline.UseRequestAffinitizer();
proxyPipeline.UsePassiveHealthChecks();
});
});
}
Expand Down
17 changes: 15 additions & 2 deletions samples/ReverseProxy.Config.Sample/appsettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,17 @@
"Enabled": "true",
"Mode": "Cookie"
},
"HealthCheck": {
"Active": {
"Enabled": "true",
"Interval": "00:00:10",
"Timeout": "00:00:10",
"Policy": "ConsecutiveFailures",
"Path": "/api/health"
}
},
"Metadata": {
"CustomHealth": "false"
"ConsecutiveFailuresHealthPolicy.Threshold": "3"
},
"Destinations": {
"cluster1/destination1": {
Expand All @@ -40,9 +49,13 @@
}
},
"cluster2": {
"Metadata": {
"CustomHealth": true
},
"Destinations": {
"cluster2/destination1": {
"Address": "https://localhost:10001/"
"Address": "https://localhost:10001/",
"Health": "https://localhost:10001/api/health"
}
}
}
Expand Down
12 changes: 11 additions & 1 deletion samples/SampleClient/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ public static async Task<int> Main(string[] args)
var scenarioFactories = new Dictionary<string, Func<IScenario>>(StringComparer.OrdinalIgnoreCase) {
{"Http1", () => new Http1Scenario()},
{"Http2", () => new Http2Scenario()},
{"RawUpgrade", () => new RawUpgradeScenario()},
// Disabled due to a conflict with a workaround to the issue https://github.com/microsoft/reverse-proxy/issues/255.
//{"RawUpgrade", () => new RawUpgradeScenario()},
{"WebSockets", () => new WebSocketsScenario()},
{"SessionAffinity", () => new SessionAffinityScenario()}
};
Expand All @@ -59,7 +60,11 @@ public static async Task<int> Main(string[] args)
}

Console.WriteLine();
Console.ForegroundColor = success ? ConsoleColor.Green : ConsoleColor.Red;
Console.WriteLine($"All scenarios completed {(success ? "successfully" : "with errors")}.");
Console.ResetColor();
Console.WriteLine("Press any key to exit.");
Console.ReadKey();
return success ? 0 : 1;
}

Expand Down Expand Up @@ -87,6 +92,11 @@ public static async Task<int> Main(string[] args)
return 1;
}

Console.ForegroundColor = ConsoleColor.Green;
Console.WriteLine("All scenarios completed successfully!");
Console.ResetColor();
Console.WriteLine("Press any key to exit.");
Console.ReadKey();
return 0;
}
}
Expand Down
2 changes: 0 additions & 2 deletions samples/SampleClient/Scenarios/SessionAffinityScenario.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,10 @@
// Licensed under the MIT License.

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
Expand Down
27 changes: 27 additions & 0 deletions samples/SampleServer/Controllers/HealthController.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

using Microsoft.AspNetCore.Mvc;

namespace Microsoft.ReverseProxy.Sample.Controllers
{
    /// <summary>
    /// Controller for active health check probes.
    /// </summary>
    [ApiController]
    public class HealthController : ControllerBase
    {
        // Number of probes served so far. Static, so it is shared by every request;
        // it is only ever touched through Interlocked to keep the increments atomic.
        private static int _count;

        /// <summary>
        /// Answers a health probe. Returns 200 when the running probe number modulo 10
        /// is below 4 and 500 otherwise, simulating temporary health degradation.
        /// </summary>
        /// <returns>An empty 200 response, or a 500 status code result.</returns>
        [HttpGet]
        [Route("/api/health")]
        public IActionResult CheckHealth()
        {
            // Increment atomically and decide on the value this request produced,
            // rather than re-reading a field other requests may have changed meanwhile.
            var probeNumber = System.Threading.Interlocked.Increment(ref _count);

            // Simulate temporary health degradation.
            return probeNumber % 10 < 4 ? Ok() : StatusCode(500);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

using System;

namespace Microsoft.ReverseProxy.Abstractions
{
    /// <summary>
    /// Settings consumed by the active health check monitor.
    /// </summary>
    public class ActiveHealthCheckMonitorOptions
    {
        /// <summary>
        /// Default interval between health probes. Starts out at 15 seconds.
        /// </summary>
        public TimeSpan DefaultInterval { get; set; } = new TimeSpan(0, 0, 15);

        /// <summary>
        /// Default timeout for a health probe. Starts out at 10 seconds.
        /// </summary>
        public TimeSpan DefaultTimeout { get; set; } = new TimeSpan(0, 0, 10);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

using System;

namespace Microsoft.ReverseProxy.Abstractions
{
    /// <summary>
    /// Active health check options.
    /// </summary>
    public sealed class ActiveHealthCheckOptions
    {
        /// <summary>
        /// Whether active health checks are enabled.
        /// </summary>
        public bool Enabled { get; set; }

        /// <summary>
        /// Health probe interval. May be left unset (<c>null</c>).
        /// </summary>
        public TimeSpan? Interval { get; set; }

        /// <summary>
        /// Health probe timeout, after which a destination is considered unhealthy.
        /// May be left unset (<c>null</c>).
        /// </summary>
        public TimeSpan? Timeout { get; set; }

        /// <summary>
        /// Active health check policy.
        /// </summary>
        public string Policy { get; set; }

        /// <summary>
        /// HTTP health check endpoint path.
        /// </summary>
        public string Path { get; set; }

        /// <summary>
        /// Creates a new instance carrying a copy of every property value.
        /// </summary>
        /// <returns>An independent copy of this instance.</returns>
        internal ActiveHealthCheckOptions DeepClone()
        {
            return new ActiveHealthCheckOptions
            {
                Enabled = Enabled,
                Interval = Interval,
                Timeout = Timeout,
                Policy = Policy,
                Path = Path,
            };
        }

        /// <summary>
        /// Compares two option sets property by property.
        /// </summary>
        /// <param name="options1">First instance; may be <c>null</c>.</param>
        /// <param name="options2">Second instance; may be <c>null</c>.</param>
        /// <returns>
        /// <c>true</c> when both are <c>null</c> or all properties match;
        /// <c>false</c> when exactly one is <c>null</c> or any property differs.
        /// </returns>
        internal static bool Equals(ActiveHealthCheckOptions options1, ActiveHealthCheckOptions options2)
        {
            if (options1 is null && options2 is null)
            {
                return true;
            }

            if (options1 is null || options2 is null)
            {
                return false;
            }

            // Policy names are compared case-insensitively. The path is compared with
            // ordinal (case-sensitive) semantics because URL paths are case-sensitive:
            // "/api/Health" and "/api/health" may be different endpoints, so a casing
            // change must register as a difference.
            return options1.Enabled == options2.Enabled
                && options1.Interval == options2.Interval
                && options1.Timeout == options2.Timeout
                && string.Equals(options1.Policy, options2.Policy, StringComparison.OrdinalIgnoreCase)
                && string.Equals(options1.Path, options2.Path, StringComparison.Ordinal);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public sealed class Cluster : IDeepCloneable<Cluster>
public SessionAffinityOptions SessionAffinity { get; set; }

/// <summary>
/// Active health checking options.
/// Health checking options.
/// </summary>
public HealthCheckOptions HealthCheck { get; set; }

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

namespace Microsoft.ReverseProxy.Abstractions
{
    /// <summary>
    /// Options for the consecutive failures active health check policy.
    /// </summary>
    public class ConsecutiveFailuresHealthPolicyOptions
    {
        /// <summary>
        /// Metadata parameter name under which the consecutive failure threshold is stored.
        /// The threshold is the number of consecutive failures that must occur
        /// before a destination is marked as unhealthy.
        /// </summary>
        public static readonly string ThresholdMetadataName = "ConsecutiveFailuresHealthPolicy.Threshold";

        /// <summary>
        /// Consecutive failures threshold applied when a cluster's metadata does not set one.
        /// Starts out at 2.
        /// </summary>
        public long DefaultThreshold { get; set; } = 2L;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

namespace Microsoft.ReverseProxy.Abstractions
{
    /// <summary>
    /// Well-known health check policy names, grouped by health check kind.
    /// </summary>
    public static class HealthCheckConstants
    {
        /// <summary>
        /// Names of active health check policies.
        /// </summary>
        public static class ActivePolicy
        {
            /// <summary>
            /// The "ConsecutiveFailures" policy name.
            /// </summary>
            public static readonly string ConsecutiveFailures = nameof(ConsecutiveFailures);
        }

        /// <summary>
        /// Names of passive health check policies.
        /// </summary>
        public static class PassivePolicy
        {
            /// <summary>
            /// The "TransportFailureRate" policy name.
            /// </summary>
            public static readonly string TransportFailureRate = nameof(TransportFailureRate);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,45 +6,26 @@
namespace Microsoft.ReverseProxy.Abstractions
{
/// <summary>
/// Active health check options.
/// All health check options.
/// </summary>
public sealed class HealthCheckOptions
{
/// <summary>
/// Whether health probes are enabled.
/// Passive health check options.
/// </summary>
public bool Enabled { get; set; }
public PassiveHealthCheckOptions Passive { get; set; }

/// <summary>
/// Health probe interval.
/// Active health check options.
/// </summary>
// TODO: Consider switching to ISO8601 duration (e.g. "PT5M")
public TimeSpan Interval { get; set; }

/// <summary>
/// Health probe timeout, after which the targeted endpoint is considered unhealthy.
/// </summary>
public TimeSpan Timeout { get; set; }

/// <summary>
/// Port number.
/// </summary>
public int Port { get; set; }

/// <summary>
/// Http path.
/// </summary>
public string Path { get; set; }
public ActiveHealthCheckOptions Active { get; set; }

internal HealthCheckOptions DeepClone()
{
return new HealthCheckOptions
{
Enabled = Enabled,
Interval = Interval,
Timeout = Timeout,
Port = Port,
Path = Path,
Passive = Passive?.DeepClone(),
Active = Active?.DeepClone()
};
}

Expand All @@ -60,11 +41,8 @@ internal static bool Equals(HealthCheckOptions options1, HealthCheckOptions opti
return false;
}

return options1.Enabled == options2.Enabled
&& options1.Interval == options2.Interval
&& options1.Timeout == options2.Timeout
&& options1.Port == options2.Port
&& string.Equals(options1.Path, options2.Path, StringComparison.OrdinalIgnoreCase);
return PassiveHealthCheckOptions.Equals(options1.Passive, options2.Passive)
&& ActiveHealthCheckOptions.Equals(options1.Active, options2.Active);
}
}
}
Loading

0 comments on commit 05a60d0

Please sign in to comment.