Skip to content

Commit

Permalink
Make HealthChecks.ResourceUtilization use observable instruments (#5798)
Browse files Browse the repository at this point in the history
  • Loading branch information
evgenyfedorov2 authored Jan 30, 2025
1 parent acc985a commit 2534f08
Show file tree
Hide file tree
Showing 11 changed files with 610 additions and 166 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
<PropertyGroup>
<EnableConfigurationBindingGenerator>true</EnableConfigurationBindingGenerator>
<InjectSharedDataValidation>true</InjectSharedDataValidation>
<InjectSharedDiagnosticIds>true</InjectSharedDiagnosticIds>
<InjectExperimentalAttributeOnLegacy>true</InjectExperimentalAttributeOnLegacy>
<InjectObsoleteAttributeOnLegacy>true</InjectObsoleteAttributeOnLegacy>
</PropertyGroup>

<PropertyGroup>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Diagnostics.ResourceMonitoring;
using Microsoft.Shared.DiagnosticIds;
using Microsoft.Shared.Diagnostics;

namespace Microsoft.Extensions.Diagnostics.HealthChecks;

/// <summary>
/// Legacy part of the resource utilization <see cref="IHealthCheck"/>: these members read CPU and memory
/// usage through <see cref="IResourceMonitor"/> rather than through observable instruments.
/// </summary>
internal sealed partial class ResourceUtilizationHealthCheck : IHealthCheck
{
#pragma warning disable CS0436 // Type conflicts with imported type
    /// <summary>
    /// Stores the resource monitor used by <see cref="ObsoleteCheckHealthAsync"/>.
    /// </summary>
    /// <param name="dataTracker">The resource monitor; validated with <c>Throw.IfNull</c> before it is stored.</param>
    [Obsolete(DiagnosticIds.Obsoletions.NonObservableResourceMonitoringApiMessage,
        DiagnosticId = DiagnosticIds.Obsoletions.NonObservableResourceMonitoringApiDiagId,
        UrlFormat = DiagnosticIds.UrlFormat)]
    public void ObsoleteConstructor(IResourceMonitor dataTracker)
    {
        _dataTracker = Throw.IfNull(dataTracker);
    }

    /// <summary>
    /// Evaluates health from the utilization reported by the stored <see cref="IResourceMonitor"/>
    /// over the configured sampling window.
    /// </summary>
    /// <param name="cancellationToken">A <see cref="CancellationToken"/> accepted for signature parity; it is not used by this method.</param>
    /// <returns>A <see cref="Task{HealthCheckResult}"/> yielding the status computed from the CPU and memory usage percentages.</returns>
#pragma warning disable IDE0060 // Remove unused parameter
    [Obsolete(DiagnosticIds.Obsoletions.NonObservableResourceMonitoringApiMessage,
        DiagnosticId = DiagnosticIds.Obsoletions.NonObservableResourceMonitoringApiDiagId,
        UrlFormat = DiagnosticIds.UrlFormat)]
    public Task<HealthCheckResult> ObsoleteCheckHealthAsync(CancellationToken cancellationToken = default)
    {
        // The tracker is only assigned by ObsoleteConstructor, hence the null-forgiving operator.
        var snapshot = _dataTracker!.GetUtilization(_options.SamplingWindow);
        var cpuPercentage = snapshot.CpuUsedPercentage;
        var memoryPercentage = snapshot.MemoryUsedPercentage;

        return EvaluateHealthStatusAsync(cpuPercentage, memoryPercentage, _options);
    }
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Collections.Generic;
using System.Diagnostics.Metrics;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Diagnostics.ResourceMonitoring;
Expand All @@ -13,44 +15,30 @@ namespace Microsoft.Extensions.Diagnostics.HealthChecks;
/// <summary>
/// Represents an <see cref="IHealthCheck"/> for in-container resource utilization.
/// </summary>
internal sealed class ResourceUtilizationHealthCheck : IHealthCheck
internal sealed partial class ResourceUtilizationHealthCheck : IHealthCheck, IDisposable
{
private readonly double _multiplier;
private readonly MeterListener? _meterListener;
private readonly ResourceUtilizationHealthCheckOptions _options;
private readonly IResourceMonitor _dataTracker;
private IResourceMonitor? _dataTracker;
private double _cpuUsedPercentage;
private double _memoryUsedPercentage;

/// <summary>
/// Initializes a new instance of the <see cref="ResourceUtilizationHealthCheck"/> class.
/// </summary>
/// <param name="options">The options.</param>
/// <param name="dataTracker">The datatracker.</param>
public ResourceUtilizationHealthCheck(IOptions<ResourceUtilizationHealthCheckOptions> options,
IResourceMonitor dataTracker)
{
_options = Throw.IfMemberNull(options, options.Value);
_dataTracker = Throw.IfNull(dataTracker);
}

/// <summary>
/// Runs the health check.
/// </summary>
/// <param name="context">A context object associated with the current execution.</param>
/// <param name="cancellationToken">A <see cref="CancellationToken"/> that can be used to cancel the health check.</param>
/// <returns>A <see cref="Task{HealthCheckResult}"/> that completes when the health check has finished, yielding the status of the component being checked.</returns>
public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
#pragma warning disable EA0014 // The async method doesn't support cancellation
public static Task<HealthCheckResult> EvaluateHealthStatusAsync(double cpuUsedPercentage, double memoryUsedPercentage, ResourceUtilizationHealthCheckOptions options)
{
var utilization = _dataTracker.GetUtilization(_options.SamplingWindow);
IReadOnlyDictionary<string, object> data = new Dictionary<string, object>
{
{ nameof(utilization.CpuUsedPercentage), utilization.CpuUsedPercentage },
{ nameof(utilization.MemoryUsedPercentage), utilization.MemoryUsedPercentage },
{ "CpuUsedPercentage", cpuUsedPercentage },
{ "MemoryUsedPercentage", memoryUsedPercentage },
};

bool cpuUnhealthy = utilization.CpuUsedPercentage > _options.CpuThresholds.UnhealthyUtilizationPercentage;
bool memoryUnhealthy = utilization.MemoryUsedPercentage > _options.MemoryThresholds.UnhealthyUtilizationPercentage;
bool cpuUnhealthy = cpuUsedPercentage > options.CpuThresholds.UnhealthyUtilizationPercentage;
bool memoryUnhealthy = memoryUsedPercentage > options.MemoryThresholds.UnhealthyUtilizationPercentage;

if (cpuUnhealthy || memoryUnhealthy)
{
string message = string.Empty;
string message;
if (cpuUnhealthy && memoryUnhealthy)
{
message = "CPU and memory usage is above the limit";
Expand All @@ -67,12 +55,12 @@ public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, Canc
return Task.FromResult(HealthCheckResult.Unhealthy(message, default, data));
}

bool cpuDegraded = utilization.CpuUsedPercentage > _options.CpuThresholds.DegradedUtilizationPercentage;
bool memoryDegraded = utilization.MemoryUsedPercentage > _options.MemoryThresholds.DegradedUtilizationPercentage;
bool cpuDegraded = cpuUsedPercentage > options.CpuThresholds.DegradedUtilizationPercentage;
bool memoryDegraded = memoryUsedPercentage > options.MemoryThresholds.DegradedUtilizationPercentage;

if (cpuDegraded || memoryDegraded)
{
string message = string.Empty;
string message;
if (cpuDegraded && memoryDegraded)
{
message = "CPU and memory usage is close to the limit";
Expand All @@ -91,4 +79,104 @@ public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, Canc

return Task.FromResult(HealthCheckResult.Healthy(default, data));
}
#pragma warning restore EA0014 // The async method doesn't support cancellation

/// <summary>
/// Initializes a new instance of the <see cref="ResourceUtilizationHealthCheck"/> class.
/// </summary>
/// <param name="options">The health check options; they also select between the observable-instrument path and the legacy path.</param>
/// <param name="dataTracker">The resource monitor, handed to the legacy path only when observable instruments are disabled.</param>
public ResourceUtilizationHealthCheck(IOptions<ResourceUtilizationHealthCheckOptions> options, IResourceMonitor dataTracker)
{
    _options = Throw.IfMemberNull(options, options.Value);

    // Legacy mode: keep the resource monitor and skip the meter listener setup entirely.
    if (!_options.UseObservableResourceMonitoringInstruments)
    {
        ObsoleteConstructor(dataTracker);
        return;
    }

#if NETFRAMEWORK
    // NOTE(review): presumably values are already percentages on .NET Framework - confirm.
    _multiplier = 1;
#else
    // Due to a bug on Windows https://github.com/dotnet/extensions/issues/5472,
    // the CPU utilization comes in the range [0, 100], so no scaling is applied there.
    // On Linux, the CPU utilization comes in the correct range [0, 1], which is converted to percentage.
#pragma warning disable S109 // Magic numbers should not be used
    _multiplier = OperatingSystem.IsWindows() ? 1 : 100;
#pragma warning restore S109 // Magic numbers should not be used
#endif

    MeterListener listener = new()
    {
        InstrumentPublished = OnInstrumentPublished
    };

    // Assign the field before starting so the listener is reachable from the instance once callbacks may fire.
    _meterListener = listener;
    listener.SetMeasurementEventCallback<double>(OnMeasurementRecorded);
    listener.Start();
}

/// <summary>
/// Runs the health check using either the observable instruments or the legacy resource monitor,
/// depending on the configured options.
/// </summary>
/// <param name="context">A context object associated with the current execution.</param>
/// <param name="cancellationToken">A <see cref="CancellationToken"/> that can be used to cancel the health check; it is forwarded to the legacy path only.</param>
/// <returns>A <see cref="Task{HealthCheckResult}"/> that completes when the health check has finished, yielding the status of the component being checked.</returns>
public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
{
    if (_options.UseObservableResourceMonitoringInstruments)
    {
        // Ask the listener to record the observable instruments; the measurements reach
        // OnMeasurementRecorded, which updates the CPU and memory fields evaluated below.
        _meterListener!.RecordObservableInstruments();

        return EvaluateHealthStatusAsync(_cpuUsedPercentage, _memoryUsedPercentage, _options);
    }

    return ObsoleteCheckHealthAsync(cancellationToken);
}

/// <inheritdoc />
public void Dispose() => Dispose(true); // Delegates to the disposing overload.

/// <summary>
/// Releases the meter listener, if one was created, when called with <paramref name="disposing"/> set.
/// </summary>
/// <param name="disposing"><see langword="true" /> to dispose the meter listener; otherwise nothing is done.</param>
private void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    // The listener is null when the legacy (non-observable) path was selected in the constructor.
    _meterListener?.Dispose();
}

/// <summary>
/// Listener callback for published instruments: enables measurement events only for instruments
/// whose meter is named "Microsoft.Extensions.Diagnostics.ResourceMonitoring".
/// </summary>
/// <param name="instrument">The instrument that was published.</param>
/// <param name="listener">The listener on which measurement events are enabled.</param>
private void OnInstrumentPublished(Instrument instrument, MeterListener listener)
{
    string meterName = instrument.Meter.Name;
    if (meterName is not "Microsoft.Extensions.Diagnostics.ResourceMonitoring")
    {
        return;
    }

    listener.EnableMeasurementEvents(instrument);
}

/// <summary>
/// Listener callback for <see cref="double"/> measurements: scales the value by the multiplier chosen
/// in the constructor and stores it as the latest CPU or memory usage, based on the instrument name.
/// Measurements from any other instrument are ignored.
/// </summary>
/// <param name="instrument">The instrument that produced the measurement.</param>
/// <param name="measurement">The raw measured value.</param>
/// <param name="tags">Tags attached to the measurement; not used.</param>
/// <param name="state">Listener state object; not used.</param>
private void OnMeasurementRecorded(
    Instrument instrument, double measurement,
    ReadOnlySpan<KeyValuePair<string, object?>> tags, object? state)
{
    double scaled = measurement * _multiplier;
    string instrumentName = instrument.Name;

    if (instrumentName is "process.cpu.utilization" or "container.cpu.limit.utilization")
    {
        _cpuUsedPercentage = scaled;
    }
    else if (instrumentName is "dotnet.process.memory.virtual.utilization" or "container.memory.limit.utilization")
    {
        _memoryUsedPercentage = scaled;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using Microsoft.Extensions.Diagnostics.ResourceMonitoring;
using System.Diagnostics.CodeAnalysis;
using Microsoft.Extensions.Options;
using Microsoft.Shared.Data.Validation;
using Microsoft.Shared.DiagnosticIds;

namespace Microsoft.Extensions.Diagnostics.HealthChecks;

Expand All @@ -20,8 +21,7 @@ public class ResourceUtilizationHealthCheckOptions
/// Gets or sets thresholds for CPU utilization.
/// </summary>
/// <remarks>
/// The thresholds are periodically compared against the utilization samples provided by
/// the registered <see cref="IResourceMonitor"/>.
/// The thresholds are periodically compared against the utilization samples provided by the Resource Monitoring library.
/// </remarks>
[ValidateObjectMembers]
public ResourceUsageThresholds CpuThresholds { get; set; } = new ResourceUsageThresholds();
Expand All @@ -30,18 +30,33 @@ public class ResourceUtilizationHealthCheckOptions
/// Gets or sets thresholds for memory utilization.
/// </summary>
/// <remarks>
/// The thresholds are periodically compared against the utilization samples provided by
/// the registered <see cref="IResourceMonitor"/>.
/// The thresholds are periodically compared against the utilization samples provided by the Resource Monitoring library.
/// </remarks>
[ValidateObjectMembers]
public ResourceUsageThresholds MemoryThresholds { get; set; } = new ResourceUsageThresholds();

/// <summary>
/// Gets or sets the time window for used for calculating CPU and memory utilization averages.
/// Gets or sets the time window used for calculating CPU and memory utilization averages.
/// </summary>
/// <value>
/// The default value is 5 seconds.
/// </value>
#pragma warning disable CS0436 // Type conflicts with imported type
[Obsolete(DiagnosticIds.Obsoletions.NonObservableResourceMonitoringApiMessage,
DiagnosticId = DiagnosticIds.Obsoletions.NonObservableResourceMonitoringApiDiagId,
UrlFormat = DiagnosticIds.UrlFormat)]
#pragma warning restore CS0436 // Type conflicts with imported type
[TimeSpan(MinimumSamplingWindow, int.MaxValue)]
public TimeSpan SamplingWindow { get; set; } = DefaultSamplingWindow;

/// <summary>
/// Gets or sets a value indicating whether observable instruments are used to obtain CPU and memory usage,
/// as opposed to the default <see cref="Microsoft.Extensions.Diagnostics.ResourceMonitoring.IResourceMonitor"/> API, which is obsolete.
/// </summary>
/// <value>
/// <see langword="true" /> if the observable instruments are used; otherwise, <see langword="false" />.
/// The default is <see langword="false" />. In the future the default will be <see langword="true" />.
/// </value>
/// <remarks>
/// When this is <see langword="true" />, the health check reads its values from the observable instruments and
/// does not consult <c>SamplingWindow</c>; that setting only applies to the obsolete resource monitor path.
/// </remarks>
[Experimental(diagnosticId: DiagnosticIds.Experiments.HealthChecks, UrlFormat = DiagnosticIds.UrlFormat)]
public bool UseObservableResourceMonitoringInstruments { get; set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,5 +47,6 @@
<ItemGroup>
<InternalsVisibleToDynamicProxyGenAssembly2 Include="*" />
<InternalsVisibleToTest Include="$(AssemblyName).Tests" />
<InternalsVisibleToTest Include="Microsoft.Extensions.Diagnostics.HealthChecks.ResourceUtilization.Tests" />
</ItemGroup>
</Project>
Loading

0 comments on commit 2534f08

Please sign in to comment.