From fea37f110c39edb69540281bf6e5a1272f7cf132 Mon Sep 17 00:00:00 2001 From: Tom Kerkhove Date: Mon, 23 Aug 2021 14:44:27 +0200 Subject: [PATCH 1/4] Implement Signed-off-by: Tom Kerkhove --- .../ThrottlingRequestHandler.cs | 72 +++++++++++++++++++ .../Docs/Open-Api.xml | 8 +++ .../Graph/AzureResourceGraph.cs | 16 ++++- ...reResourceGraphThrottlingRequestHandler.cs | 59 +++++++++++++++ .../Extensions/AzureEnvironmentExtensions.cs | 18 +++++ ...AzureScrapingPrometheusMetricsCollector.cs | 2 +- src/Promitor.Core/Promitor.Core.csproj | 1 + src/Promitor.Core/RuntimeMetricNames.cs | 3 + ...ResourceManagerThrottlingRequestHandler.cs | 49 ++++++------- ...AzureScrapingPrometheusMetricsCollector.cs | 5 ++ .../PrometheusSystemMetricsTests.cs | 17 +++++ .../Azure/AzureEnvironmentUnitTests.cs | 67 +++++++++++++++++ 12 files changed, 286 insertions(+), 31 deletions(-) create mode 100644 src/Promitor.Agents.Core/RequestHandlers/ThrottlingRequestHandler.cs create mode 100644 src/Promitor.Agents.ResourceDiscovery/Graph/RequestHandlers/AzureResourceGraphThrottlingRequestHandler.cs create mode 100644 src/Promitor.Core/Extensions/AzureEnvironmentExtensions.cs create mode 100644 src/Promitor.Tests.Unit/Azure/AzureEnvironmentUnitTests.cs diff --git a/src/Promitor.Agents.Core/RequestHandlers/ThrottlingRequestHandler.cs b/src/Promitor.Agents.Core/RequestHandlers/ThrottlingRequestHandler.cs new file mode 100644 index 000000000..521ae84d6 --- /dev/null +++ b/src/Promitor.Agents.Core/RequestHandlers/ThrottlingRequestHandler.cs @@ -0,0 +1,72 @@ +using System.Collections.Generic; +using System.Net.Http; +using System.Threading; +using System.Threading.Tasks; +using GuardNet; +using Microsoft.Extensions.Logging; +using Promitor.Core.Metrics.Prometheus.Collectors.Interfaces; + +namespace Promitor.Agents.Core.RequestHandlers +{ + public abstract class ThrottlingRequestHandler : DelegatingHandler + { + public abstract string DependencyName { get; } + + protected ILogger Logger { get; } + 
protected IPrometheusMetricsCollector PrometheusMetricsCollector { get; } + + /// + /// Constructor + /// + /// Metrics collector for Prometheus + /// Logger to write telemetry to + protected ThrottlingRequestHandler(IPrometheusMetricsCollector prometheusMetricsCollector, ILogger logger) + { + Guard.NotNull(prometheusMetricsCollector, nameof(prometheusMetricsCollector)); + Guard.NotNull(logger, nameof(logger)); + + Logger = logger; + PrometheusMetricsCollector = prometheusMetricsCollector; + } + + protected override async Task SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) + { + request = BeforeSendingRequest(request); + + var response = await base.SendAsync(request, cancellationToken); + + var wasRequestThrottled = (int)response.StatusCode == 429; + if (wasRequestThrottled) + { + LogArmThrottling(); + } + + await AvailableRateLimitingCallsAsync(response); + AvailableThrottlingStatusAsync(wasRequestThrottled); + + return response; + } + + private void AvailableThrottlingStatusAsync(bool wasRequestThrottled) + { + var metricValue = wasRequestThrottled ? 
1 : 0; + var metricLabels = GetMetricLabels(); + PrometheusMetricsCollector.WriteGaugeMeasurement(GetThrottlingStatusMetricName(), GetThrottlingStatusMetricDescription(), metricValue, metricLabels, includeTimestamp: true); + } + + protected abstract Dictionary GetMetricLabels(); + protected abstract string GetThrottlingStatusMetricName(); + protected abstract string GetThrottlingStatusMetricDescription(); + protected abstract Task AvailableRateLimitingCallsAsync(HttpResponseMessage response); + + protected virtual HttpRequestMessage BeforeSendingRequest(HttpRequestMessage request) + { + return request; + } + + protected void LogArmThrottling() + { + Logger.LogWarning($"{DependencyName} rate limit reached."); + } + } +} diff --git a/src/Promitor.Agents.ResourceDiscovery/Docs/Open-Api.xml b/src/Promitor.Agents.ResourceDiscovery/Docs/Open-Api.xml index b18b3518f..826cfdb01 100644 --- a/src/Promitor.Agents.ResourceDiscovery/Docs/Open-Api.xml +++ b/src/Promitor.Agents.ResourceDiscovery/Docs/Open-Api.xml @@ -123,6 +123,14 @@ Initializes a new instance of the class. 
+ + + Constructor + + Metrics collector to write metrics to Prometheus + + Logger to write telemetry to + Gets the name of the parent resource from a resource URI diff --git a/src/Promitor.Agents.ResourceDiscovery/Graph/AzureResourceGraph.cs b/src/Promitor.Agents.ResourceDiscovery/Graph/AzureResourceGraph.cs index 57a4e4113..4f6bbc52c 100644 --- a/src/Promitor.Agents.ResourceDiscovery/Graph/AzureResourceGraph.cs +++ b/src/Promitor.Agents.ResourceDiscovery/Graph/AzureResourceGraph.cs @@ -16,8 +16,10 @@ using Promitor.Agents.ResourceDiscovery.Graph.Exceptions; using Promitor.Agents.ResourceDiscovery.Graph.Interfaces; using Promitor.Agents.ResourceDiscovery.Graph.Model; +using Promitor.Agents.ResourceDiscovery.Graph.RequestHandlers; using Promitor.Core; using Promitor.Core.Extensions; +using Promitor.Core.Metrics.Prometheus.Collectors.Interfaces; using Promitor.Integrations.Azure.Authentication; namespace Promitor.Agents.ResourceDiscovery.Graph @@ -25,6 +27,7 @@ namespace Promitor.Agents.ResourceDiscovery.Graph public class AzureResourceGraph : IAzureResourceGraph { private readonly IOptionsMonitor _resourceDeclarationMonitor; + private readonly IPrometheusMetricsCollector _prometheusMetricsCollector; private readonly ILogger _logger; private ResourceGraphClient _graphClient; @@ -39,8 +42,9 @@ public class AzureResourceGraph : IAzureResourceGraph private readonly AzureAuthenticationInfo _azureAuthenticationInfo; - public AzureResourceGraph(IOptionsMonitor resourceDeclarationMonitor, IConfiguration configuration, ILogger logger) + public AzureResourceGraph(IPrometheusMetricsCollector prometheusMetricsCollector, IOptionsMonitor resourceDeclarationMonitor, IConfiguration configuration, ILogger logger) { + Guard.NotNull(prometheusMetricsCollector, nameof(prometheusMetricsCollector)); Guard.NotNull(resourceDeclarationMonitor, nameof(resourceDeclarationMonitor)); Guard.NotNull(resourceDeclarationMonitor.CurrentValue, nameof(resourceDeclarationMonitor.CurrentValue)); 
Guard.NotNull(resourceDeclarationMonitor.CurrentValue.AzureLandscape, nameof(resourceDeclarationMonitor.CurrentValue.AzureLandscape)); @@ -49,6 +53,7 @@ public AzureResourceGraph(IOptionsMonitor resourceDeclarati _logger = logger; _resourceDeclarationMonitor = resourceDeclarationMonitor; + _prometheusMetricsCollector = prometheusMetricsCollector; _azureAuthenticationInfo = AzureAuthenticationFactory.GetConfiguredAzureAuthentication(configuration); } @@ -265,7 +270,14 @@ private async Task CreateClientAsync() var credentials = await AzureAuthenticationFactory.GetTokenCredentialsAsync(azureEnvironment.ManagementEndpoint, TenantId, _azureAuthenticationInfo, azureAuthorityHost); var resourceManagerBaseUri = new Uri(azureEnvironment.ResourceManagerEndpoint); - var resourceGraphClient = new ResourceGraphClient(resourceManagerBaseUri, credentials); + var metricLabels = new Dictionary + { + {"tenant_id", TenantId}, + {"cloud", azureEnvironment.GetDisplayName()}, + {"app_id", _azureAuthenticationInfo.IdentityId}, + {"auth_mode", _azureAuthenticationInfo.Mode.ToString()}, + }; + var resourceGraphClient = new ResourceGraphClient(resourceManagerBaseUri, credentials, new AzureResourceGraphThrottlingRequestHandler(_prometheusMetricsCollector, metricLabels, _logger)); var version = Promitor.Core.Version.Get(); var promitorUserAgent = UserAgent.Generate("Resource-Discovery", version); diff --git a/src/Promitor.Agents.ResourceDiscovery/Graph/RequestHandlers/AzureResourceGraphThrottlingRequestHandler.cs b/src/Promitor.Agents.ResourceDiscovery/Graph/RequestHandlers/AzureResourceGraphThrottlingRequestHandler.cs new file mode 100644 index 000000000..7b41e62ed --- /dev/null +++ b/src/Promitor.Agents.ResourceDiscovery/Graph/RequestHandlers/AzureResourceGraphThrottlingRequestHandler.cs @@ -0,0 +1,59 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Net.Http; +using System.Threading.Tasks; +using GuardNet; +using Microsoft.Extensions.Logging; +using 
Promitor.Agents.Core.RequestHandlers; +using Promitor.Core; +using Promitor.Core.Metrics.Prometheus.Collectors.Interfaces; + +namespace Promitor.Agents.ResourceDiscovery.Graph.RequestHandlers +{ + internal class AzureResourceGraphThrottlingRequestHandler : ThrottlingRequestHandler + { + private readonly Dictionary _metricLabels; + + private const string ThrottlingHeaderName = "x-ms-user-quota-remaining"; + private const string AvailableCallsMetricDescription = "Indication how many calls are still available before Azure Resource Graph is going to throttle us."; + private const string ThrottledMetricDescription = "Indication concerning Azure Resource Graph are being throttled. (1 = yes, 0 = no)."; + + public override string DependencyName => "Azure Resource Graph"; + + /// + /// Constructor + /// + /// Metrics collector to write metrics to Prometheus + /// + /// Logger to write telemetry to + public AzureResourceGraphThrottlingRequestHandler(IPrometheusMetricsCollector prometheusMetricsCollector, Dictionary metricLabels, ILogger logger) + : base(prometheusMetricsCollector, logger) + { + Guard.NotNull(metricLabels, nameof(metricLabels)); + + _metricLabels = metricLabels; + } + + protected override Task AvailableRateLimitingCallsAsync(HttpResponseMessage response) + { + // Source: + // - https://docs.microsoft.com/en-us/azure/governance/resource-graph/overview#throttling + // - https://docs.microsoft.com/en-us/azure/governance/resource-graph/concepts/guidance-for-throttled-requests#understand-throttling-headers + if (response.Headers.Contains(ThrottlingHeaderName)) + { + var remainingApiCalls = response.Headers.GetValues(ThrottlingHeaderName).FirstOrDefault(); + var subscriptionReadLimit = Convert.ToInt16(remainingApiCalls); + + // Report metric + PrometheusMetricsCollector.WriteGaugeMeasurement(RuntimeMetricNames.RateLimitingForResourceGraph, AvailableCallsMetricDescription, subscriptionReadLimit, _metricLabels, includeTimestamp: true); + } + + return 
Task.CompletedTask; + } + + protected override Dictionary GetMetricLabels() => _metricLabels; + protected override string GetThrottlingStatusMetricName() => RuntimeMetricNames.ResourceGraphThrottled; + protected override string GetThrottlingStatusMetricDescription() => ThrottledMetricDescription; + } +} diff --git a/src/Promitor.Core/Extensions/AzureEnvironmentExtensions.cs b/src/Promitor.Core/Extensions/AzureEnvironmentExtensions.cs new file mode 100644 index 000000000..5a0f2682c --- /dev/null +++ b/src/Promitor.Core/Extensions/AzureEnvironmentExtensions.cs @@ -0,0 +1,18 @@ +using Humanizer; +using Microsoft.Azure.Management.ResourceManager.Fluent; + +namespace Promitor.Core.Extensions +{ + public static class AzureEnvironmentExtensions + { + /// + /// Get Azure environment information + /// + /// Microsoft Azure cloud + /// Azure environment information for specified cloud + public static string GetDisplayName(this AzureEnvironment azureCloud) + { + return azureCloud.Name.Replace("Azure", "").Replace("Cloud", "").Humanize(LetterCasing.Title); + } + } +} \ No newline at end of file diff --git a/src/Promitor.Core/Metrics/Prometheus/Collectors/Interfaces/IAzureScrapingPrometheusMetricsCollector.cs b/src/Promitor.Core/Metrics/Prometheus/Collectors/Interfaces/IAzureScrapingPrometheusMetricsCollector.cs index 0f53c01bf..8ac9752b9 100644 --- a/src/Promitor.Core/Metrics/Prometheus/Collectors/Interfaces/IAzureScrapingPrometheusMetricsCollector.cs +++ b/src/Promitor.Core/Metrics/Prometheus/Collectors/Interfaces/IAzureScrapingPrometheusMetricsCollector.cs @@ -2,7 +2,7 @@ namespace Promitor.Core.Metrics.Prometheus.Collectors.Interfaces { - public interface IAzureScrapingPrometheusMetricsCollector + public interface IAzureScrapingPrometheusMetricsCollector : IPrometheusMetricsCollector { /// /// Sets a new value for a measurement on a gauge diff --git a/src/Promitor.Core/Promitor.Core.csproj b/src/Promitor.Core/Promitor.Core.csproj index f33cd361c..bfa5b16de 100644 --- 
a/src/Promitor.Core/Promitor.Core.csproj +++ b/src/Promitor.Core/Promitor.Core.csproj @@ -16,6 +16,7 @@ + diff --git a/src/Promitor.Core/RuntimeMetricNames.cs b/src/Promitor.Core/RuntimeMetricNames.cs index 39a5b7d59..1d7b44e5b 100644 --- a/src/Promitor.Core/RuntimeMetricNames.cs +++ b/src/Promitor.Core/RuntimeMetricNames.cs @@ -3,6 +3,9 @@ public static class RuntimeMetricNames { public static string RateLimitingForArm => "promitor_ratelimit_arm"; + public static string ArmThrottled => "promitor_ratelimit_arm_throttled"; + public static string RateLimitingForResourceGraph => "promitor_ratelimit_resource_graph_remaining"; + public static string ResourceGraphThrottled => "promitor_ratelimit_resource_graph_throttled"; public static string ScrapeSuccessful => "promitor_scrape_success"; public static string ScrapeError => "promitor_scrape_error"; } diff --git a/src/Promitor.Integrations.AzureMonitor/RequestHandlers/AzureResourceManagerThrottlingRequestHandler.cs b/src/Promitor.Integrations.AzureMonitor/RequestHandlers/AzureResourceManagerThrottlingRequestHandler.cs index 08d468009..9bd6f0a48 100644 --- a/src/Promitor.Integrations.AzureMonitor/RequestHandlers/AzureResourceManagerThrottlingRequestHandler.cs +++ b/src/Promitor.Integrations.AzureMonitor/RequestHandlers/AzureResourceManagerThrottlingRequestHandler.cs @@ -2,11 +2,10 @@ using System.Collections.Generic; using System.Linq; using System.Net.Http; -using System.Threading; using System.Threading.Tasks; using GuardNet; -using Microsoft.Azure.Management.ResourceManager.Fluent.Core; using Microsoft.Extensions.Logging; +using Promitor.Agents.Core.RequestHandlers; using Promitor.Core; using Promitor.Core.Metrics.Prometheus.Collectors.Interfaces; using Promitor.Core.Metrics.Sinks; @@ -18,12 +17,15 @@ namespace Promitor.Integrations.AzureMonitor.RequestHandlers /// /// Request handler to provide insights on the current api consumption and throttling of Azure Resource Manager /// - public class 
AzureResourceManagerThrottlingRequestHandler : DelegatingHandlerBase + public class AzureResourceManagerThrottlingRequestHandler : ThrottlingRequestHandler { - private readonly ILogger _logger; private readonly Dictionary _metricLabels; private readonly MetricSinkWriter _metricSinkWriter; private readonly IAzureScrapingPrometheusMetricsCollector _azureScrapingPrometheusMetricsCollector; + public override string DependencyName => "Azure Resource Manager (ARM)"; + private const string ThrottlingHeaderName = "x-ms-ratelimit-remaining-subscription-reads"; + private const string AvailableCallsMetricDescription = "Indication how many calls are still available before Azure Resource Manager (ARM) is going to throttle us."; + private const string ThrottledMetricDescription = "Indication concerning Azure Resource Manager are being throttled. (1 = yes, 0 = no)."; /// /// Constructor @@ -35,15 +37,14 @@ public class AzureResourceManagerThrottlingRequestHandler : DelegatingHandlerBas /// Metrics collector to write metrics to Prometheus /// Logger to write telemetry to public AzureResourceManagerThrottlingRequestHandler(string tenantId, string subscriptionId, AzureAuthenticationInfo azureAuthenticationInfo, MetricSinkWriter metricSinkWriter, IAzureScrapingPrometheusMetricsCollector azureScrapingPrometheusMetricsCollector, ILogger logger) + : base(azureScrapingPrometheusMetricsCollector, logger) { Guard.NotNullOrWhitespace(tenantId, nameof(tenantId)); Guard.NotNullOrWhitespace(subscriptionId, nameof(subscriptionId)); Guard.NotNull(metricSinkWriter, nameof(metricSinkWriter)); Guard.NotNull(azureScrapingPrometheusMetricsCollector, nameof(azureScrapingPrometheusMetricsCollector)); Guard.NotNull(azureAuthenticationInfo, nameof(azureAuthenticationInfo)); - Guard.NotNull(logger, nameof(logger)); - _logger = logger; _metricSinkWriter = metricSinkWriter; _azureScrapingPrometheusMetricsCollector = azureScrapingPrometheusMetricsCollector; @@ -57,37 +58,34 @@ public 
AzureResourceManagerThrottlingRequestHandler(string tenantId, string subs }; } - protected override async Task SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) + protected override HttpRequestMessage BeforeSendingRequest(HttpRequestMessage request) { string agentVersion = Version.Get(); var promitorUserAgent = UserAgent.Generate("Scraper", agentVersion); request.Headers.UserAgent.Clear(); request.Headers.UserAgent.TryParseAdd(promitorUserAgent); - var response = await base.SendAsync(request, cancellationToken); - - await MeasureArmRateLimitingAsync(response); - - if ((int)response.StatusCode == 429) - { - LogArmThrottling(); - } - - return response; + return request; } - - private async Task MeasureArmRateLimitingAsync(HttpResponseMessage response) + + protected override async Task AvailableRateLimitingCallsAsync(HttpResponseMessage response) { // Source: https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-manager-request-limits - if (response.Headers.Contains("x-ms-ratelimit-remaining-subscription-reads")) + if (response.Headers.Contains(ThrottlingHeaderName)) { - var remainingApiCalls = response.Headers.GetValues("x-ms-ratelimit-remaining-subscription-reads").FirstOrDefault(); + var remainingApiCalls = response.Headers.GetValues(ThrottlingHeaderName).FirstOrDefault(); var subscriptionReadLimit = Convert.ToInt16(remainingApiCalls); - await _metricSinkWriter.ReportMetricAsync(RuntimeMetricNames.RateLimitingForArm, "Indication how many calls are still available before Azure Resource Manager is going to throttle us.", subscriptionReadLimit, _metricLabels); - _azureScrapingPrometheusMetricsCollector.WriteGaugeMeasurement(RuntimeMetricNames.RateLimitingForArm, "Indication how many calls are still available before Azure Resource Manager is going to throttle us.", subscriptionReadLimit, _metricLabels); + + // Report metric + await _metricSinkWriter.ReportMetricAsync(RuntimeMetricNames.RateLimitingForArm, 
AvailableCallsMetricDescription, subscriptionReadLimit, _metricLabels); + _azureScrapingPrometheusMetricsCollector.WriteGaugeMeasurement(RuntimeMetricNames.RateLimitingForArm, AvailableCallsMetricDescription, subscriptionReadLimit, _metricLabels); } } + protected override Dictionary GetMetricLabels() => _metricLabels; + protected override string GetThrottlingStatusMetricName() => RuntimeMetricNames.ArmThrottled; + protected override string GetThrottlingStatusMetricDescription() => ThrottledMetricDescription; + private string DetermineApplicationId(AzureAuthenticationInfo azureAuthenticationInfo) { switch (azureAuthenticationInfo.Mode) @@ -102,10 +100,5 @@ private string DetermineApplicationId(AzureAuthenticationInfo azureAuthenticatio throw new ArgumentOutOfRangeException(nameof(azureAuthenticationInfo.Mode)); } } - - private void LogArmThrottling() - { - _logger.LogWarning("Azure subscription rate limit reached."); - } } } \ No newline at end of file diff --git a/src/Promitor.Integrations.Sinks.Prometheus/Collectors/AzureScrapingPrometheusMetricsCollector.cs b/src/Promitor.Integrations.Sinks.Prometheus/Collectors/AzureScrapingPrometheusMetricsCollector.cs index 3dc14d3e0..51158d694 100644 --- a/src/Promitor.Integrations.Sinks.Prometheus/Collectors/AzureScrapingPrometheusMetricsCollector.cs +++ b/src/Promitor.Integrations.Sinks.Prometheus/Collectors/AzureScrapingPrometheusMetricsCollector.cs @@ -45,5 +45,10 @@ public void WriteGaugeMeasurement(string name, string description, double value, _prometheusMetricsCollector.WriteGaugeMeasurement(name, description, value, orderedLabels, enableMetricTimestamps); } + + public void WriteGaugeMeasurement(string name, string description, double value, Dictionary labels, bool includeTimestamp) + { + _prometheusMetricsCollector.WriteGaugeMeasurement(name, description, value, labels, includeTimestamp); + } } } diff --git a/src/Promitor.Tests.Integration/Services/ResourceDiscovery/PrometheusSystemMetricsTests.cs 
b/src/Promitor.Tests.Integration/Services/ResourceDiscovery/PrometheusSystemMetricsTests.cs index e3dea0ce1..69f7ad2df 100644 --- a/src/Promitor.Tests.Integration/Services/ResourceDiscovery/PrometheusSystemMetricsTests.cs +++ b/src/Promitor.Tests.Integration/Services/ResourceDiscovery/PrometheusSystemMetricsTests.cs @@ -1,5 +1,6 @@ using System.Threading.Tasks; using Promitor.Agents.ResourceDiscovery.Scheduling; +using Promitor.Core; using Promitor.Tests.Integration.Clients; using Xunit; using Xunit.Abstractions; @@ -68,5 +69,21 @@ public async Task Prometheus_Scrape_ExpectedAzureResourceGroupInfoMetricIsAvaila Assert.NotNull(gaugeMetric.Measurements); Assert.False(gaugeMetric.Measurements.Count < 1); } + + [Fact] + public async Task Prometheus_Scrape_ExpectedAzureResourceGraphThrottlingMetricIsAvailable() + { + // Arrange + var resourceDiscoveryClient = new ResourceDiscoveryClient(Configuration, Logger); + + // Act + var gaugeMetric = await resourceDiscoveryClient.WaitForPrometheusMetricAsync(RuntimeMetricNames.RateLimitingForResourceGraph); + + // Assert + Assert.NotNull(gaugeMetric); + Assert.Equal(AzureResourceGroupsDiscoveryBackgroundJob.MetricName, gaugeMetric.Name); + Assert.NotNull(gaugeMetric.Measurements); + Assert.False(gaugeMetric.Measurements.Count < 1); + } } } diff --git a/src/Promitor.Tests.Unit/Azure/AzureEnvironmentUnitTests.cs b/src/Promitor.Tests.Unit/Azure/AzureEnvironmentUnitTests.cs new file mode 100644 index 000000000..4adf14b44 --- /dev/null +++ b/src/Promitor.Tests.Unit/Azure/AzureEnvironmentUnitTests.cs @@ -0,0 +1,67 @@ +using System.ComponentModel; +using Microsoft.Azure.Management.ResourceManager.Fluent; +using Promitor.Core.Extensions; +using Xunit; + +namespace Promitor.Tests.Unit.Azure +{ + [Category("Unit")] + public class AzureEnvironmentUnitTests : UnitTest + { + [Fact] + public void GetDisplayName_ForAzureGlobalCloud_ProvidesCorrectDisplayNameEnvironmentInfo() + { + // Arrange + var azureEnvironment = 
AzureEnvironment.AzureGlobalCloud; + var expectedDisplayName = "Global"; + + // Act + var displayName = azureEnvironment.GetDisplayName(); + + // Assert + Assert.Equal(expectedDisplayName, displayName); + } + + [Fact] + public void GetDisplayName_ForAzureChinaCloud_ProvidesCorrectDisplayNameEnvironmentInfo() + { + // Arrange + var azureEnvironment = AzureEnvironment.AzureChinaCloud; + var expectedDisplayName = "China"; + + // Act + var displayName = azureEnvironment.GetDisplayName(); + + // Assert + Assert.Equal(expectedDisplayName, displayName); + } + + [Fact] + public void GetDisplayName_ForAzureGermanCloud_ProvidesCorrectDisplayNameEnvironmentInfo() + { + // Arrange + var azureEnvironment = AzureEnvironment.AzureGermanCloud; + var expectedDisplayName = "German"; + + // Act + var displayName = azureEnvironment.GetDisplayName(); + + // Assert + Assert.Equal(expectedDisplayName, displayName); + } + + [Fact] + public void GetDisplayName_ForUSGovernmentCloud_ProvidesCorrectDisplayNameEnvironmentInfo() + { + // Arrange + var azureEnvironment = AzureEnvironment.AzureUSGovernment; + var expectedDisplayName = "US Government"; + + // Act + var displayName = azureEnvironment.GetDisplayName(); + + // Assert + Assert.Equal(expectedDisplayName, displayName); + } + } +} From df38388002c2f2361e911986a0de16b7bdb8af17 Mon Sep 17 00:00:00 2001 From: Tom Kerkhove Date: Mon, 23 Aug 2021 15:59:46 +0200 Subject: [PATCH 2/4] Docs! 
Signed-off-by: Tom Kerkhove --- changelog/content/experimental/unreleased.md | 6 +++ docs/operations/index.md | 51 ++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/changelog/content/experimental/unreleased.md b/changelog/content/experimental/unreleased.md index 483dabea9..ab9eee97a 100644 --- a/changelog/content/experimental/unreleased.md +++ b/changelog/content/experimental/unreleased.md @@ -8,6 +8,8 @@ version: - {{% tag added %}} Provide system metrics related to agent performance & resources ([docs](https://promitor.io/operations/#performance) | [#341](https://github.com/tomkerkhove/promitor/issues/341)) +- {{% tag added %}} Provide system metrics indicating ARM throttling status ([docs](https://promitor.io/operations/#azure-resource-manager-api---consumption--throttling) + | [#1738](https://github.com/tomkerkhove/promitor/issues/1738)) #### Resource Discovery @@ -20,3 +22,7 @@ version: | [#1716](https://github.com/tomkerkhove/promitor/issues/1716)) - {{% tag added %}} Provide system metrics with discovered resource group information ([docs](https://promitor.io/operations/#discovery)) | [#1716](https://github.com/tomkerkhove/promitor/issues/1716)) +- {{% tag added %}} Provide system metrics indicating Azure Resource Graph throttling status ([docs](https://promitor.io/operations/#azure-resource-graph) + | [#1739](https://github.com/tomkerkhove/promitor/issues/1739)) +- {{% tag added %}} Provide system metrics providing insights on Azure Resource Graph rate limiting ([docs](https://promitor.io/operations/#azure-resource-graph) + | [#973](https://github.com/tomkerkhove/promitor/issues/973)) diff --git a/docs/operations/index.md b/docs/operations/index.md index fba11f06b..41e7fc9e1 100644 --- a/docs/operations/index.md +++ b/docs/operations/index.md @@ -18,6 +18,7 @@ Here is an overview of how you can operate Promitor. 
- [Exploring our REST APIs](#exploring-our-rest-apis) - [Integrations](#integrations) - [Azure Resource Manager API - Consumption & Throttling](#azure-resource-manager-api---consumption--throttling) + - [Azure Resource Graph](#azure-resource-graph) - [Azure Monitor](#azure-monitor) ## Health @@ -212,8 +213,58 @@ Azure Resource Manager API: - `subscription_id` - _Id of the subscription that is being interacted with_ - `app_id` - _Id of the application that is being used to interact with API_ +- `promitor_ratelimit_arm_throttled` - Indication whether or not we are being throttled by Azure Resource Manager + (ARM). Metric provides following labels: + - `tenant_id` - _Id of the tenant that is being interacted with_ + - `subscription_id` - _Id of the subscription that is being interacted with_ + - `app_id` - _Id of the application that is being used to interact with API_ + +```text +# HELP promitor_ratelimit_arm Indication how many calls are still available before Azure Resource Manager (ARM) is going to throttle us. +# TYPE promitor_ratelimit_arm gauge +promitor_ratelimit_arm{tenant_id="c8819874-9e56-4e3f-b1a8-1c0325138f27",subscription_id="0329dd2a-59dc-4493-aa54-cb01cb027dc2",app_id="ceb249a3-44ce-4c90-8863-6776336f5b7e"} 11995 1629719527020 +promitor_ratelimit_arm{tenant_id="c8819874-9e56-4e3f-b1a8-1c0325138f27",subscription_id="0f9d7fea-99e8-4768-8672-06a28514f77e",app_id="ceb249a3-44ce-4c90-8863-6776336f5b7e"} 11989 1629719532626 +# HELP promitor_ratelimit_arm_throttled Indication concerning Azure Resource Manager are being throttled. (1 = yes, 0 = no). 
+# TYPE promitor_ratelimit_arm_throttled gauge +promitor_ratelimit_arm_throttled{tenant_id="c8819874-9e56-4e3f-b1a8-1c0325138f27",subscription_id="0329dd2a-59dc-4493-aa54-cb01cb027dc2",app_id="ceb249a3-44ce-4c90-8863-6776336f5b7e"} 0 1629719527020 +promitor_ratelimit_arm_throttled{tenant_id="c8819874-9e56-4e3f-b1a8-1c0325138f27",subscription_id="0f9d7fea-99e8-4768-8672-06a28514f77e",app_id="ceb249a3-44ce-4c90-8863-6776336f5b7e"} 0 1629719532626 +``` + You can read more about the Azure Resource Manager limitations on [docs.microsoft.com](https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-manager-request-limits). +### Azure Resource Graph + +![Resource Discovery Support Badge](https://img.shields.io/badge/Support%20for%20Resource%20Discovery-Yes-green.svg) +![Scraper Support Badge](https://img.shields.io/badge/Support%20for%20Scraper-No-red.svg) + +Promitor exposes runtime metrics to provide insights on the API consumption of +Azure Resource Graph: + +- `promitor_ratelimit_resource_graph_remaining` - Indication how many calls are still available before + Azure Resource Graph is going to throttle us. Metric provides following labels: + - `tenant_id` - _Id of the tenant that is being interacted with_ + - `cloud` - _Name of the cloud_ + - `auth_mode` - _Authentication mode to authenticate with_ + - `app_id` - _Id of the application that is being used to interact with_ + +- `promitor_ratelimit_resource_graph_throttled` - Indication whether or not we are being throttled by Azure Resource + Graph. Metric provides following labels: + - `tenant_id` - _Id of the tenant that is being interacted with_ + - `cloud` - _Name of the cloud_ + - `auth_mode` - _Authentication mode to authenticate with_ + - `app_id` - _Id of the application that is being used to interact with_ + +```text +# HELP promitor_ratelimit_resource_graph_remaining Indication how many calls are still available before Azure Resource Graph is going to throttle us. 
+# TYPE promitor_ratelimit_resource_graph_remaining gauge +promitor_ratelimit_resource_graph_remaining{tenant_id="c8819874-9e56-4e3f-b1a8-1c0325138f27",cloud="Global",auth_mode="ServicePrincipal",app_id="67882a00-21d3-4ee7-b32a-430ea0768cd3"} 9 1629719863738 +# HELP promitor_ratelimit_resource_graph_throttled Indication concerning Azure Resource Graph are being throttled. (1 = yes, 0 = no). +# TYPE promitor_ratelimit_resource_graph_throttled gauge +promitor_ratelimit_resource_graph_throttled{tenant_id="c8819874-9e56-4e3f-b1a8-1c0325138f27",cloud="Global",auth_mode="ServicePrincipal",app_id="67882a00-21d3-4ee7-b32a-430ea0768cd3"} 0 1629719863738 +``` + +You can read more about the Azure Resource Graph throttling on [docs.microsoft.com](https://docs.microsoft.com/en-us/azure/governance/resource-graph/overview#throttling). + ### Azure Monitor Promitor interacts with Azure Monitor API to scrape all the required metrics. From d4c27647e64dcb660b40f658f50ad5f0842e92c1 Mon Sep 17 00:00:00 2001 From: Tom Kerkhove Date: Mon, 23 Aug 2021 16:04:58 +0200 Subject: [PATCH 3/4] More tests Signed-off-by: Tom Kerkhove --- .../PrometheusSystemMetricsTests.cs | 18 +++++++++- .../PrometheusSystemMetricsTests.cs | 33 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/Promitor.Tests.Integration/Services/ResourceDiscovery/PrometheusSystemMetricsTests.cs b/src/Promitor.Tests.Integration/Services/ResourceDiscovery/PrometheusSystemMetricsTests.cs index 69f7ad2df..9775d53e2 100644 --- a/src/Promitor.Tests.Integration/Services/ResourceDiscovery/PrometheusSystemMetricsTests.cs +++ b/src/Promitor.Tests.Integration/Services/ResourceDiscovery/PrometheusSystemMetricsTests.cs @@ -81,7 +81,23 @@ public async Task Prometheus_Scrape_ExpectedAzureResourceGraphThrottlingMetricIs // Assert Assert.NotNull(gaugeMetric); - Assert.Equal(AzureResourceGroupsDiscoveryBackgroundJob.MetricName, gaugeMetric.Name); + Assert.Equal(RuntimeMetricNames.RateLimitingForResourceGraph, 
gaugeMetric.Name); + Assert.NotNull(gaugeMetric.Measurements); + Assert.False(gaugeMetric.Measurements.Count < 1); + } + + [Fact] + public async Task Prometheus_Scrape_ExpectedResourceGraphThrottledMetricIsAvailable() + { + // Arrange + var resourceDiscoveryClient = new ResourceDiscoveryClient(Configuration, Logger); + + // Act + var gaugeMetric = await resourceDiscoveryClient.WaitForPrometheusMetricAsync(RuntimeMetricNames.ResourceGraphThrottled); + + // Assert + Assert.NotNull(gaugeMetric); + Assert.Equal(RuntimeMetricNames.ResourceGraphThrottled, gaugeMetric.Name); Assert.NotNull(gaugeMetric.Measurements); Assert.False(gaugeMetric.Measurements.Count < 1); } diff --git a/src/Promitor.Tests.Integration/Services/Scraper/MetricSinks/PrometheusSystemMetricsTests.cs b/src/Promitor.Tests.Integration/Services/Scraper/MetricSinks/PrometheusSystemMetricsTests.cs index 1ab27f45b..58e0a143b 100644 --- a/src/Promitor.Tests.Integration/Services/Scraper/MetricSinks/PrometheusSystemMetricsTests.cs +++ b/src/Promitor.Tests.Integration/Services/Scraper/MetricSinks/PrometheusSystemMetricsTests.cs @@ -1,4 +1,5 @@ using System.Threading.Tasks; +using Promitor.Core; using Promitor.Tests.Integration.Clients; using Xunit; using Xunit.Abstractions; @@ -34,5 +35,37 @@ public async Task Prometheus_Scrape_ExpectedSystemPerformanceMetricIsAvailable(s Assert.NotNull(gaugeMetric.Measurements); Assert.False(gaugeMetric.Measurements.Count < 1); } + + [Fact] + public async Task Prometheus_Scrape_ExpectedRateLimitingForArmMetricIsAvailable() + { + // Arrange + var resourceDiscoveryClient = new ResourceDiscoveryClient(Configuration, Logger); + + // Act + var gaugeMetric = await resourceDiscoveryClient.WaitForPrometheusMetricAsync(RuntimeMetricNames.RateLimitingForArm); + + // Assert + Assert.NotNull(gaugeMetric); + Assert.Equal(RuntimeMetricNames.RateLimitingForArm, gaugeMetric.Name); + Assert.NotNull(gaugeMetric.Measurements); + Assert.False(gaugeMetric.Measurements.Count < 1); + } + + [Fact] + 
public async Task Prometheus_Scrape_ExpectedArmThrottledMetricIsAvailable() + { + // Arrange + var resourceDiscoveryClient = new ResourceDiscoveryClient(Configuration, Logger); + + // Act + var gaugeMetric = await resourceDiscoveryClient.WaitForPrometheusMetricAsync(RuntimeMetricNames.ArmThrottled); + + // Assert + Assert.NotNull(gaugeMetric); + Assert.Equal(RuntimeMetricNames.ArmThrottled, gaugeMetric.Name); + Assert.NotNull(gaugeMetric.Measurements); + Assert.False(gaugeMetric.Measurements.Count < 1); + } } } From 8a7ddade9844330a8081740935841937f9dd4a3d Mon Sep 17 00:00:00 2001 From: Tom Kerkhove Date: Tue, 24 Aug 2021 07:21:50 +0200 Subject: [PATCH 4/4] Rely on defaults when discovered info is not available Signed-off-by: Tom Kerkhove --- .../AzureResourceGroupsDiscoveryBackgroundJob.cs | 7 +++---- .../AzureSubscriptionDiscoveryBackgroundJob.cs | 8 ++++---- .../Scheduling/DiscoveryBackgroundJob.cs | 10 ++++++++++ .../MetricSinks/PrometheusSystemMetricsTests.cs | 8 ++++---- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/Promitor.Agents.ResourceDiscovery/Scheduling/AzureResourceGroupsDiscoveryBackgroundJob.cs b/src/Promitor.Agents.ResourceDiscovery/Scheduling/AzureResourceGroupsDiscoveryBackgroundJob.cs index e4fa36e5d..0e1b85b48 100644 --- a/src/Promitor.Agents.ResourceDiscovery/Scheduling/AzureResourceGroupsDiscoveryBackgroundJob.cs +++ b/src/Promitor.Agents.ResourceDiscovery/Scheduling/AzureResourceGroupsDiscoveryBackgroundJob.cs @@ -43,15 +43,14 @@ public async Task ExecuteAsync(CancellationToken cancellationToken) private void ReportDiscoveredAzureInfo(AzureResourceGroupInformation resourceGroupInformation) { - var managedByLabel = string.IsNullOrWhiteSpace(resourceGroupInformation.ManagedBy) ? 
"n/a" : resourceGroupInformation.ManagedBy; var labels = new Dictionary { { "tenant_id", resourceGroupInformation.TenantId }, { "subscription_id", resourceGroupInformation.SubscriptionId }, { "resource_group_name", resourceGroupInformation.Name }, - { "provisioning_state", resourceGroupInformation.ProvisioningState }, - { "managed_by", managedByLabel }, - { "region", resourceGroupInformation.Region } + { "provisioning_state", GetValueOrDefault(resourceGroupInformation.ProvisioningState, "n/a") }, + { "managed_by", GetValueOrDefault(resourceGroupInformation.ManagedBy, "n/a") }, + { "region", GetValueOrDefault(resourceGroupInformation.Region, "n/a") } }; // Report metric in Prometheus endpoint diff --git a/src/Promitor.Agents.ResourceDiscovery/Scheduling/AzureSubscriptionDiscoveryBackgroundJob.cs b/src/Promitor.Agents.ResourceDiscovery/Scheduling/AzureSubscriptionDiscoveryBackgroundJob.cs index e48448f57..3478ed541 100644 --- a/src/Promitor.Agents.ResourceDiscovery/Scheduling/AzureSubscriptionDiscoveryBackgroundJob.cs +++ b/src/Promitor.Agents.ResourceDiscovery/Scheduling/AzureSubscriptionDiscoveryBackgroundJob.cs @@ -49,10 +49,10 @@ private void ReportDiscoveredAzureInfo(AzureSubscriptionInformation azureLandsca { "tenant_id", azureLandscapeInformation.TenantId }, { "subscription_id", azureLandscapeInformation.Id }, { "subscription_name", azureLandscapeInformation.Name}, - { "quota_id", azureLandscapeInformation.QuotaId}, - { "spending_limit", azureLandscapeInformation.SpendingLimit}, - { "state", azureLandscapeInformation.State}, - { "authorization", azureLandscapeInformation.AuthorizationSource} + { "quota_id", GetValueOrDefault(azureLandscapeInformation.QuotaId, "n/a")}, + { "spending_limit", GetValueOrDefault(azureLandscapeInformation.SpendingLimit, "n/a")}, + { "state", GetValueOrDefault(azureLandscapeInformation.State, "n/a")}, + { "authorization", GetValueOrDefault(azureLandscapeInformation.AuthorizationSource, "n/a")} }; // Report metric in Prometheus 
endpoint diff --git a/src/Promitor.Agents.ResourceDiscovery/Scheduling/DiscoveryBackgroundJob.cs b/src/Promitor.Agents.ResourceDiscovery/Scheduling/DiscoveryBackgroundJob.cs index d8dc885df..e1591f285 100644 --- a/src/Promitor.Agents.ResourceDiscovery/Scheduling/DiscoveryBackgroundJob.cs +++ b/src/Promitor.Agents.ResourceDiscovery/Scheduling/DiscoveryBackgroundJob.cs @@ -27,5 +27,15 @@ protected void WritePrometheusMetric(string metricName, string metricDescription { _prometheusMetricsCollector.WriteGaugeMeasurement(metricName, metricDescription, value, labels, includeTimestamp: true); } + + protected string GetValueOrDefault(string preferredValue, string alternative) + { + if (string.IsNullOrWhiteSpace(preferredValue)) + { + return alternative; + } + + return preferredValue; + } } } diff --git a/src/Promitor.Tests.Integration/Services/Scraper/MetricSinks/PrometheusSystemMetricsTests.cs b/src/Promitor.Tests.Integration/Services/Scraper/MetricSinks/PrometheusSystemMetricsTests.cs index 58e0a143b..54afbdeaf 100644 --- a/src/Promitor.Tests.Integration/Services/Scraper/MetricSinks/PrometheusSystemMetricsTests.cs +++ b/src/Promitor.Tests.Integration/Services/Scraper/MetricSinks/PrometheusSystemMetricsTests.cs @@ -40,10 +40,10 @@ public async Task Prometheus_Scrape_ExpectedSystemPerformanceMetricIsAvailable(s public async Task Prometheus_Scrape_ExpectedRateLimitingForArmMetricIsAvailable() { // Arrange - var resourceDiscoveryClient = new ResourceDiscoveryClient(Configuration, Logger); + var scraperClient = new ScraperClient(Configuration, Logger); // Act - var gaugeMetric = await resourceDiscoveryClient.WaitForPrometheusMetricAsync(RuntimeMetricNames.RateLimitingForArm); + var gaugeMetric = await scraperClient.WaitForPrometheusMetricAsync(RuntimeMetricNames.RateLimitingForArm); // Assert Assert.NotNull(gaugeMetric); @@ -56,10 +56,10 @@ public async Task Prometheus_Scrape_ExpectedRateLimitingForArmMetricIsAvailable( public async Task 
Prometheus_Scrape_ExpectedArmThrottledMetricIsAvailable() { // Arrange - var resourceDiscoveryClient = new ResourceDiscoveryClient(Configuration, Logger); + var scraperClient = new ScraperClient(Configuration, Logger); // Act - var gaugeMetric = await resourceDiscoveryClient.WaitForPrometheusMetricAsync(RuntimeMetricNames.ArmThrottled); + var gaugeMetric = await scraperClient.WaitForPrometheusMetricAsync(RuntimeMetricNames.ArmThrottled); // Assert Assert.NotNull(gaugeMetric);