Skip to content

Commit

Permalink
Metrics and support bundle 110 (#3372)
Browse files Browse the repository at this point in the history
  • Loading branch information
lfitchett authored Aug 7, 2020
1 parent 65d21e1 commit b0a872a
Show file tree
Hide file tree
Showing 71 changed files with 4,696 additions and 1,088 deletions.
23 changes: 13 additions & 10 deletions edge-agent/src/Microsoft.Azure.Devices.Edge.Agent.Core/Agent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public class Agent
readonly AsyncLock reconcileLock = new AsyncLock();
readonly ISerde<DeploymentConfigInfo> deploymentConfigInfoSerde;
readonly IEncryptionProvider encryptionProvider;
readonly IAvailabilityMetric availabilityMetric;
readonly IDeploymentMetrics deploymentMetrics;
IEnvironment environment;
DeploymentConfigInfo currentConfig;
DeploymentStatus status;
Expand All @@ -46,7 +46,7 @@ public Agent(
DeploymentConfigInfo initialDeployedConfigInfo,
ISerde<DeploymentConfigInfo> deploymentConfigInfoSerde,
IEncryptionProvider encryptionProvider,
IAvailabilityMetric availabilityMetric)
IDeploymentMetrics deploymentMetrics)
{
this.configSource = Preconditions.CheckNotNull(configSource, nameof(configSource));
this.planner = Preconditions.CheckNotNull(planner, nameof(planner));
Expand All @@ -59,7 +59,7 @@ public Agent(
this.deploymentConfigInfoSerde = Preconditions.CheckNotNull(deploymentConfigInfoSerde, nameof(deploymentConfigInfoSerde));
this.environment = this.environmentProvider.Create(this.currentConfig.DeploymentConfig);
this.encryptionProvider = Preconditions.CheckNotNull(encryptionProvider, nameof(encryptionProvider));
this.availabilityMetric = Preconditions.CheckNotNull(availabilityMetric, nameof(availabilityMetric));
this.deploymentMetrics = Preconditions.CheckNotNull(deploymentMetrics, nameof(deploymentMetrics));
this.status = DeploymentStatus.Unknown;
Events.AgentCreated();
}
Expand All @@ -74,7 +74,7 @@ public static async Task<Agent> Create(
IEntityStore<string, string> configStore,
ISerde<DeploymentConfigInfo> deploymentConfigInfoSerde,
IEncryptionProvider encryptionProvider,
IAvailabilityMetric availabilityMetric)
IDeploymentMetrics deploymentMetrics)
{
Preconditions.CheckNotNull(deploymentConfigInfoSerde, nameof(deploymentConfigInfoSerde));
Preconditions.CheckNotNull(configStore, nameof(configStore));
Expand Down Expand Up @@ -106,7 +106,7 @@ await deploymentConfigInfoJson.ForEachAsync(
deploymentConfigInfo.GetOrElse(DeploymentConfigInfo.Empty),
deploymentConfigInfoSerde,
encryptionProvider,
availabilityMetric);
deploymentMetrics);
return agent;
}

Expand All @@ -133,7 +133,7 @@ public async Task ReconcileAsync(CancellationToken token)
else
{
ModuleSet desiredModuleSet = deploymentConfig.GetModuleSet();
_ = Task.Run(() => this.availabilityMetric.ComputeAvailability(desiredModuleSet, current))
_ = Task.Run(() => this.deploymentMetrics.ComputeAvailability(desiredModuleSet, current))
.ContinueWith(t => Events.UnknownFailure(t.Exception), TaskContinuationOptions.OnlyOnFaulted)
.ConfigureAwait(false);

Expand All @@ -152,11 +152,14 @@ public async Task ReconcileAsync(CancellationToken token)
{
try
{
bool result = await this.planRunner.ExecuteAsync(deploymentConfigInfo.Version, plan, token);
await this.UpdateCurrentConfig(deploymentConfigInfo);
if (result)
using (this.deploymentMetrics.ReportDeploymentTime())
{
this.status = DeploymentStatus.Success;
bool result = await this.planRunner.ExecuteAsync(deploymentConfigInfo.Version, plan, token);
await this.UpdateCurrentConfig(deploymentConfigInfo);
if (result)
{
this.status = DeploymentStatus.Success;
}
}
}
catch (Exception ex) when (!ex.IsFatal())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public static class Constants

public const string NetworkIdKey = "NetworkId";

public const string EdgeletClientApiVersion = "2019-11-05";
public const string EdgeletClientApiVersion = "2020-07-07";

public const string EdgeletInitializationVectorFileName = "IOTEDGE_BACKUP_IV";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,20 @@ namespace Microsoft.Azure.Devices.Edge.Agent.Core

public class SystemInfo
{
[JsonConstructor]
public SystemInfo(string operatingSystemType, string architecture, string version)
public SystemInfo(string operatingSystemType, string architecture, string version, string serverVersion, string kernelVersion, string operatingSystem, int numCpus)
{
this.OperatingSystemType = operatingSystemType;
this.Architecture = architecture;
this.Version = version;
this.ServerVersion = serverVersion;
this.KernelVersion = kernelVersion;
this.OperatingSystem = operatingSystem;
this.NumCpus = numCpus;
}

/// <summary>
/// Convenience constructor for callers that only supply the OS type, architecture and version.
/// Delegates to the seven-argument constructor, passing <see cref="string.Empty"/> for the
/// server version, kernel version and operating system, and 0 for the CPU count.
/// </summary>
/// <param name="operatingSystemType">Value stored in <see cref="OperatingSystemType"/>.</param>
/// <param name="architecture">Value stored in <see cref="Architecture"/>.</param>
/// <param name="version">Value stored in <see cref="Version"/>.</param>
public SystemInfo(string operatingSystemType, string architecture, string version)
: this(operatingSystemType, architecture, version, string.Empty, string.Empty, string.Empty, 0)
{
}

public string OperatingSystemType { get; }
Expand All @@ -20,6 +28,14 @@ public SystemInfo(string operatingSystemType, string architecture, string versio

public string Version { get; }

public string ServerVersion { get; }

public string KernelVersion { get; }

public string OperatingSystem { get; }

public int NumCpus { get; }

static SystemInfo Empty { get; } = new SystemInfo(string.Empty, string.Empty, string.Empty);
}
}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright (c) Microsoft. All rights reserved.
namespace Microsoft.Azure.Devices.Edge.Agent.Core.Logs
{
using System;
using System.IO;
using System.Threading.Tasks;

/// <summary>
/// Contract for uploading module logs and support bundles to a caller-supplied URI.
/// NOTE(review): the descriptions below are derived from the member signatures only; no
/// implementation is visible from this file, so confirm details against the implementing class.
/// </summary>
public interface IRequestsUploader
{
/// <summary>
/// Uploads a log payload that is already fully held in memory.
/// </summary>
/// <param name="uri">Destination of the upload (presumably a writable storage URL; TODO confirm).</param>
/// <param name="module">Identifier of the module the logs belong to.</param>
/// <param name="payload">The log bytes to upload.</param>
/// <param name="logsContentEncoding">Encoding of <paramref name="payload"/>.</param>
/// <param name="logsContentType">Content type of <paramref name="payload"/>.</param>
/// <returns>A task representing the asynchronous upload.</returns>
Task UploadLogs(string uri, string module, byte[] payload, LogsContentEncoding logsContentEncoding, LogsContentType logsContentType);

/// <summary>
/// Asynchronously obtains a callback that accepts one <see cref="ArraySegment{T}"/> of bytes per call.
/// Presumably intended for streaming logs in chunks rather than buffering them; verify against the caller.
/// </summary>
/// <param name="uri">Destination of the upload.</param>
/// <param name="module">Identifier of the module the logs belong to.</param>
/// <param name="logsContentEncoding">Encoding of the bytes that will be passed to the callback.</param>
/// <param name="logsContentType">Content type of the bytes that will be passed to the callback.</param>
/// <returns>A task yielding a function that takes a byte segment and returns a task for that call.</returns>
Task<Func<ArraySegment<byte>, Task>> GetLogsUploaderCallback(string uri, string module, LogsContentEncoding logsContentEncoding, LogsContentType logsContentType);

/// <summary>
/// Uploads a support bundle read from a stream.
/// </summary>
/// <param name="uri">Destination of the upload.</param>
/// <param name="source">Stream supplying the bundle contents.</param>
/// <returns>A task representing the asynchronous upload.</returns>
Task UploadSupportBundle(string uri, Stream source);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,16 @@ namespace Microsoft.Azure.Devices.Edge.Agent.Core.Metrics
using Microsoft.Azure.Devices.Edge.Util.Metrics;
using Microsoft.Extensions.Logging;

public class AvailabilityMetrics : IAvailabilityMetric, IDisposable
public class DeploymentMetrics : IDeploymentMetrics, IDisposable
{
readonly IMetricsGauge running;
readonly IMetricsGauge expectedRunning;
readonly IMetricsCounter unsuccessfulSyncs;
readonly IMetricsCounter totalSyncs;
readonly IMetricsHistogram deploymentTime;

readonly ISystemTime systemTime;
readonly ILogger log = Logger.Factory.CreateLogger<AvailabilityMetrics>();
readonly ILogger log = Logger.Factory.CreateLogger<DeploymentMetrics>();

// This allows edgeAgent to track its own availability. If edgeAgent shuts down unexpectedly, it can look at the last checkpoint time to determine its previous availability.
readonly TimeSpan checkpointFrequency = TimeSpan.FromMinutes(5);
Expand All @@ -26,7 +30,7 @@ public class AvailabilityMetrics : IAvailabilityMetric, IDisposable
readonly List<Availability> availabilities;
readonly Lazy<Availability> edgeAgent;

public AvailabilityMetrics(IMetricsProvider metricsProvider, string storageFolder, ISystemTime time = null)
public DeploymentMetrics(IMetricsProvider metricsProvider, string storageFolder, ISystemTime time = null)
{
this.systemTime = time ?? SystemTime.Instance;
this.availabilities = new List<Availability>();
Expand All @@ -43,6 +47,21 @@ public AvailabilityMetrics(IMetricsProvider metricsProvider, string storageFolde
"The amount of time the module was specified in the deployment",
new List<string> { "module_name", MetricsConstants.MsTelemetry });

this.unsuccessfulSyncs = metricsProvider.CreateCounter(
"total_unsuccessful_iothub_syncs",
"The amount of times edgeAgent failed to sync with iotHub",
new List<string> { MetricsConstants.MsTelemetry });

this.totalSyncs = metricsProvider.CreateCounter(
"total_iothub_syncs",
"The amount of times edgeAgent attempted to sync with iotHub, both successful and unsuccessful",
new List<string> { MetricsConstants.MsTelemetry });

this.deploymentTime = metricsProvider.CreateHistogram(
"deployment_time_seconds",
"The amount of time it took to complete a new deployment",
new List<string> { MetricsConstants.MsTelemetry });

string storageDirectory = Path.Combine(Preconditions.CheckNonWhiteSpace(storageFolder, nameof(storageFolder)), "availability");
try
{
Expand Down Expand Up @@ -132,6 +151,22 @@ public void IndicateCleanShutdown()
}
}

/// <summary>
/// Starts a duration measurement for a deployment and returns the handle produced by
/// <c>DurationMeasurer.MeasureDuration</c>. When the measurer invokes the callback, the
/// elapsed time is recorded, in seconds, in the <c>deploymentTime</c> histogram.
/// </summary>
/// <returns>The <see cref="IDisposable"/> returned by the duration measurer.</returns>
public IDisposable ReportDeploymentTime()
{
    return DurationMeasurer.MeasureDuration(elapsed =>
    {
        // Single tag value; it fills the MsTelemetry tag the histogram was created with.
        string[] tagValues = { true.ToString() };
        this.deploymentTime.Update(elapsed.TotalSeconds, tagValues);
    });
}

/// <summary>
/// Records one IoT Hub sync attempt. The total counter is always incremented; the
/// unsuccessful counter is incremented as well when the attempt failed.
/// </summary>
/// <param name="successful"><c>true</c> if the sync succeeded; <c>false</c> otherwise.</param>
public void ReportIotHubSync(bool successful)
{
    var tagValues = new[] { true.ToString() };

    // Every attempt counts toward the total, regardless of outcome.
    this.totalSyncs.Increment(1, tagValues);

    if (successful)
    {
        return;
    }

    this.unsuccessfulSyncs.Increment(1, tagValues);
}

TimeSpan CalculateEdgeAgentDowntime()
{
try
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@

namespace Microsoft.Azure.Devices.Edge.Agent.Core.Metrics
{
public interface IAvailabilityMetric
using System;

/// <summary>
/// Metrics reported by the agent about deployments: module availability, IoT Hub sync
/// attempts and deployment duration.
/// </summary>
public interface IDeploymentMetrics
{
/// <summary>
/// Computes availability from the desired and current module sets.
/// The agent calls this during reconciliation with the deployment's module set as <paramref name="desired"/>.
/// </summary>
/// <param name="desired">Modules specified by the deployment.</param>
/// <param name="current">Modules as currently observed.</param>
void ComputeAvailability(ModuleSet desired, ModuleSet current);

/// <summary>
/// Signals that the agent is shutting down cleanly.
/// NOTE(review): presumably lets the implementation distinguish a clean exit from a crash
/// when computing edgeAgent availability; confirm against the implementation.
/// </summary>
void IndicateCleanShutdown();

/// <summary>
/// Records one IoT Hub sync attempt and whether it succeeded.
/// </summary>
/// <param name="successful"><c>true</c> if the sync succeeded; <c>false</c> otherwise.</param>
void ReportIotHubSync(bool successful);

/// <summary>
/// Starts timing a deployment. The agent wraps plan execution in a <c>using</c> block around
/// the returned object; presumably the elapsed time is reported when it is disposed (TODO confirm).
/// </summary>
/// <returns>A handle that scopes the timed region.</returns>
IDisposable ReportDeploymentTime();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright (c) Microsoft. All rights reserved.

namespace Microsoft.Azure.Devices.Edge.Agent.Core.Metrics
{
using System;
using System.Collections.Generic;
using System.Text;
using System.Threading.Tasks;
using Microsoft.Azure.Devices.Edge.Util;
using Microsoft.Azure.Devices.Edge.Util.Metrics;
using Microsoft.Extensions.Logging;

/// <summary>
/// Publishes a single "metadata" gauge. The gauge value is always 0; the information is
/// carried in its tags: the agent version, the experimental features string and the
/// JSON-serialized <see cref="SystemInfo"/> of the host.
/// </summary>
public class MetadataMetrics
{
    readonly IMetricsGauge metaData;
    readonly Func<Task<SystemInfo>> getSystemMetadata;

    /// <summary>
    /// Creates the "metadata" gauge and stores the delegate used to fetch host information.
    /// </summary>
    /// <param name="metricsProvider">Provider used to create the gauge; must not be null.</param>
    /// <param name="getSystemMetadata">Asynchronous source of the host <see cref="SystemInfo"/>; must not be null.</param>
    public MetadataMetrics(IMetricsProvider metricsProvider, Func<Task<SystemInfo>> getSystemMetadata)
    {
        this.getSystemMetadata = Preconditions.CheckNotNull(getSystemMetadata, nameof(getSystemMetadata));

        Preconditions.CheckNotNull(metricsProvider, nameof(metricsProvider));

        // Tag names, in the same order as the values supplied in Start.
        var tagNames = new List<string> { "edge_agent_version", "experimental_features", "host_information", MetricsConstants.MsTelemetry };
        this.metaData = metricsProvider.CreateGauge(
            "metadata",
            "General metadata about the device. The value is always 0, information is encoded in the tags.",
            tagNames);
    }

    /// <summary>
    /// Fetches the host information, serializes it to JSON and sets the gauge to 0 with the
    /// collected tag values, logging before and after.
    /// </summary>
    /// <param name="logger">Logger that receives the progress messages.</param>
    /// <param name="agentVersion">Value for the "edge_agent_version" tag.</param>
    /// <param name="experimentalFeatures">Value for the "experimental_features" tag.</param>
    /// <returns>A task that completes once the gauge has been set.</returns>
    public async Task Start(ILogger logger, string agentVersion, string experimentalFeatures)
    {
        logger.LogInformation("Collecting metadata metrics");

        SystemInfo systemInfo = await this.getSystemMetadata();
        string hostInformation = Newtonsoft.Json.JsonConvert.SerializeObject(systemInfo);

        var tagValues = new[] { agentVersion, experimentalFeatures, hostInformation, true.ToString() };
        this.metaData.Set(0, tagValues);
        logger.LogInformation($"Set metadata metrics: {tagValues.Join(", ")}");
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ namespace Microsoft.Azure.Devices.Edge.Agent.Core.Requests
using Microsoft.Azure.Devices.Edge.Util.Json;
using Newtonsoft.Json;

public class LogsRequest
public class ModuleLogsRequest
{
public LogsRequest(
public ModuleLogsRequest(
string schemaVersion,
List<LogRequestItem> items,
LogsContentEncoding encoding,
Expand All @@ -23,7 +23,7 @@ public LogsRequest(
}

[JsonConstructor]
LogsRequest(
ModuleLogsRequest(
string schemaVersion,
List<LogRequestItem> items,
LogsContentEncoding? encoding,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace Microsoft.Azure.Devices.Edge.Agent.Core.Requests
using Microsoft.Azure.Devices.Edge.Util;
using Microsoft.Extensions.Logging;

public class LogsRequestHandler : RequestHandlerBase<LogsRequest, IEnumerable<LogsResponse>>
public class ModuleLogsRequestHandler : RequestHandlerBase<ModuleLogsRequest, IEnumerable<ModuleLogsResponse>>
{
const int MaxTailValue = 500;

Expand All @@ -20,17 +20,17 @@ public class LogsRequestHandler : RequestHandlerBase<LogsRequest, IEnumerable<Lo
readonly ILogsProvider logsProvider;
readonly IRuntimeInfoProvider runtimeInfoProvider;

public LogsRequestHandler(ILogsProvider logsProvider, IRuntimeInfoProvider runtimeInfoProvider)
public ModuleLogsRequestHandler(ILogsProvider logsProvider, IRuntimeInfoProvider runtimeInfoProvider)
{
this.logsProvider = Preconditions.CheckNotNull(logsProvider, nameof(logsProvider));
this.runtimeInfoProvider = Preconditions.CheckNotNull(runtimeInfoProvider, nameof(runtimeInfoProvider));
}

public override string RequestName => "GetLogs";

protected override async Task<Option<IEnumerable<LogsResponse>>> HandleRequestInternal(Option<LogsRequest> payloadOption, CancellationToken cancellationToken)
protected override async Task<Option<IEnumerable<ModuleLogsResponse>>> HandleRequestInternal(Option<ModuleLogsRequest> payloadOption, CancellationToken cancellationToken)
{
LogsRequest payload = payloadOption.Expect(() => new ArgumentException("Request payload not found"));
ModuleLogsRequest payload = payloadOption.Expect(() => new ArgumentException("Request payload not found"));
if (ExpectedSchemaVersion.CompareMajorVersion(payload.SchemaVersion, "logs upload request schema") != 0)
{
Events.MismatchedMinorVersions(payload.SchemaVersion, ExpectedSchemaVersion);
Expand All @@ -47,7 +47,7 @@ protected override async Task<Option<IEnumerable<LogsResponse>>> HandleRequestIn
false);

IList<(string id, ModuleLogOptions logOptions)> logOptionsList = await requestToOptionsMapper.MapToLogOptions(payload.Items, cancellationToken);
IEnumerable<Task<LogsResponse>> uploadLogsTasks = logOptionsList.Select(
IEnumerable<Task<ModuleLogsResponse>> uploadLogsTasks = logOptionsList.Select(
async l =>
{
Events.ReceivedLogOptions(l);
Expand All @@ -65,17 +65,17 @@ protected override async Task<Option<IEnumerable<LogsResponse>>> HandleRequestIn
Events.ReceivedModuleLogs(moduleLogs, l.id);
return logOptions.ContentEncoding == LogsContentEncoding.Gzip
? new LogsResponse(l.id, moduleLogs)
: new LogsResponse(l.id, moduleLogs.FromBytes());
? new ModuleLogsResponse(l.id, moduleLogs)
: new ModuleLogsResponse(l.id, moduleLogs.FromBytes());
});
IEnumerable<LogsResponse> response = await Task.WhenAll(uploadLogsTasks);
IEnumerable<ModuleLogsResponse> response = await Task.WhenAll(uploadLogsTasks);
return Option.Some(response);
}

static class Events
{
const int IdStart = AgentEventIds.LogsRequestHandler;
static readonly ILogger Log = Logger.Factory.CreateLogger<LogsRequestHandler>();
static readonly ILogger Log = Logger.Factory.CreateLogger<ModuleLogsRequestHandler>();

enum EventIds
{
Expand Down Expand Up @@ -107,7 +107,7 @@ public static void ReceivedLogOptions((string id, ModuleLogOptions logOptions) r
}
}

public static void ProcessingRequest(LogsRequest payload)
public static void ProcessingRequest(ModuleLogsRequest payload)
{
Log.LogInformation((int)EventIds.ProcessingRequest, $"Processing request to get logs for {payload.ToJson()}");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,20 @@ namespace Microsoft.Azure.Devices.Edge.Agent.Core.Requests
using Microsoft.Azure.Devices.Edge.Util.Json;
using Newtonsoft.Json;

public class LogsResponse
public class ModuleLogsResponse
{
public LogsResponse(string id, byte[] payloadBytes)
public ModuleLogsResponse(string id, byte[] payloadBytes)
: this(id, null, payloadBytes)
{
}

public LogsResponse(string id, string payload)
public ModuleLogsResponse(string id, string payload)
: this(id, payload, null)
{
}

[JsonConstructor]
LogsResponse(string id, string payload, byte[] payloadBytes)
ModuleLogsResponse(string id, string payload, byte[] payloadBytes)
{
this.Id = Preconditions.CheckNonWhiteSpace(id, nameof(id));
this.PayloadBytes = Option.Maybe(payloadBytes);
Expand Down
Loading

0 comments on commit b0a872a

Please sign in to comment.