Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Metrics and support bundle 110 #3372

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 13 additions & 10 deletions edge-agent/src/Microsoft.Azure.Devices.Edge.Agent.Core/Agent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public class Agent
readonly AsyncLock reconcileLock = new AsyncLock();
readonly ISerde<DeploymentConfigInfo> deploymentConfigInfoSerde;
readonly IEncryptionProvider encryptionProvider;
readonly IAvailabilityMetric availabilityMetric;
readonly IDeploymentMetrics deploymentMetrics;
IEnvironment environment;
DeploymentConfigInfo currentConfig;
DeploymentStatus status;
Expand All @@ -46,7 +46,7 @@ public Agent(
DeploymentConfigInfo initialDeployedConfigInfo,
ISerde<DeploymentConfigInfo> deploymentConfigInfoSerde,
IEncryptionProvider encryptionProvider,
IAvailabilityMetric availabilityMetric)
IDeploymentMetrics deploymentMetrics)
{
this.configSource = Preconditions.CheckNotNull(configSource, nameof(configSource));
this.planner = Preconditions.CheckNotNull(planner, nameof(planner));
Expand All @@ -59,7 +59,7 @@ public Agent(
this.deploymentConfigInfoSerde = Preconditions.CheckNotNull(deploymentConfigInfoSerde, nameof(deploymentConfigInfoSerde));
this.environment = this.environmentProvider.Create(this.currentConfig.DeploymentConfig);
this.encryptionProvider = Preconditions.CheckNotNull(encryptionProvider, nameof(encryptionProvider));
this.availabilityMetric = Preconditions.CheckNotNull(availabilityMetric, nameof(availabilityMetric));
this.deploymentMetrics = Preconditions.CheckNotNull(deploymentMetrics, nameof(deploymentMetrics));
this.status = DeploymentStatus.Unknown;
Events.AgentCreated();
}
Expand All @@ -74,7 +74,7 @@ public static async Task<Agent> Create(
IEntityStore<string, string> configStore,
ISerde<DeploymentConfigInfo> deploymentConfigInfoSerde,
IEncryptionProvider encryptionProvider,
IAvailabilityMetric availabilityMetric)
IDeploymentMetrics deploymentMetrics)
{
Preconditions.CheckNotNull(deploymentConfigInfoSerde, nameof(deploymentConfigInfoSerde));
Preconditions.CheckNotNull(configStore, nameof(configStore));
Expand Down Expand Up @@ -106,7 +106,7 @@ await deploymentConfigInfoJson.ForEachAsync(
deploymentConfigInfo.GetOrElse(DeploymentConfigInfo.Empty),
deploymentConfigInfoSerde,
encryptionProvider,
availabilityMetric);
deploymentMetrics);
return agent;
}

Expand All @@ -133,7 +133,7 @@ public async Task ReconcileAsync(CancellationToken token)
else
{
ModuleSet desiredModuleSet = deploymentConfig.GetModuleSet();
_ = Task.Run(() => this.availabilityMetric.ComputeAvailability(desiredModuleSet, current))
_ = Task.Run(() => this.deploymentMetrics.ComputeAvailability(desiredModuleSet, current))
.ContinueWith(t => Events.UnknownFailure(t.Exception), TaskContinuationOptions.OnlyOnFaulted)
.ConfigureAwait(false);

Expand All @@ -152,11 +152,14 @@ public async Task ReconcileAsync(CancellationToken token)
{
try
{
bool result = await this.planRunner.ExecuteAsync(deploymentConfigInfo.Version, plan, token);
await this.UpdateCurrentConfig(deploymentConfigInfo);
if (result)
using (this.deploymentMetrics.ReportDeploymentTime())
{
this.status = DeploymentStatus.Success;
bool result = await this.planRunner.ExecuteAsync(deploymentConfigInfo.Version, plan, token);
await this.UpdateCurrentConfig(deploymentConfigInfo);
if (result)
{
this.status = DeploymentStatus.Success;
}
}
}
catch (Exception ex) when (!ex.IsFatal())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public static class Constants

public const string NetworkIdKey = "NetworkId";

public const string EdgeletClientApiVersion = "2019-11-05";
public const string EdgeletClientApiVersion = "2020-07-07";

public const string EdgeletInitializationVectorFileName = "IOTEDGE_BACKUP_IV";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,20 @@ namespace Microsoft.Azure.Devices.Edge.Agent.Core

public class SystemInfo
{
[JsonConstructor]
public SystemInfo(string operatingSystemType, string architecture, string version)
public SystemInfo(string operatingSystemType, string architecture, string version, string serverVersion, string kernelVersion, string operatingSystem, int numCpus)
{
this.OperatingSystemType = operatingSystemType;
this.Architecture = architecture;
this.Version = version;
this.ServerVersion = serverVersion;
this.KernelVersion = kernelVersion;
this.OperatingSystem = operatingSystem;
this.NumCpus = numCpus;
}

/// <summary>
/// Creates a <see cref="SystemInfo"/> from the operating system type, architecture and version only.
/// Delegates to the seven-argument constructor, passing <see cref="string.Empty"/> for the
/// server version, kernel version and operating system, and 0 for the CPU count.
/// </summary>
/// <param name="operatingSystemType">Value stored in <see cref="OperatingSystemType"/>.</param>
/// <param name="architecture">Value stored in <see cref="Architecture"/>.</param>
/// <param name="version">Value stored in <see cref="Version"/>.</param>
public SystemInfo(string operatingSystemType, string architecture, string version)
: this(operatingSystemType, architecture, version, string.Empty, string.Empty, string.Empty, 0)
{
}

public string OperatingSystemType { get; }
Expand All @@ -20,6 +28,14 @@ public SystemInfo(string operatingSystemType, string architecture, string versio

public string Version { get; }

public string ServerVersion { get; }

public string KernelVersion { get; }

public string OperatingSystem { get; }

public int NumCpus { get; }

static SystemInfo Empty { get; } = new SystemInfo(string.Empty, string.Empty, string.Empty);
}
}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright (c) Microsoft. All rights reserved.
namespace Microsoft.Azure.Devices.Edge.Agent.Core.Logs
{
using System;
using System.IO;
using System.Threading.Tasks;

/// <summary>
/// Abstraction over the upload operations used for module logs and support bundles.
/// Only the signatures are defined here; transport and destination semantics are left to implementations.
/// </summary>
public interface IRequestsUploader
{
/// <summary>
/// Uploads a log payload that is already fully held in memory.
/// </summary>
/// <param name="uri">Destination of the upload. NOTE(review): presumably a writable storage URL - confirm against implementations.</param>
/// <param name="module">Identifier of the module the logs belong to (presumably the module name - TODO confirm).</param>
/// <param name="payload">Raw bytes to upload.</param>
/// <param name="logsContentEncoding">Encoding of <paramref name="payload"/>.</param>
/// <param name="logsContentType">Content type of <paramref name="payload"/>.</param>
/// <returns>A task representing the asynchronous upload.</returns>
Task UploadLogs(string uri, string module, byte[] payload, LogsContentEncoding logsContentEncoding, LogsContentType logsContentType);

/// <summary>
/// Asynchronously obtains a callback that takes an <see cref="ArraySegment{T}"/> of bytes and returns a <see cref="Task"/>.
/// NOTE(review): presumably each invocation of the callback uploads one chunk of a streamed log - confirm against implementations.
/// </summary>
/// <param name="uri">Destination of the upload.</param>
/// <param name="module">Identifier of the module the logs belong to.</param>
/// <param name="logsContentEncoding">Encoding of the bytes that will be handed to the callback.</param>
/// <param name="logsContentType">Content type of the bytes that will be handed to the callback.</param>
/// <returns>A task that yields the callback.</returns>
Task<Func<ArraySegment<byte>, Task>> GetLogsUploaderCallback(string uri, string module, LogsContentEncoding logsContentEncoding, LogsContentType logsContentType);

/// <summary>
/// Uploads a support bundle read from a stream.
/// </summary>
/// <param name="uri">Destination of the upload.</param>
/// <param name="source">Stream supplying the support bundle content.</param>
/// <returns>A task representing the asynchronous upload.</returns>
Task UploadSupportBundle(string uri, Stream source);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,16 @@ namespace Microsoft.Azure.Devices.Edge.Agent.Core.Metrics
using Microsoft.Azure.Devices.Edge.Util.Metrics;
using Microsoft.Extensions.Logging;

public class AvailabilityMetrics : IAvailabilityMetric, IDisposable
public class DeploymentMetrics : IDeploymentMetrics, IDisposable
{
readonly IMetricsGauge running;
readonly IMetricsGauge expectedRunning;
readonly IMetricsCounter unsuccessfulSyncs;
readonly IMetricsCounter totalSyncs;
readonly IMetricsHistogram deploymentTime;

readonly ISystemTime systemTime;
readonly ILogger log = Logger.Factory.CreateLogger<AvailabilityMetrics>();
readonly ILogger log = Logger.Factory.CreateLogger<DeploymentMetrics>();

// This allows edgeAgent to track its own availability. If edgeAgent shuts down unexpectedly, it can look at the last checkpoint time to determine its previous availability.
readonly TimeSpan checkpointFrequency = TimeSpan.FromMinutes(5);
Expand All @@ -26,7 +30,7 @@ public class AvailabilityMetrics : IAvailabilityMetric, IDisposable
readonly List<Availability> availabilities;
readonly Lazy<Availability> edgeAgent;

public AvailabilityMetrics(IMetricsProvider metricsProvider, string storageFolder, ISystemTime time = null)
public DeploymentMetrics(IMetricsProvider metricsProvider, string storageFolder, ISystemTime time = null)
{
this.systemTime = time ?? SystemTime.Instance;
this.availabilities = new List<Availability>();
Expand All @@ -43,6 +47,21 @@ public AvailabilityMetrics(IMetricsProvider metricsProvider, string storageFolde
"The amount of time the module was specified in the deployment",
new List<string> { "module_name", MetricsConstants.MsTelemetry });

this.unsuccessfulSyncs = metricsProvider.CreateCounter(
"total_unsuccessful_iothub_syncs",
"The amount of times edgeAgent failed to sync with iotHub",
new List<string> { MetricsConstants.MsTelemetry });

this.totalSyncs = metricsProvider.CreateCounter(
"total_iothub_syncs",
"The amount of times edgeAgent attempted to sync with iotHub, both successful and unsuccessful",
new List<string> { MetricsConstants.MsTelemetry });

this.deploymentTime = metricsProvider.CreateHistogram(
"deployment_time_seconds",
"The amount of time it took to complete a new deployment",
new List<string> { MetricsConstants.MsTelemetry });

string storageDirectory = Path.Combine(Preconditions.CheckNonWhiteSpace(storageFolder, nameof(storageFolder)), "availability");
try
{
Expand Down Expand Up @@ -132,6 +151,22 @@ public void IndicateCleanShutdown()
}
}

/// <summary>
/// Starts a duration measurement through <c>DurationMeasurer.MeasureDuration</c>. The callback given
/// to the measurer records the measured duration, in seconds, in the deployment time histogram.
/// </summary>
/// <returns>The <see cref="IDisposable"/> handed back by <c>DurationMeasurer.MeasureDuration</c>.</returns>
public IDisposable ReportDeploymentTime()
{
    return DurationMeasurer.MeasureDuration(
        elapsed =>
        {
            // Single tag value matching the MsTelemetry tag the histogram was created with.
            string[] tagValues = new string[] { true.ToString() };
            this.deploymentTime.Update(elapsed.TotalSeconds, tagValues);
        });
}

/// <summary>
/// Records one IoT Hub sync attempt: always increments the total counter and,
/// when the attempt failed, also increments the unsuccessful counter.
/// </summary>
/// <param name="successful"><c>true</c> if the sync succeeded; <c>false</c> otherwise.</param>
public void ReportIotHubSync(bool successful)
{
    string[] telemetryTags = new string[] { true.ToString() };

    // Every attempt counts toward the total, regardless of outcome.
    this.totalSyncs.Increment(1, telemetryTags);
    if (successful)
    {
        return;
    }

    this.unsuccessfulSyncs.Increment(1, telemetryTags);
}

TimeSpan CalculateEdgeAgentDowntime()
{
try
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@

namespace Microsoft.Azure.Devices.Edge.Agent.Core.Metrics
{
public interface IAvailabilityMetric
using System;

public interface IDeploymentMetrics
{
void ComputeAvailability(ModuleSet desired, ModuleSet current);
void IndicateCleanShutdown();
void ReportIotHubSync(bool successful);
IDisposable ReportDeploymentTime();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright (c) Microsoft. All rights reserved.

namespace Microsoft.Azure.Devices.Edge.Agent.Core.Metrics
{
using System;
using System.Collections.Generic;
using System.Text;
using System.Threading.Tasks;
using Microsoft.Azure.Devices.Edge.Util;
using Microsoft.Azure.Devices.Edge.Util.Metrics;
using Microsoft.Extensions.Logging;

/// <summary>
/// Publishes a "metadata" gauge whose value is always 0; the information is carried in its tags
/// (agent version, experimental features, JSON-serialized system information).
/// </summary>
public class MetadataMetrics
{
    readonly IMetricsGauge metaData;
    readonly Func<Task<SystemInfo>> getSystemMetadata;

    /// <summary>
    /// Creates the "metadata" gauge on the supplied metrics provider.
    /// </summary>
    /// <param name="metricsProvider">Provider used to create the gauge; must not be null.</param>
    /// <param name="getSystemMetadata">Asynchronous source of the <see cref="SystemInfo"/> to report; must not be null.</param>
    public MetadataMetrics(IMetricsProvider metricsProvider, Func<Task<SystemInfo>> getSystemMetadata)
    {
        this.getSystemMetadata = Preconditions.CheckNotNull(getSystemMetadata, nameof(getSystemMetadata));

        Preconditions.CheckNotNull(metricsProvider, nameof(metricsProvider));

        List<string> tagNames = new List<string>
        {
            "edge_agent_version",
            "experimental_features",
            "host_information",
            MetricsConstants.MsTelemetry
        };
        this.metaData = metricsProvider.CreateGauge(
            "metadata",
            "General metadata about the device. The value is always 0, information is encoded in the tags.",
            tagNames);
    }

    /// <summary>
    /// Fetches the system information, serializes it to JSON and sets the metadata gauge to 0
    /// with the agent version, experimental features and serialized system information as tag values.
    /// </summary>
    /// <param name="logger">Logger that receives the informational messages written before and after the gauge is set.</param>
    /// <param name="agentVersion">Value for the "edge_agent_version" tag.</param>
    /// <param name="experimentalFeatures">Value for the "experimental_features" tag.</param>
    /// <returns>A task that completes once the gauge has been set and logged.</returns>
    public async Task Start(ILogger logger, string agentVersion, string experimentalFeatures)
    {
        logger.LogInformation("Collecting metadata metrics");

        SystemInfo systemInfo = await this.getSystemMetadata();
        string hostInformation = Newtonsoft.Json.JsonConvert.SerializeObject(systemInfo);

        // Order must match the tag names the gauge was created with.
        string[] tagValues = new string[] { agentVersion, experimentalFeatures, hostInformation, true.ToString() };
        this.metaData.Set(0, tagValues);

        string joinedValues = tagValues.Join(", ");
        logger.LogInformation($"Set metadata metrics: {joinedValues}");
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ namespace Microsoft.Azure.Devices.Edge.Agent.Core.Requests
using Microsoft.Azure.Devices.Edge.Util.Json;
using Newtonsoft.Json;

public class LogsRequest
public class ModuleLogsRequest
{
public LogsRequest(
public ModuleLogsRequest(
string schemaVersion,
List<LogRequestItem> items,
LogsContentEncoding encoding,
Expand All @@ -23,7 +23,7 @@ public LogsRequest(
}

[JsonConstructor]
LogsRequest(
ModuleLogsRequest(
string schemaVersion,
List<LogRequestItem> items,
LogsContentEncoding? encoding,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace Microsoft.Azure.Devices.Edge.Agent.Core.Requests
using Microsoft.Azure.Devices.Edge.Util;
using Microsoft.Extensions.Logging;

public class LogsRequestHandler : RequestHandlerBase<LogsRequest, IEnumerable<LogsResponse>>
public class ModuleLogsRequestHandler : RequestHandlerBase<ModuleLogsRequest, IEnumerable<ModuleLogsResponse>>
{
const int MaxTailValue = 500;

Expand All @@ -20,17 +20,17 @@ public class LogsRequestHandler : RequestHandlerBase<LogsRequest, IEnumerable<Lo
readonly ILogsProvider logsProvider;
readonly IRuntimeInfoProvider runtimeInfoProvider;

public LogsRequestHandler(ILogsProvider logsProvider, IRuntimeInfoProvider runtimeInfoProvider)
public ModuleLogsRequestHandler(ILogsProvider logsProvider, IRuntimeInfoProvider runtimeInfoProvider)
{
this.logsProvider = Preconditions.CheckNotNull(logsProvider, nameof(logsProvider));
this.runtimeInfoProvider = Preconditions.CheckNotNull(runtimeInfoProvider, nameof(runtimeInfoProvider));
}

public override string RequestName => "GetLogs";

protected override async Task<Option<IEnumerable<LogsResponse>>> HandleRequestInternal(Option<LogsRequest> payloadOption, CancellationToken cancellationToken)
protected override async Task<Option<IEnumerable<ModuleLogsResponse>>> HandleRequestInternal(Option<ModuleLogsRequest> payloadOption, CancellationToken cancellationToken)
{
LogsRequest payload = payloadOption.Expect(() => new ArgumentException("Request payload not found"));
ModuleLogsRequest payload = payloadOption.Expect(() => new ArgumentException("Request payload not found"));
if (ExpectedSchemaVersion.CompareMajorVersion(payload.SchemaVersion, "logs upload request schema") != 0)
{
Events.MismatchedMinorVersions(payload.SchemaVersion, ExpectedSchemaVersion);
Expand All @@ -47,7 +47,7 @@ protected override async Task<Option<IEnumerable<LogsResponse>>> HandleRequestIn
false);

IList<(string id, ModuleLogOptions logOptions)> logOptionsList = await requestToOptionsMapper.MapToLogOptions(payload.Items, cancellationToken);
IEnumerable<Task<LogsResponse>> uploadLogsTasks = logOptionsList.Select(
IEnumerable<Task<ModuleLogsResponse>> uploadLogsTasks = logOptionsList.Select(
async l =>
{
Events.ReceivedLogOptions(l);
Expand All @@ -65,17 +65,17 @@ protected override async Task<Option<IEnumerable<LogsResponse>>> HandleRequestIn

Events.ReceivedModuleLogs(moduleLogs, l.id);
return logOptions.ContentEncoding == LogsContentEncoding.Gzip
? new LogsResponse(l.id, moduleLogs)
: new LogsResponse(l.id, moduleLogs.FromBytes());
? new ModuleLogsResponse(l.id, moduleLogs)
: new ModuleLogsResponse(l.id, moduleLogs.FromBytes());
});
IEnumerable<LogsResponse> response = await Task.WhenAll(uploadLogsTasks);
IEnumerable<ModuleLogsResponse> response = await Task.WhenAll(uploadLogsTasks);
return Option.Some(response);
}

static class Events
{
const int IdStart = AgentEventIds.LogsRequestHandler;
static readonly ILogger Log = Logger.Factory.CreateLogger<LogsRequestHandler>();
static readonly ILogger Log = Logger.Factory.CreateLogger<ModuleLogsRequestHandler>();

enum EventIds
{
Expand Down Expand Up @@ -107,7 +107,7 @@ public static void ReceivedLogOptions((string id, ModuleLogOptions logOptions) r
}
}

public static void ProcessingRequest(LogsRequest payload)
public static void ProcessingRequest(ModuleLogsRequest payload)
{
Log.LogInformation((int)EventIds.ProcessingRequest, $"Processing request to get logs for {payload.ToJson()}");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,20 @@ namespace Microsoft.Azure.Devices.Edge.Agent.Core.Requests
using Microsoft.Azure.Devices.Edge.Util.Json;
using Newtonsoft.Json;

public class LogsResponse
public class ModuleLogsResponse
{
public LogsResponse(string id, byte[] payloadBytes)
public ModuleLogsResponse(string id, byte[] payloadBytes)
: this(id, null, payloadBytes)
{
}

public LogsResponse(string id, string payload)
public ModuleLogsResponse(string id, string payload)
: this(id, payload, null)
{
}

[JsonConstructor]
LogsResponse(string id, string payload, byte[] payloadBytes)
ModuleLogsResponse(string id, string payload, byte[] payloadBytes)
{
this.Id = Preconditions.CheckNonWhiteSpace(id, nameof(id));
this.PayloadBytes = Option.Maybe(payloadBytes);
Expand Down
Loading