Example Alerts for Azure Resource Graph Data


Alerting on Azure Resource Graph (ARG) data recently entered public preview. To successfully create alerts in Azure Monitor using ARG data, you will need a managed identity with Reader access to:

  • any resource(s) you want to alert on
  • log analytics reader on a Log Analytics Workspace

Alerts for Compute Resources

Performance & Heartbeat Alerts

Heartbeat Alert filtered by Resource Tags

// Heartbeat alert filtered by resource tag: returns tagged Azure VMs / Arc
// machines whose most recent Heartbeat record is older than Threshold.
// NOTE(review): the published snippet had lost its source tables;
// arg("").Resources and Heartbeat are assumed from the section title - confirm.
let Threshold = 10m;
arg("").Resources
| where type =~ 'microsoft.compute/virtualmachines' or type =~ 'microsoft.hybridcompute/machines'
| where isnotempty(tags)
| where tags !has "hidden-"
// bagexpansion=array turns each tag into a [name, value] pair.
| mv-expand bagexpansion=array tags limit 400
| extend tagName = tostring(tags[0]), tagValue = tostring(tags[1])
| where tagName == "Environment" and tagValue == "SandlotLab"
// Lower-case the id so the join is not defeated by casing differences.
| project ResourceId = tolower(id), tagName, tagValue
| join (
    Heartbeat
    // arg_max on TimeGenerated keeps the columns of the latest heartbeat row.
    | summarize arg_max(TimeGenerated, SubscriptionId, ResourceGroup, Computer) by ResourceId = tolower(ResourceId)
    | project-rename LastHeartBeat = TimeGenerated)
    on ResourceId
| where LastHeartBeat < ago(Threshold)
| project Computer, ResourceId, ResourceGroup, SubscriptionId, LastHeartBeat

Alert on high CPU Usage filtered by Resource Tags

// High CPU alert filtered by resource tag: returns tagged Azure VMs / Arc
// machines whose average "% Processor Time" exceeds Threshold (percent).
// NOTE(review): the published snippet had lost its source tables;
// arg("").Resources and Perf are assumed from the columns used - confirm.
let Threshold = 90;
arg("").Resources
| where type =~ 'microsoft.compute/virtualmachines' or type =~ 'microsoft.hybridcompute/machines'
| where isnotempty(tags)
| where tags !has "hidden-"
// bagexpansion=array turns each tag into a [name, value] pair.
| mv-expand bagexpansion=array tags limit 400
| extend tagName = tostring(tags[0]), tagValue = tostring(tags[1])
| where tagName == "Environment" and tagValue == "SandlotLab"
// Both join sides are lower-cased so resource ids match regardless of casing.
| project ResourceId = tolower(id), name, tagName, tagValue
| join (
    Perf
    | where CounterName == "% Processor Time"
    | summarize ['% Processor'] = avg(CounterValue) by ResourceId = tolower(_ResourceId)
    | where ['% Processor'] > Threshold)
    on ResourceId
| project ['% Processor'], ResourceId, name, tagName, tagValue

Alert on Disk Free space filtered by Resource Tags

// Disk free space alert filtered by resource tag: returns tagged Azure VMs /
// Arc machines whose average "% Free Space" is below Threshold (percent).
// NOTE(review): the published snippet had lost its source tables;
// arg("").Resources and Perf are assumed from the columns used - confirm.
let Threshold = 10;
arg("").Resources
| where type =~ 'microsoft.compute/virtualmachines' or type =~ 'microsoft.hybridcompute/machines'
| where isnotempty(tags)
| where tags !has "hidden-"
// bagexpansion=array turns each tag into a [name, value] pair.
| mv-expand bagexpansion=array tags limit 400
| extend tagName = tostring(tags[0]), tagValue = tostring(tags[1])
| where tagName == "Environment" and tagValue == "SandlotLab"
// Both join sides are lower-cased so resource ids match regardless of casing.
| project ResourceId = tolower(id), name, tagName, tagValue
| join (
    Perf
    | where CounterName == "% Free Space"
    | summarize ['% Free Space'] = avg(CounterValue) by ResourceId = tolower(_ResourceId)
    // The closing parenthesis of the join subquery was missing in the original.
    | where ['% Free Space'] < Threshold)
    on ResourceId
| project ['% Free Space'], ResourceId, name, tagName, tagValue

Alert on Memory Usage using VMInsights Filtered by Resource Tags

// Memory alert (VM insights) filtered by resource tag: returns tagged machines
// with less than 500 MB available on average or more than 90 % memory used.
// NOTE(review): the published snippet had lost its source tables and the Tags
// key; arg("").Resources, InsightsMetrics, the AvailableMB metric and the
// 'vm.azm.ms/memorySizeMB' tag are assumed from VM insights conventions - confirm.
let NameKey = "Environment";
let NameValue = "Sandlot";
arg("").Resources
| where type =~ 'microsoft.compute/virtualmachines' or type =~ 'microsoft.hybridcompute/machines'
| where isnotempty(tags)
| where tags !has "hidden-"
// bagexpansion=array turns each tag into a [name, value] pair.
| mv-expand bagexpansion=array tags limit 400
| extend tagName = tostring(tags[0]), tagValue = tostring(tags[1])
| where tagName == NameKey and tagValue == NameValue
// The join key must exist on both sides; the original defined it on neither.
| extend ResourceId = tolower(id)
| join (
    InsightsMetrics
    | where Namespace == "Memory" and Name == "AvailableMB"
    | extend Tags = todynamic(Tags)
    | extend TotalMem = toreal(Tags['vm.azm.ms/memorySizeMB'])
    // Val is treated as available MB: remaining = Val, used % = (total - Val) / total * 100.
    | summarize RemainMB = avg(Val),
                ['Percent Used'] = avg((TotalMem - Val) / TotalMem * 100) by ResourceId = tolower(_ResourceId)
    | where RemainMB < 500 or ['Percent Used'] > 90)
    on ResourceId

Alerts for Azure Update Manager

Alert on VMs needing patches

// VMs needing patches: one row per machine with the set of missing updates
// (name + first classification) and how many patch records were found.
// NOTE(review): the published snippet had lost its source table;
// arg("").patchassessmentresources is assumed from the id format parsed below - confirm.
arg("").patchassessmentresources
| where type has "softwarepatches"
| extend id = tolower(id)
// Everything before "/patchassessmentresults" is the machine's resource id.
| parse id with resourceId "/patchassessmentresults" *
| where isnotnull(properties.kbId)
| extend
    MissingUpdate = tostring(properties.patchName),
    Classification = tostring(properties.classifications[0])
| extend UpdatesNeeded = pack_array(MissingUpdate, Classification)
| summarize UpdatesNeeded = make_set(UpdatesNeeded), Count = count() by resourceId

Alert on failed update runs

// Failed update runs: counts, per maintenance configuration, the runs that
// ended in the last 12 hours with at least one failed or timed-out resource.
// NOTE(review): the published snippet had lost its source table;
// arg("").maintenanceresources is assumed from the properties used - confirm.
arg("").maintenanceresources
| extend failed = toint(properties.resourceUpdateSummary.failed),
        timeout = toint(properties.resourceUpdateSummary.timedout),
        maintenanceId = tostring(properties.maintenanceConfigurationId),
        EndTime = todatetime(properties.endDateTime)
| where failed > 0 or timeout > 0
| where EndTime > ago(12h)
| summarize Failed = count() by maintenanceId

Alert on Azure Resources

Alert on Classic Application Insights

// Classic Application Insights: lists components whose ingestion mode is
// "ApplicationInsights".
// NOTE(review): the published snippet had lost its source table;
// arg("").Resources is assumed - confirm.
arg("").Resources
| where type =~ 'microsoft.insights/components'
// tostring() makes the dynamic property safely comparable and projectable.
| extend WorkspaceType = tostring(properties.IngestionMode)
| where WorkspaceType =~ "ApplicationInsights"
| project id, subscriptionId, resourceGroup, WorkspaceType

Alert on App Service Certificate Expiration

// App Service certificate expiration: lists certificates that are already
// expired or expire within the next 30 days.
// NOTE(review): the published snippet had lost its source table;
// arg("").Resources is assumed - confirm.
arg("").Resources
| where type =~ "microsoft.web/certificates"
| extend expirationDate = todatetime(properties.expirationDate)
// ExpirationStatus = whole days from now until expiry (negative once expired).
| extend ExpirationStatus = datetime_diff("day", expirationDate, now())
| extend Expiration = case(
                ExpirationStatus <= 30 and ExpirationStatus >= 0, "Expires Soon",
                ExpirationStatus < 0, "Expired",
                ExpirationStatus > 30, "Good",
                "")
| where Expiration != "Good"
| project id, subscriptionId, expirationDate, Expiration, ExpirationStatus

Alert on Orphaned Resources

Alert on orphaned public IPs

// Orphaned public IPs: lists public IP addresses with no ipConfiguration.
// NOTE(review): the published snippet had lost its source table and resource
// type (the filter was an empty string, which matches nothing);
// arg("").Resources and the public IP type are assumed from the heading - confirm.
arg("").Resources
| where type =~ "microsoft.network/publicipaddresses"
| extend ipConfig = tostring(properties.ipConfiguration)
| where isempty(ipConfig)
| project id, subscriptionId, ipConfig

Alert on orphaned disks

// Orphaned disks: lists managed disks with no owner (managedBy is empty),
// excluding disks whose names follow Azure Site Recovery patterns.
// NOTE(review): the published snippet had lost its source table;
// arg("").Resources is assumed - confirm.
arg("").Resources
| where type has "microsoft.compute/disks"
| extend diskState = tostring(properties.diskState)
| where isempty(managedBy)
| where not(name endswith "-ASRReplica" or name startswith "ms-asr-" or name startswith "asrseeddisk-")
// diskState was computed but never used; it is surfaced here for triage.
| project id, subscriptionId, diskState

Alert on Empty App Service Plans

// Empty App Service plans: counts, per subscription, plans that host no
// sites and were created more than 7 days ago.
// NOTE(review): the published snippet had lost its source table;
// arg("").Resources is assumed - confirm.
arg("").Resources
| where type =~ "microsoft.web/serverfarms"
| extend sites = toint(properties.numberOfSites),
        createdTime = todatetime(properties.createdTime)
// The 7-day grace period avoids flagging plans that were only just created.
| where sites == 0 and createdTime < ago(7d)
| summarize EmptyAppServicePlans = count() by subscriptionId

Alert on Empty Availability Sets

// Empty availability sets: counts, per subscription, availability sets whose
// virtualMachines list is empty.
// NOTE(review): the published snippet had lost its source table;
// arg("").Resources is assumed - confirm.
arg("").Resources
| where type =~ 'Microsoft.Compute/availabilitySets'
// An empty dynamic array serialises to "[]".
| where tostring(properties.virtualMachines) == "[]"
// The original omitted "by" and then tried to project-away tenantId, a column
// that no longer exists after summarize.
| summarize EmptyAvailabilitySets = count() by subscriptionId

Alert on Empty Load Balancers

// Empty load balancers: counts, per subscription, load balancers whose
// backendAddressPools list is empty.
// NOTE(review): the published snippet had lost its source table and resource
// type (the filter was an empty string, which matches nothing);
// arg("").Resources and the load balancer type are assumed from the heading - confirm.
arg("").Resources
| where type =~ "microsoft.network/loadbalancers"
// The original used "extend" here, which does not filter; "where" is intended.
| where tostring(properties.backendAddressPools) == "[]"
// The original omitted "by" and then tried to project-away tenantId, a column
// that no longer exists after summarize.
| summarize EmptyLoadBalancers = count() by subscriptionId

Alert on Orphaned NICs

// Orphaned NICs: counts, per subscription, network interfaces that are not
// attached to a private endpoint, private link service, hosted workload or VM.
// NOTE(review): the published snippet had lost its source table and resource
// type (the filter was an empty string, which matches nothing);
// arg("").Resources and the network interface type are assumed from the heading - confirm.
arg("").Resources
| where type =~ "microsoft.network/networkinterfaces"
| where isnull(properties.privateEndpoint)
        and isnull(properties.privateLinkService)
        and properties.hostedWorkloads == "[]"
        and properties !has 'virtualmachine'
| summarize EmptyNICs = count() by subscriptionId

Alert on unused NAT Gateway

// Unused NAT gateways: counts, per subscription, NAT gateways that have no
// subnets property.
// NOTE(review): the published snippet had lost its source table and resource
// type (the filter was an empty string, which matches nothing);
// arg("").Resources and the NAT gateway type are assumed from the heading - confirm.
arg("").Resources
| where type =~ "microsoft.network/natgateways"
| where isnull(properties.subnets)
// count() takes no argument; the result column is named explicitly.
| summarize UnusedNatGateways = count() by subscriptionId

Alert on Resource Changes

Alert on any resource deletion

// Resource deletion alert: returns change records whose changeType is "Delete".
// NOTE(review): the published snippet had lost its source table;
// arg("").resourcechanges is assumed from the properties used - confirm.
arg("").resourcechanges
| extend Target = tostring(properties.targetResourceType),
        changeType = tostring(properties.changeType),
        targetResourceId = tostring(properties.targetResourceId),
        timestamp = todatetime(properties.changeAttributes.timestamp),
        // The correlation id is an identifier, not a timestamp: todatetime()
        // in the original would always yield null.
        correlationId = tostring(properties.changeAttributes.correlationId)
| where changeType == "Delete"

vNet Peering state is disconnected

// vNet peering alert: returns virtual network change records in which the
// first peering's state changed to something other than "Connected".
// NOTE(review): the published snippet had lost its source table and target
// resource type (the filter was an empty string);
// arg("").resourcechanges and the virtual network type are assumed - confirm.
arg("").resourcechanges
| extend Target = tostring(properties.targetResourceType),
        PeeringStatus = properties.changes["properties.virtualNetworkPeerings[0].properties.peeringState"],
        timestamp = todatetime(properties.changeAttributes.timestamp)
| where Target =~ "microsoft.network/virtualnetworks"
| extend PeeringStatus = tostring(PeeringStatus.newValue)
// Changes that do not touch the peering state yield an empty value; without
// this guard every unrelated vNet change would satisfy != "Connected".
| where isnotempty(PeeringStatus)
| where PeeringStatus != "Connected"