From 2c3312dee05a68ec6f59bb1f21f151ced9c6c53a Mon Sep 17 00:00:00 2001 From: Ying Mao Date: Tue, 29 Mar 2022 12:29:43 -0400 Subject: [PATCH 1/4] Adding telemetry for number of scheduled actions --- .../alerting/server/usage/alerting_telemetry.ts | 11 +++++++++++ .../server/usage/alerting_usage_collector.ts | 13 +++++++++++++ x-pack/plugins/alerting/server/usage/task.ts | 6 ++++-- x-pack/plugins/alerting/server/usage/types.ts | 5 +++++ 4 files changed, 33 insertions(+), 2 deletions(-) diff --git a/x-pack/plugins/alerting/server/usage/alerting_telemetry.ts b/x-pack/plugins/alerting/server/usage/alerting_telemetry.ts index 4fbad593d1600..d84374f0b8d6e 100644 --- a/x-pack/plugins/alerting/server/usage/alerting_telemetry.ts +++ b/x-pack/plugins/alerting/server/usage/alerting_telemetry.ts @@ -409,6 +409,12 @@ export async function getExecutionsPerDayCount( avgTotalSearchDuration: { avg: { field: 'kibana.alert.rule.execution.metrics.total_search_duration_ms' }, }, + percentileScheduledActions: { + percentiles: { + field: 'kibana.alert.rule.execution.metrics.number_of_scheduled_actions', + percents: [50, 90, 99], + }, + }, }, }, }); @@ -439,6 +445,10 @@ export async function getExecutionsPerDayCount( searchResult.aggregations.avgTotalSearchDuration.value ); + const aggsScheduledActionsPercentiles = + // @ts-expect-error aggegation type is not specified + searchResult.aggregations.percentileScheduledActions.values; + const executionFailuresAggregations = searchResult.aggregations as { failuresByReason: { value: { reasons: Record> } }; }; @@ -537,6 +547,7 @@ export async function getExecutionsPerDayCount( }), {} ), + scheduledActionsPercentiles: aggsScheduledActionsPercentiles, }; } diff --git a/x-pack/plugins/alerting/server/usage/alerting_usage_collector.ts b/x-pack/plugins/alerting/server/usage/alerting_usage_collector.ts index f375e758a8c9b..43e56bb65a38e 100644 --- a/x-pack/plugins/alerting/server/usage/alerting_usage_collector.ts +++ b/x-pack/plugins/alerting/server/usage/alerting_usage_collector.ts @@ -66,6 +66,13 @@ const byReasonSchema: MakeSchemaFrom['count_rules_executions_fail unknown: { type: 'long' }, }; +const byPercentileSchema: MakeSchemaFrom['percentile_num_scheduled_actions_per_day'] = + { + p50: { type: 'long' }, + p90: { type: 'long' }, + p99: { type: 'long' }, + }; + const byReasonSchemaByType: MakeSchemaFrom['count_rules_executions_failured_by_reason_by_type_per_day'] = { // TODO: Find out an automated way to populate the keys or reformat these into an array (and change the Remote Telemetry indexer accordingly) @@ -160,6 +167,11 @@ export function createAlertingUsageCollector( avg_es_search_duration_by_type_per_day: {}, avg_total_search_duration_per_day: 0, avg_total_search_duration_by_type_per_day: {}, + percentile_num_scheduled_actions_per_day: { + p50: 0, + p90: 0, + p99: 0, + }, }; } }, @@ -211,6 +223,7 @@ export function createAlertingUsageCollector( avg_es_search_duration_by_type_per_day: byTypeSchema, avg_total_search_duration_per_day: { type: 'long' }, avg_total_search_duration_by_type_per_day: byTypeSchema, + percentile_num_scheduled_actions_per_day: byPercentileSchema, }, }); } diff --git a/x-pack/plugins/alerting/server/usage/task.ts b/x-pack/plugins/alerting/server/usage/task.ts index 7aee043653806..31e677e7dd9be 100644 --- a/x-pack/plugins/alerting/server/usage/task.ts +++ b/x-pack/plugins/alerting/server/usage/task.ts @@ -144,8 +144,10 @@ export function telemetryTaskRunner( avg_total_search_duration_per_day: dailyExecutionCounts.avgTotalSearchDuration, avg_total_search_duration_by_type_per_day: dailyExecutionCounts.avgTotalSearchDurationByType, + percentile_num_scheduled_actions_per_day: + dailyExecutionCounts.scheduledActionsPercentiles, }, - runAt: getNextMidnight(), + runAt: new Date(), // getNextMidnight(), }; } ) @@ -153,7 +155,7 @@ export function telemetryTaskRunner( logger.warn(`Error executing alerting telemetry task: ${errMsg}`); return { state: {}, - runAt: getNextMidnight(), + runAt: new Date(), // getNextMidnight(), }; }); }, diff --git a/x-pack/plugins/alerting/server/usage/types.ts b/x-pack/plugins/alerting/server/usage/types.ts index a03483bd54007..9adb122362005 100644 --- a/x-pack/plugins/alerting/server/usage/types.ts +++ b/x-pack/plugins/alerting/server/usage/types.ts @@ -25,6 +25,11 @@ export interface AlertingUsage { string, Record >; + percentile_num_scheduled_actions_per_day: { + p50: number; + p90: number; + p99: number; + }; avg_execution_time_per_day: number; avg_execution_time_by_type_per_day: Record; avg_es_search_duration_per_day: number; From dc47fdbae88384fc357b4c0b0af0edb1b47978e4 Mon Sep 17 00:00:00 2001 From: Ying Mao Date: Wed, 30 Mar 2022 09:52:35 -0400 Subject: [PATCH 2/4] Adding percentile by type types --- .../server/usage/alerting_telemetry.ts | 1 + .../server/usage/alerting_usage_collector.ts | 13 + x-pack/plugins/alerting/server/usage/task.ts | 2 + x-pack/plugins/alerting/server/usage/types.ts | 5 + .../schema/xpack_plugins.json | 326 ++++++++++++++++++ 5 files changed, 347 insertions(+) diff --git a/x-pack/plugins/alerting/server/usage/alerting_telemetry.ts b/x-pack/plugins/alerting/server/usage/alerting_telemetry.ts index d84374f0b8d6e..1becf9d1783bd 100644 --- a/x-pack/plugins/alerting/server/usage/alerting_telemetry.ts +++ b/x-pack/plugins/alerting/server/usage/alerting_telemetry.ts @@ -548,6 +548,7 @@ export async function getExecutionsPerDayCount( {} ), scheduledActionsPercentiles: aggsScheduledActionsPercentiles, + scheduledActionsPercentilesByType: {}, }; } diff --git a/x-pack/plugins/alerting/server/usage/alerting_usage_collector.ts b/x-pack/plugins/alerting/server/usage/alerting_usage_collector.ts index 43e56bb65a38e..a9f18bf207ae0 100644 --- a/x-pack/plugins/alerting/server/usage/alerting_usage_collector.ts +++ b/x-pack/plugins/alerting/server/usage/alerting_usage_collector.ts @@ -73,6 +73,13 @@ const byPercentileSchema: MakeSchemaFrom['percentile_num_schedule p99: { type: 'long' }, }; +const byPercentileSchemaByType: MakeSchemaFrom['percentile_num_scheduled_actions_by_type_per_day'] = + { + p50: byTypeSchema, + p90: byTypeSchema, + p99: byTypeSchema, + }; + const byReasonSchemaByType: MakeSchemaFrom['count_rules_executions_failured_by_reason_by_type_per_day'] = { // TODO: Find out an automated way to populate the keys or reformat these into an array (and change the Remote Telemetry indexer accordingly) @@ -172,6 +179,11 @@ export function createAlertingUsageCollector( p90: 0, p99: 0, }, + percentile_num_scheduled_actions_by_type_per_day: { + p50: {}, + p90: {}, + p99: {}, + }, }; } }, @@ -224,6 +236,7 @@ export function createAlertingUsageCollector( avg_total_search_duration_per_day: { type: 'long' }, avg_total_search_duration_by_type_per_day: byTypeSchema, percentile_num_scheduled_actions_per_day: byPercentileSchema, + percentile_num_scheduled_actions_by_type_per_day: byPercentileSchemaByType, }, }); } diff --git a/x-pack/plugins/alerting/server/usage/task.ts b/x-pack/plugins/alerting/server/usage/task.ts index 31e677e7dd9be..fcab46a10e68a 100644 --- a/x-pack/plugins/alerting/server/usage/task.ts +++ b/x-pack/plugins/alerting/server/usage/task.ts @@ -146,6 +146,8 @@ export function telemetryTaskRunner( dailyExecutionCounts.avgTotalSearchDurationByType, percentile_num_scheduled_actions_per_day: dailyExecutionCounts.scheduledActionsPercentiles, + percentile_num_scheduled_actions_by_type_per_day: + dailyExecutionCounts.scheduledActionsPercentilesByType, }, runAt: new Date(), // getNextMidnight(), }; diff --git a/x-pack/plugins/alerting/server/usage/types.ts b/x-pack/plugins/alerting/server/usage/types.ts index 9adb122362005..00bd3b46f91b1 100644 --- a/x-pack/plugins/alerting/server/usage/types.ts +++ b/x-pack/plugins/alerting/server/usage/types.ts @@ -30,6 +30,11 @@ export interface AlertingUsage { p90: number; p99: number; }; + percentile_num_scheduled_actions_by_type_per_day: { + p50: Record; + p90: Record; + p99: Record; + }; avg_execution_time_per_day: number; avg_execution_time_by_type_per_day: Record; avg_es_search_duration_per_day: number; diff --git a/x-pack/plugins/telemetry_collection_xpack/schema/xpack_plugins.json b/x-pack/plugins/telemetry_collection_xpack/schema/xpack_plugins.json index 1a3e32a3ccd6f..38627b44bb209 100644 --- a/x-pack/plugins/telemetry_collection_xpack/schema/xpack_plugins.json +++ b/x-pack/plugins/telemetry_collection_xpack/schema/xpack_plugins.json @@ -1943,6 +1943,332 @@ "type": "long" } } + }, + "percentile_num_scheduled_actions_per_day": { + "properties": { + "p50": { + "type": "long" + }, + "p90": { + "type": "long" + }, + "p99": { + "type": "long" + } + } + }, + "percentile_num_scheduled_actions_by_type_per_day": { + "properties": { + "p50": { + "properties": { + "DYNAMIC_KEY": { + "type": "long" + }, + "__index-threshold": { + "type": "long" + }, + "__es-query": { + "type": "long" + }, + "transform_health": { + "type": "long" + }, + "apm__error_rate": { + "type": "long" + }, + "apm__transaction_error_rate": { + "type": "long" + }, + "apm__transaction_duration": { + "type": "long" + }, + "apm__transaction_duration_anomaly": { + "type": "long" + }, + "metrics__alert__threshold": { + "type": "long" + }, + "metrics__alert__inventory__threshold": { + "type": "long" + }, + "logs__alert__document__count": { + "type": "long" + }, + "monitoring_alert_cluster_health": { + "type": "long" + }, + "monitoring_alert_cpu_usage": { + "type": "long" + }, + "monitoring_alert_disk_usage": { + "type": "long" + }, + "monitoring_alert_elasticsearch_version_mismatch": { + "type": "long" + }, + "monitoring_alert_kibana_version_mismatch": { + "type": "long" + }, + "monitoring_alert_license_expiration": { + "type": "long" + }, + "monitoring_alert_logstash_version_mismatch": { + "type": "long" + }, + "monitoring_alert_nodes_changed": { + "type": "long" + }, + "siem__signals": { + "type": "long" + }, + "siem__notifications": { + "type": "long" + }, + "siem__eqlRule": { + "type": "long" + }, + "siem__indicatorRule": { + "type": "long" + }, + "siem__mlRule": { + "type": "long" + }, + "siem__queryRule": { + "type": "long" + }, + "siem__savedQueryRule": { + "type": "long" + }, + "siem__thresholdRule": { + "type": "long" + }, + "xpack__uptime__alerts__monitorStatus": { + "type": "long" + }, + "xpack__uptime__alerts__tls": { + "type": "long" + }, + "xpack__uptime__alerts__durationAnomaly": { + "type": "long" + }, + "__geo-containment": { + "type": "long" + }, + "xpack__ml__anomaly_detection_alert": { + "type": "long" + }, + "xpack__ml__anomaly_detection_jobs_health": { + "type": "long" + } + } + }, + "p90": { + "properties": { + "DYNAMIC_KEY": { + "type": "long" + }, + "__index-threshold": { + "type": "long" + }, + "__es-query": { + "type": "long" + }, + "transform_health": { + "type": "long" + }, + "apm__error_rate": { + "type": "long" + }, + "apm__transaction_error_rate": { + "type": "long" + }, + "apm__transaction_duration": { + "type": "long" + }, + "apm__transaction_duration_anomaly": { + "type": "long" + }, + "metrics__alert__threshold": { + "type": "long" + }, + "metrics__alert__inventory__threshold": { + "type": "long" + }, + "logs__alert__document__count": { + "type": "long" + }, + "monitoring_alert_cluster_health": { + "type": "long" + }, + "monitoring_alert_cpu_usage": { + "type": "long" + }, + "monitoring_alert_disk_usage": { + "type": "long" + }, + "monitoring_alert_elasticsearch_version_mismatch": { + "type": "long" + }, + "monitoring_alert_kibana_version_mismatch": { + "type": "long" + }, + "monitoring_alert_license_expiration": { + "type": "long" + }, + "monitoring_alert_logstash_version_mismatch": { + "type": "long" + }, + "monitoring_alert_nodes_changed": { + "type": "long" + }, + "siem__signals": { + "type": "long" + }, + "siem__notifications": { + "type": "long" + }, + "siem__eqlRule": { + "type": "long" + }, + "siem__indicatorRule": { + "type": "long" + }, + "siem__mlRule": { + "type": "long" + }, + "siem__queryRule": { + "type": "long" + }, + "siem__savedQueryRule": { + "type": "long" + }, + "siem__thresholdRule": { + "type": "long" + }, + "xpack__uptime__alerts__monitorStatus": { + "type": "long" + }, + "xpack__uptime__alerts__tls": { + "type": "long" + }, + "xpack__uptime__alerts__durationAnomaly": { + "type": "long" + }, + "__geo-containment": { + "type": "long" + }, + "xpack__ml__anomaly_detection_alert": { + "type": "long" + }, + "xpack__ml__anomaly_detection_jobs_health": { + "type": "long" + } + } + }, + "p99": { + "properties": { + "DYNAMIC_KEY": { + "type": "long" + }, + "__index-threshold": { + "type": "long" + }, + "__es-query": { + "type": "long" + }, + "transform_health": { + "type": "long" + }, + "apm__error_rate": { + "type": "long" + }, + "apm__transaction_error_rate": { + "type": "long" + }, + "apm__transaction_duration": { + "type": "long" + }, + "apm__transaction_duration_anomaly": { + "type": "long" + }, + "metrics__alert__threshold": { + "type": "long" + }, + "metrics__alert__inventory__threshold": { + "type": "long" + }, + "logs__alert__document__count": { + "type": "long" + }, + "monitoring_alert_cluster_health": { + "type": "long" + }, + "monitoring_alert_cpu_usage": { + "type": "long" + }, + "monitoring_alert_disk_usage": { + "type": "long" + }, + "monitoring_alert_elasticsearch_version_mismatch": { + "type": "long" + }, + "monitoring_alert_kibana_version_mismatch": { + "type": "long" + }, + "monitoring_alert_license_expiration": { + "type": "long" + }, + "monitoring_alert_logstash_version_mismatch": { + "type": "long" + }, + "monitoring_alert_nodes_changed": { + "type": "long" + }, + "siem__signals": { + "type": "long" + }, + "siem__notifications": { + "type": "long" + }, + "siem__eqlRule": { + "type": "long" + }, + "siem__indicatorRule": { + "type": "long" + }, + "siem__mlRule": { + "type": "long" + }, + "siem__queryRule": { + "type": "long" + }, + "siem__savedQueryRule": { + "type": "long" + }, + "siem__thresholdRule": { + "type": "long" + }, + "xpack__uptime__alerts__monitorStatus": { + "type": "long" + }, + "xpack__uptime__alerts__tls": { + "type": "long" + }, + "xpack__uptime__alerts__durationAnomaly": { + "type": "long" + }, + "__geo-containment": { + "type": "long" + }, + "xpack__ml__anomaly_detection_alert": { + "type": "long" + }, + "xpack__ml__anomaly_detection_jobs_health": { + "type": "long" + } + } + } + } } } }, From f9d44391c5f48d82a2b59d8f3f11611339698394 Mon Sep 17 00:00:00 2001 From: Ying Mao Date: Wed, 30 Mar 2022 13:28:07 -0400 Subject: [PATCH 3/4] Parsing percentiles by rule type and adding tests --- .../server/usage/alerting_telemetry.test.ts | 187 ++++++++++++++++++ .../server/usage/alerting_telemetry.ts | 76 ++++++- .../server/usage/alerting_usage_collector.ts | 2 + 3 files changed, 259 insertions(+), 6 deletions(-) diff --git a/x-pack/plugins/alerting/server/usage/alerting_telemetry.test.ts b/x-pack/plugins/alerting/server/usage/alerting_telemetry.test.ts index 3bb64ad00a194..aa8aa42902ff6 100644 --- a/x-pack/plugins/alerting/server/usage/alerting_telemetry.test.ts +++ b/x-pack/plugins/alerting/server/usage/alerting_telemetry.test.ts @@ -15,6 +15,7 @@ import { getExecutionsPerDayCount, getExecutionTimeoutsPerDayCount, getFailedAndUnrecognizedTasksPerDay, + parsePercentileAggsByRuleType, } from './alerting_telemetry'; describe('alerting telemetry', () => { @@ -181,6 +182,41 @@ Object { avgTotalSearchDuration: { value: 30.642857142857142, }, + percentileScheduledActions: { + values: { + '50.0': 4.0, + '90.0': 26.0, + '99.0': 26.0, + }, + }, + aggsByType: { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets: [ + { + key: '.index-threshold', + doc_count: 149, + percentileScheduledActions: { + values: { + '50.0': 4.0, + '90.0': 26.0, + '99.0': 26.0, + }, + }, + }, + { + key: 'logs.alert.document.count', + doc_count: 1, + percentileScheduledActions: { + values: { + '50.0': 10.0, + '90.0': 10.0, + '99.0': 10.0, + }, + }, + }, + ], + }, }, hits: { hits: [], @@ -228,6 +264,25 @@ Object { }, countTotal: 4, countTotalFailures: 4, + scheduledActionsPercentiles: { + p50: 4, + p90: 26, + p99: 26, + }, + scheduledActionsPercentilesByType: { + p50: { + '__index-threshold': 4, + logs__alert__document__count: 10, + }, + p90: { + '__index-threshold': 26, + logs__alert__document__count: 10, + }, + p99: { + '__index-threshold': 26, + logs__alert__document__count: 10, + }, + }, }); }); @@ -316,4 +371,136 @@ Object { countTotal: 5, }); }); + + test('parsePercentileAggsByRuleType', () => { + const aggsByType = { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets: [ + { + key: '.index-threshold', + doc_count: 149, + percentileScheduledActions: { + values: { + '50.0': 4.0, + '90.0': 26.0, + '99.0': 26.0, + }, + }, + }, + { + key: 'logs.alert.document.count', + doc_count: 1, + percentileScheduledActions: { + values: { + '50.0': 10.0, + '90.0': 10.0, + '99.0': 10.0, + }, + }, + }, + ], + }; + expect( + parsePercentileAggsByRuleType(aggsByType.buckets, 'percentileScheduledActions.values') + ).toEqual({ + p50: { + '__index-threshold': 4, + logs__alert__document__count: 10, + }, + p90: { + '__index-threshold': 26, + logs__alert__document__count: 10, + }, + p99: { + '__index-threshold': 26, + logs__alert__document__count: 10, + }, + }); + }); + + test('parsePercentileAggsByRuleType handles unknown path', () => { + const aggsByType = { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets: [ + { + key: '.index-threshold', + doc_count: 149, + percentileScheduledActions: { + values: { + '50.0': 4.0, + '90.0': 26.0, + '99.0': 26.0, + }, + }, + }, + { + key: 'logs.alert.document.count', + doc_count: 1, + percentileScheduledActions: { + values: { + '50.0': 10.0, + '90.0': 10.0, + '99.0': 10.0, + }, + }, + }, + ], + }; + expect(parsePercentileAggsByRuleType(aggsByType.buckets, 'foo.values')).toEqual({ + p50: {}, + p90: {}, + p99: {}, + }); + }); + + test('parsePercentileAggsByRuleType handles unrecognized percentiles', () => { + const aggsByType = { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets: [ + { + key: '.index-threshold', + doc_count: 149, + percentileScheduledActions: { + values: { + '50.0': 4.0, + '75.0': 8.0, + '90.0': 26.0, + '99.0': 26.0, + }, + }, + }, + { + key: 'logs.alert.document.count', + doc_count: 1, + percentileScheduledActions: { + values: { + '50.0': 10.0, + '75.0': 10.0, + '90.0': 10.0, + '99.0': 10.0, + }, + }, + }, + ], + }; + expect( + parsePercentileAggsByRuleType(aggsByType.buckets, 'percentileScheduledActions.values') + ).toEqual({ + p50: { + '__index-threshold': 4, + logs__alert__document__count: 10, + }, + p90: { + '__index-threshold': 26, + logs__alert__document__count: 10, + }, + p99: { + '__index-threshold': 26, + logs__alert__document__count: 10, + }, + }); + }); }); diff --git a/x-pack/plugins/alerting/server/usage/alerting_telemetry.ts b/x-pack/plugins/alerting/server/usage/alerting_telemetry.ts index 1becf9d1783bd..0dd97d7186ff7 100644 --- a/x-pack/plugins/alerting/server/usage/alerting_telemetry.ts +++ b/x-pack/plugins/alerting/server/usage/alerting_telemetry.ts @@ -5,8 +5,17 @@ * 2.0. */ +import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; import { ElasticsearchClient } from 'kibana/server'; +import { get, merge } from 'lodash'; import { AlertingUsage } from './types'; +import { NUM_ALERTING_RULE_TYPES } from './alerting_usage_collector'; + +const percentileFieldNameMapping: Record = { + '50.0': 'p50', + '90.0': 'p90', + '99.0': 'p99', +}; const ruleTypeMetric = { scripted_metric: { @@ -38,6 +47,13 @@ const ruleTypeMetric = { }, }; +const scheduledActionsPercentilesAgg = { + percentiles: { + field: 'kibana.alert.rule.execution.metrics.number_of_scheduled_actions', + percents: [50, 90, 99], + }, +}; + const ruleTypeExecutionsWithDurationMetric = { scripted_metric: { init_script: @@ -409,10 +425,14 @@ export async function getExecutionsPerDayCount( avgTotalSearchDuration: { avg: { field: 'kibana.alert.rule.execution.metrics.total_search_duration_ms' }, }, - percentileScheduledActions: { - percentiles: { - field: 'kibana.alert.rule.execution.metrics.number_of_scheduled_actions', - percents: [50, 90, 99], + percentileScheduledActions: scheduledActionsPercentilesAgg, + aggsByType: { + terms: { + field: 'rule.category', + size: NUM_ALERTING_RULE_TYPES, + }, + aggs: { + percentileScheduledActions: scheduledActionsPercentilesAgg, }, }, }, @@ -449,6 +469,10 @@ export async function getExecutionsPerDayCount( // @ts-expect-error aggegation type is not specified searchResult.aggregations.percentileScheduledActions.values; + const aggsByTypeBuckets = + // @ts-expect-error aggegation type is not specified + searchResult.aggregations.aggsByType.buckets; + const executionFailuresAggregations = searchResult.aggregations as { failuresByReason: { value: { reasons: Record> } }; }; @@ -547,8 +571,21 @@ export async function getExecutionsPerDayCount( }), {} ), - scheduledActionsPercentiles: aggsScheduledActionsPercentiles, - scheduledActionsPercentilesByType: {}, + scheduledActionsPercentiles: Object.keys(aggsScheduledActionsPercentiles).reduce( + // ES DSL aggregations are returned as `any` by esClient.search + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (acc: any, curr: string) => ({ + ...acc, + ...(percentileFieldNameMapping[curr] + ? { [percentileFieldNameMapping[curr]]: aggsScheduledActionsPercentiles[curr] } + : {}), + }), + {} + ), + scheduledActionsPercentilesByType: parsePercentileAggsByRuleType( + aggsByTypeBuckets, + 'percentileScheduledActions.values' + ), }; } @@ -713,3 +750,30 @@ function replaceDotSymbolsInRuleTypeIds(ruleTypeIdObj: Record) { {} ); } + +export function parsePercentileAggsByRuleType( + aggsByType: estypes.AggregationsStringTermsBucketKeys[], + path: string +) { + return (aggsByType ?? []).reduce( + (acc, curr) => { + const percentiles = get(curr, path, {}); + return merge( + acc, + Object.keys(percentiles).reduce((pacc, pcurr) => { + return { + ...pacc, + ...(percentileFieldNameMapping[pcurr] + ? { + [percentileFieldNameMapping[pcurr]]: { + [replaceDotSymbols(curr.key)]: percentiles[pcurr], + }, + } + : {}), + }; + }, {}) + ); + }, + { p50: {}, p90: {}, p99: {} } + ); +} diff --git a/x-pack/plugins/alerting/server/usage/alerting_usage_collector.ts b/x-pack/plugins/alerting/server/usage/alerting_usage_collector.ts index a9f18bf207ae0..b0990bab9491d 100644 --- a/x-pack/plugins/alerting/server/usage/alerting_usage_collector.ts +++ b/x-pack/plugins/alerting/server/usage/alerting_usage_collector.ts @@ -56,6 +56,8 @@ const byTypeSchema: MakeSchemaFrom['count_by_type'] = { xpack__ml__anomaly_detection_jobs_health: { type: 'long' }, // eslint-disable-line @typescript-eslint/naming-convention }; +export const NUM_ALERTING_RULE_TYPES = Object.keys(byTypeSchema).length; + const byReasonSchema: MakeSchemaFrom['count_rules_executions_failured_by_reason_per_day'] = { // TODO: Find out an automated way to populate the keys or reformat these into an array (and change the Remote Telemetry indexer accordingly) From 238f3dcd6285f96625987ce6c4a0334112b097f5 Mon Sep 17 00:00:00 2001 From: Ying Mao Date: Wed, 30 Mar 2022 15:45:14 -0400 Subject: [PATCH 4/4] Adding functional tests --- .../server/usage/alerting_telemetry.test.ts | 14 +++ .../server/usage/alerting_telemetry.ts | 2 +- x-pack/plugins/alerting/server/usage/task.ts | 4 +- .../tests/telemetry/alerting_telemetry.ts | 115 +++++++++++++++--- 4 files changed, 112 insertions(+), 23 deletions(-) diff --git a/x-pack/plugins/alerting/server/usage/alerting_telemetry.test.ts b/x-pack/plugins/alerting/server/usage/alerting_telemetry.test.ts index aa8aa42902ff6..61383656e67d5 100644 --- a/x-pack/plugins/alerting/server/usage/alerting_telemetry.test.ts +++ b/x-pack/plugins/alerting/server/usage/alerting_telemetry.test.ts @@ -399,6 +399,17 @@ Object { }, }, }, + { + key: 'document.test.', + doc_count: 1, + percentileScheduledActions: { + values: { + '50.0': null, + '90.0': null, + '99.0': null, + }, + }, + }, ], }; expect( @@ -406,14 +417,17 @@ Object { ).toEqual({ p50: { '__index-threshold': 4, + document__test__: 0, logs__alert__document__count: 10, }, p90: { '__index-threshold': 26, + document__test__: 0, logs__alert__document__count: 10, }, p99: { '__index-threshold': 26, + document__test__: 0, logs__alert__document__count: 10, }, }); diff --git a/x-pack/plugins/alerting/server/usage/alerting_telemetry.ts b/x-pack/plugins/alerting/server/usage/alerting_telemetry.ts index 0dd97d7186ff7..2e360374faa42 100644 --- a/x-pack/plugins/alerting/server/usage/alerting_telemetry.ts +++ b/x-pack/plugins/alerting/server/usage/alerting_telemetry.ts @@ -766,7 +766,7 @@ export function parsePercentileAggsByRuleType( ...(percentileFieldNameMapping[pcurr] ? { [percentileFieldNameMapping[pcurr]]: { - [replaceDotSymbols(curr.key)]: percentiles[pcurr], + [replaceDotSymbols(curr.key)]: percentiles[pcurr] ?? 0, }, } : {}), diff --git a/x-pack/plugins/alerting/server/usage/task.ts b/x-pack/plugins/alerting/server/usage/task.ts index fcab46a10e68a..0d0d2d802a3fb 100644 --- a/x-pack/plugins/alerting/server/usage/task.ts +++ b/x-pack/plugins/alerting/server/usage/task.ts @@ -149,7 +149,7 @@ export function telemetryTaskRunner( percentile_num_scheduled_actions_by_type_per_day: dailyExecutionCounts.scheduledActionsPercentilesByType, }, - runAt: new Date(), // getNextMidnight(), + runAt: getNextMidnight(), }; } ) @@ -157,7 +157,7 @@ export function telemetryTaskRunner( logger.warn(`Error executing alerting telemetry task: ${errMsg}`); return { state: {}, - runAt: new Date(), // getNextMidnight(), + runAt: getNextMidnight(), }; }); }, diff --git a/x-pack/test/alerting_api_integration/security_and_spaces/tests/telemetry/alerting_telemetry.ts b/x-pack/test/alerting_api_integration/security_and_spaces/tests/telemetry/alerting_telemetry.ts index 3b768b563b999..2773d137f25a4 100644 --- a/x-pack/test/alerting_api_integration/security_and_spaces/tests/telemetry/alerting_telemetry.ts +++ b/x-pack/test/alerting_api_integration/security_and_spaces/tests/telemetry/alerting_telemetry.ts @@ -176,13 +176,7 @@ export default function createAlertingTelemetryTests({ getService }: FtrProvider throttle: null, enabled: false, params: {}, - actions: [ - { - id: noopConnectorId, - group: 'default', - params: {}, - }, - ], + actions: [], }, }); @@ -190,20 +184,25 @@ export default function createAlertingTelemetryTests({ getService }: FtrProvider space: space.id, ruleOverwrites: { rule_type_id: 'test.multipleSearches', - schedule: { interval: '29s' }, + schedule: { interval: '40s' }, throttle: '1m', params: { numSearches: 2, delay: `2s` }, + actions: [], + }, + }); + + await createRule({ + space: space.id, + ruleOverwrites: { + rule_type_id: 'test.cumulative-firing', + schedule: { interval: '61s' }, + throttle: '2s', actions: [ { id: noopConnectorId, group: 'default', params: {}, }, - { - id: noopConnectorId, - group: 'default', - params: {}, - }, ], }, }); @@ -242,10 +241,10 @@ export default function createAlertingTelemetryTests({ getService }: FtrProvider const telemetry = JSON.parse(taskState!); // total number of rules - expect(telemetry.count_total).to.equal(18); + expect(telemetry.count_total).to.equal(21); // total number of enabled rules - expect(telemetry.count_active_total).to.equal(15); + expect(telemetry.count_active_total).to.equal(18); // total number of disabled rules expect(telemetry.count_disabled_total).to.equal(3); @@ -256,6 +255,7 @@ export default function createAlertingTelemetryTests({ getService }: FtrProvider expect(telemetry.count_by_type.test__throw).to.equal(3); expect(telemetry.count_by_type.test__noop).to.equal(6); expect(telemetry.count_by_type.test__multipleSearches).to.equal(3); + expect(telemetry.count_by_type['test__cumulative-firing']).to.equal(3); // total number of enabled rules broken down by rule type expect(telemetry.count_active_by_type.test__onlyContextVariables).to.equal(3); @@ -263,13 +263,14 @@ export default function createAlertingTelemetryTests({ getService }: FtrProvider expect(telemetry.count_active_by_type.test__throw).to.equal(3); expect(telemetry.count_active_by_type.test__noop).to.equal(3); expect(telemetry.count_active_by_type.test__multipleSearches).to.equal(3); + expect(telemetry.count_active_by_type['test__cumulative-firing']).to.equal(3); // throttle time stats expect(telemetry.throttle_time.min).to.equal('0s'); - expect(telemetry.throttle_time.avg).to.equal('138.2s'); + expect(telemetry.throttle_time.avg).to.equal('115.5s'); expect(telemetry.throttle_time.max).to.equal('600s'); expect(telemetry.throttle_time_number_s.min).to.equal(0); - expect(telemetry.throttle_time_number_s.avg).to.equal(138.2); + expect(telemetry.throttle_time_number_s.avg).to.equal(115.5); expect(telemetry.throttle_time_number_s.max).to.equal(600); // schedule interval stats @@ -281,8 +282,8 @@ export default function createAlertingTelemetryTests({ getService }: FtrProvider expect(telemetry.schedule_time_number_s.max).to.equal(300); // attached connectors stats - expect(telemetry.connectors_per_alert.min).to.equal(1); - expect(telemetry.connectors_per_alert.avg).to.equal(1.5); + expect(telemetry.connectors_per_alert.min).to.equal(0); + expect(telemetry.connectors_per_alert.avg).to.equal(1); expect(telemetry.connectors_per_alert.max).to.equal(3); // number of spaces with rules @@ -290,7 +291,7 @@ export default function createAlertingTelemetryTests({ getService }: FtrProvider // number of rule executions - just checking for non-zero as we can't set an exact number // each rule should have had a chance to execute once - expect(telemetry.count_rules_executions_per_day >= 18).to.be(true); + expect(telemetry.count_rules_executions_per_day >= 21).to.be(true); // number of rule executions broken down by rule type expect(telemetry.count_by_type.test__onlyContextVariables >= 3).to.be(true); @@ -298,6 +299,7 @@ export default function createAlertingTelemetryTests({ getService }: FtrProvider expect(telemetry.count_by_type.test__throw >= 3).to.be(true); expect(telemetry.count_by_type.test__noop >= 3).to.be(true); expect(telemetry.count_by_type.test__multipleSearches >= 3).to.be(true); + expect(telemetry.count_by_type['test__cumulative-firing'] >= 3).to.be(true); // average execution time - just checking for non-zero as we can't set an exact number expect(telemetry.avg_execution_time_per_day > 0).to.be(true); @@ -312,6 +314,9 @@ export default function createAlertingTelemetryTests({ getService }: FtrProvider expect(telemetry.avg_execution_time_by_type_per_day.test__throw > 0).to.be(true); expect(telemetry.avg_execution_time_by_type_per_day.test__noop > 0).to.be(true); expect(telemetry.avg_execution_time_by_type_per_day.test__multipleSearches > 0).to.be(true); + expect(telemetry.avg_execution_time_by_type_per_day['test__cumulative-firing'] > 0).to.be( + true + ); // average es search time - just checking for non-zero as we can't set an exact number expect(telemetry.avg_es_search_duration_per_day > 0).to.be(true); @@ -325,6 +330,9 @@ export default function createAlertingTelemetryTests({ getService }: FtrProvider ).to.be(true); expect(telemetry.avg_es_search_duration_by_type_per_day.test__throw === 0).to.be(true); expect(telemetry.avg_es_search_duration_by_type_per_day.test__noop === 0).to.be(true); + expect( + telemetry.avg_es_search_duration_by_type_per_day['test__cumulative-firing'] === 0 + ).to.be(true); // rule type that performs ES search expect(telemetry.avg_es_search_duration_by_type_per_day.test__multipleSearches > 0).to.be( @@ -343,6 +351,9 @@ export default function createAlertingTelemetryTests({ getService }: FtrProvider ).to.be(true); expect(telemetry.avg_total_search_duration_by_type_per_day.test__throw === 0).to.be(true); expect(telemetry.avg_total_search_duration_by_type_per_day.test__noop === 0).to.be(true); + expect( + telemetry.avg_total_search_duration_by_type_per_day['test__cumulative-firing'] === 0 + ).to.be(true); // rule type that performs ES search expect(telemetry.avg_total_search_duration_by_type_per_day.test__multipleSearches > 0).to.be( @@ -368,6 +379,70 @@ export default function createAlertingTelemetryTests({ getService }: FtrProvider expect( telemetry.count_failed_and_unrecognized_rule_tasks_by_status_by_type_per_day ).to.be.empty(); + + // percentile calculations for number of scheduled actions + expect(telemetry.percentile_num_scheduled_actions_per_day.p50 >= 0).to.be(true); + expect(telemetry.percentile_num_scheduled_actions_per_day.p90 > 0).to.be(true); + expect(telemetry.percentile_num_scheduled_actions_per_day.p99 > 0).to.be(true); + + // percentile calculations by rule type. most of these rule types don't schedule actions so they should all be 0 + expect( + telemetry.percentile_num_scheduled_actions_by_type_per_day.p50['example__always-firing'] + ).to.equal(0); + expect( + telemetry.percentile_num_scheduled_actions_by_type_per_day.p90['example__always-firing'] + ).to.equal(0); + expect( + telemetry.percentile_num_scheduled_actions_by_type_per_day.p99['example__always-firing'] + ).to.equal(0); + + expect( + telemetry.percentile_num_scheduled_actions_by_type_per_day.p50.test__onlyContextVariables + ).to.equal(0); + expect( + telemetry.percentile_num_scheduled_actions_by_type_per_day.p90.test__onlyContextVariables + ).to.equal(0); + expect( + telemetry.percentile_num_scheduled_actions_by_type_per_day.p99.test__onlyContextVariables + ).to.equal(0); + + expect(telemetry.percentile_num_scheduled_actions_by_type_per_day.p50.test__noop).to.equal(0); + expect(telemetry.percentile_num_scheduled_actions_by_type_per_day.p90.test__noop).to.equal(0); + expect(telemetry.percentile_num_scheduled_actions_by_type_per_day.p99.test__noop).to.equal(0); + + expect(telemetry.percentile_num_scheduled_actions_by_type_per_day.p50.test__throw).to.equal( + 0 + ); + expect(telemetry.percentile_num_scheduled_actions_by_type_per_day.p90.test__throw).to.equal( + 0 + ); + expect(telemetry.percentile_num_scheduled_actions_by_type_per_day.p99.test__throw).to.equal( + 0 + ); + + expect( + telemetry.percentile_num_scheduled_actions_by_type_per_day.p50.test__multipleSearches + ).to.equal(0); + expect( + telemetry.percentile_num_scheduled_actions_by_type_per_day.p90.test__multipleSearches + ).to.equal(0); + expect( + telemetry.percentile_num_scheduled_actions_by_type_per_day.p99.test__multipleSearches + ).to.equal(0); + + // this rule type does schedule actions so should be least 1 action scheduled + expect( + telemetry.percentile_num_scheduled_actions_by_type_per_day.p50['test__cumulative-firing'] >= + 1 + ).to.be(true); + expect( + telemetry.percentile_num_scheduled_actions_by_type_per_day.p90['test__cumulative-firing'] >= + 1 + ).to.be(true); + expect( + telemetry.percentile_num_scheduled_actions_by_type_per_day.p99['test__cumulative-firing'] >= + 1 + ).to.be(true); }); }); }