Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Uptime] Alerts - Monitor status alert - check monitor status by monitor.timespan #104541

Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 108 additions & 6 deletions x-pack/plugins/uptime/server/lib/alerts/status_check.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import {
generateFilterDSL,
hasFilters,
Expand Down Expand Up @@ -62,7 +61,12 @@ const mockOptions = (
shouldCheckStatus: true,
},
services = alertsMock.createAlertServices(),
state = {}
state = {},
rule = {
schedule: {
interval: '5m',
},
}
): any => {
services.scopedClusterClient = elasticsearchServiceMock.createScopedClusterClient();
services.scopedClusterClient.asCurrentUser = (jest.fn() as unknown) as any;
Expand All @@ -77,13 +81,16 @@ const mockOptions = (
params,
services,
state,
rule,
};
};

describe('status check alert', () => {
let toISOStringSpy: jest.SpyInstance<string, []>;
const mockDate = new Date('2021-05-13T12:33:37.000Z');
beforeEach(() => {
toISOStringSpy = jest.spyOn(Date.prototype, 'toISOString');
Date.now = jest.fn().mockReturnValue(mockDate);
});

afterEach(() => {
Expand All @@ -108,10 +115,14 @@ describe('status check alert', () => {
"filters": undefined,
"locations": Array [],
"numTimes": 5,
"timerange": Object {
"timespanRange": Object {
"from": "now-15m",
"to": "now",
},
"timestampRange": Object {
"from": 1620821917000,
"to": "now",
},
"uptimeEsClient": Object {
"baseESClient": [MockFunction],
"count": [Function],
Expand Down Expand Up @@ -163,10 +174,14 @@ describe('status check alert', () => {
"filters": undefined,
"locations": Array [],
"numTimes": 5,
"timerange": Object {
"timespanRange": Object {
"from": "now-15m",
"to": "now",
},
"timestampRange": Object {
"from": 1620821917000,
"to": "now",
},
"uptimeEsClient": Object {
"baseESClient": [MockFunction],
"count": [Function],
Expand Down Expand Up @@ -476,10 +491,14 @@ describe('status check alert', () => {
},
"locations": Array [],
"numTimes": 3,
"timerange": Object {
"timespanRange": Object {
"from": "now-15m",
"to": "now",
},
"timestampRange": Object {
"from": 1620821917000,
"to": "now",
},
"uptimeEsClient": Object {
"baseESClient": [MockFunction],
"count": [Function],
Expand Down Expand Up @@ -583,10 +602,14 @@ describe('status check alert', () => {
},
"locations": Array [],
"numTimes": 20,
"timerange": Object {
"timespanRange": Object {
"from": "now-30h",
"to": "now",
},
"timestampRange": Object {
"from": 1620714817000,
"to": "now",
},
"uptimeEsClient": Object {
"baseESClient": [MockFunction],
"count": [Function],
Expand Down Expand Up @@ -900,6 +923,85 @@ describe('status check alert', () => {
});
});

it('generates timespan and @timestamp ranges appropriately', async () => {
  // Stubbed status getter: lets us inspect the ranges the executor passes down.
  const mockGetter = jest.fn();
  mockGetter.mockReturnValue([]);
  const { server, libs, plugins } = bootstrapDependencies({
    getIndexPattern: jest.fn(),
    getMonitorStatus: mockGetter,
  });
  const alert = statusCheckAlertFactory(server, libs, plugins);
  const options = mockOptions({
    numTimes: 20,
    timerangeCount: 30,
    timerangeUnit: 'h',
    filters: {
      'monitor.type': ['http'],
      'observer.geo.name': [],
      tags: [],
      'url.port': [],
    },
    search: 'url.full: *',
  });
  await alert.executor(options);

  // Compute the expected lower bound with plain millisecond arithmetic.
  // `mockDate.setHours(...)` would mutate the shared `mockDate` fixture (leaking
  // state into later tests) and operates in local time, which is DST-sensitive.
  const HOUR_IN_MS = 60 * 60 * 1000;
  // now minus the timerange (30h), plus an additional 24 hour buffer
  const expectedTimestampFrom = mockDate.getTime() - (30 + 24) * HOUR_IN_MS;

  expect(mockGetter).toHaveBeenCalledTimes(1);
  expect(mockGetter.mock.calls[0][0]).toEqual(
    expect.objectContaining({
      timespanRange: {
        from: 'now-30h',
        to: 'now',
      },
      timestampRange: {
        from: expectedTimestampFrom,
        to: 'now',
      },
    })
  );
});

it('uses the larger of alert interval and timerange when defining timestampRange', async () => {
  // Stubbed status getter: lets us inspect the ranges the executor passes down.
  const mockGetter = jest.fn();
  mockGetter.mockReturnValue([]);
  const { server, libs, plugins } = bootstrapDependencies({
    getIndexPattern: jest.fn(),
    getMonitorStatus: mockGetter,
  });
  const alert = statusCheckAlertFactory(server, libs, plugins);
  const options = mockOptions(
    {
      numTimes: 20,
      timerangeCount: 30,
      timerangeUnit: 'h',
      filters: {
        'monitor.type': ['http'],
        'observer.geo.name': [],
        tags: [],
        'url.port': [],
      },
      search: 'url.full: *',
    },
    undefined,
    undefined,
    { schedule: { interval: '60h' } }
  );
  await alert.executor(options);

  // Compute the expected lower bound with plain millisecond arithmetic.
  // `mockDate.setHours(...)` would mutate the shared `mockDate` fixture (leaking
  // state into later tests) and operates in local time, which is DST-sensitive.
  const HOUR_IN_MS = 60 * 60 * 1000;
  // 60h rule schedule interval is larger than 30h timerange, so use now - 60h to define timestamp range
  const expectedTimestampFrom = mockDate.getTime() - 60 * HOUR_IN_MS;

  expect(mockGetter).toHaveBeenCalledTimes(1);
  expect(mockGetter.mock.calls[0][0]).toEqual(
    expect.objectContaining({
      timespanRange: {
        from: 'now-30h',
        to: 'now',
      },
      timestampRange: {
        from: expectedTimestampFrom,
        to: 'now',
      },
    })
  );
});

describe('hasFilters', () => {
it('returns false for undefined filters', () => {
expect(hasFilters()).toBe(false);
Expand Down
51 changes: 46 additions & 5 deletions x-pack/plugins/uptime/server/lib/alerts/status_check.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import datemath from '@elastic/datemath';
import { min } from 'lodash';
import { schema } from '@kbn/config-schema';
import { i18n } from '@kbn/i18n';
import Mustache from 'mustache';
Expand All @@ -31,6 +32,34 @@ import { UMServerLibs, UptimeESClient } from '../lib';

export type ActionGroupIds = ActionGroupIdsOf<typeof MONITOR_STATUS>;

/**
* Returns the appropriate range for filtering the documents by `@timestamp`.
*
* We check monitor status by `monitor.timespan`, but need to first cut down on the number of documents
* searched by filtering by `@timestamp`. To ensure that we catch as many documents as possible which could
* likely contain a down monitor with a `monitor.timespan` in the given timerange, we create a filter
* range for `@timestamp` that is the greater of either: from now to now - timerange interval - 24 hours
* OR from now to now - rule interval
* @param ruleScheduleLookback - string representing now minus the interval at which the rule is run
* @param timerangeLookback - string representing now minus the timerange configured by the user for checking down monitors
*/
export function getTimestampRange({
ruleScheduleLookback,
timerangeLookback,
}: Record<'ruleScheduleLookback' | 'timerangeLookback', string>) {
const scheduleIntervalAbsoluteTime = datemath.parse(ruleScheduleLookback)?.valueOf();
const defaultIntervalAbsoluteTime = datemath
.parse(timerangeLookback)
?.subtract('24', 'hours')
.valueOf();
const absoluteFrom = min([scheduleIntervalAbsoluteTime, defaultIntervalAbsoluteTime]);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
const absoluteFrom = min([scheduleIntervalAbsoluteTime, defaultIntervalAbsoluteTime]);
const from = min([scheduleIntervalAbsoluteTime, defaultIntervalAbsoluteTime]) ?? 'now-24h';

Just a suggestion, do you think this is cleaner? My thinking is it simplifies the return object so much that you don't really need to look at it.


return {
to: 'now',
from: absoluteFrom ? absoluteFrom : 'now-24h',
};
}

/**
 * Builds a composite identifier from a monitor id and a location by joining
 * the two with a hyphen.
 *
 * @param monitorId - id of the monitor
 * @param location - location name to append
 * @returns `monitorId` and `location` joined as `<monitorId>-<location>`
 */
const getMonIdByLoc = (monitorId: string, location: string) => `${monitorId}-${location}`;
Expand Down Expand Up @@ -264,6 +293,9 @@ export const statusCheckAlertFactory: UptimeAlertTypeFactory<ActionGroupIds> = (
params: rawParams,
state,
services: { alertInstanceFactory },
rule: {
schedule: { interval },
},
},
uptimeEsClient,
}) {
Expand All @@ -279,22 +311,31 @@ export const statusCheckAlertFactory: UptimeAlertTypeFactory<ActionGroupIds> = (
isAutoGenerated,
timerange: oldVersionTimeRange,
} = rawParams;

const filterString = await formatFilterString(uptimeEsClient, filters, search, libs);

const timerange = oldVersionTimeRange || {
from: `now-${String(timerangeCount) + timerangeUnit}`,
const timespanInterval = `${String(timerangeCount)}${timerangeUnit}`;

// Range filter for `monitor.timespan`, the range of time the ping is valid
const timespanRange = oldVersionTimeRange || {
from: `now-${timespanInterval}`,
to: 'now',
};

// Range filter for `@timestamp`, the time the document was indexed
const timestampRange = getTimestampRange({
ruleScheduleLookback: `now-${interval}`,
timerangeLookback: timespanRange.from,
});

let downMonitorsByLocation: GetMonitorStatusResult[] = [];

// If oldVersionTimeRange is present, the params are in the 7.7 format and the status check always runs.
// For later formats, the status check is skipped only when shouldCheckStatus is explicitly false.
if (!(!oldVersionTimeRange && shouldCheckStatus === false)) {
downMonitorsByLocation = await libs.requests.getMonitorStatus({
uptimeEsClient,
timerange,
timespanRange,
timestampRange,
numTimes,
locations: [],
filters: filterString,
Expand Down
Loading