Skip to content

Commit

Permalink
Remove tasks with cleanup logic instead of marking them as failed (#1…
Browse files Browse the repository at this point in the history
…52841)

Part of #79977 (step 1 and 3).

In this PR, I'm making Task Manager remove tasks instead of updating
them with `status: failed` whenever a task is out of attempts. I've also
added an optional `cleanup` hook to the task runner that can be defined
if additional cleanup is necessary whenever a task has been deleted (ex:
delete `action_task_params`).

## To verify an ad-hoc task that always fails

1. With this PR codebase, modify an action to always throw an error
2. Create an alerting rule that will invoke the action once
3. See the action fail three times
4. Observe the task SO is deleted (search by task type / action type)
alongside the action_task_params SO

## To verify Kibana crashing on the last ad-hoc task attempt

1. With this PR codebase, modify an action to always throw an error
(similar to scenario above) but also add a delay of 10s before the error
is thrown (`await new Promise((resolve) => setTimeout(resolve, 10000));`
and a log message before the delay begins
2. Create an alerting rule that will invoke the action once
3. See the action fail twice
4. On the third run, crash Kibana while the action is waiting for the
10s delay, this will cause the action to still be marked as running
while it no longer is
5. Restart Kibana
6. Wait 5-10m until the task's retryAt is overdue
7. Observe the task getting deleted and the action_task_params getting
deleted

## To verify recurring tasks that continuously fail

1. With this PR codebase, modify a rule type to always throw an error
when it runs
2. Create an alerting rule of that type (with a short interval)
3. Observe the rule continuously running and not getting trapped into
the PR changes

Flaky test runner:
https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/2036

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
  • Loading branch information
mikecote and kibanamachine committed Mar 27, 2023
1 parent dec52ef commit 676aec7
Show file tree
Hide file tree
Showing 27 changed files with 493 additions and 545 deletions.
3 changes: 1 addition & 2 deletions x-pack/plugins/actions/server/action_type_registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,7 @@ export class ActionTypeRegistry {
[`actions:${actionType.id}`]: {
title: actionType.name,
maxAttempts,
createTaskRunner: (context: RunContext) =>
this.taskRunnerFactory.create(context, maxAttempts),
createTaskRunner: (context: RunContext) => this.taskRunnerFactory.create(context),
},
});
// No need to notify usage on basic action types
Expand Down
155 changes: 40 additions & 115 deletions x-pack/plugins/actions/server/lib/task_runner_factory.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,22 @@ import { TaskRunnerFactory } from './task_runner_factory';
import { actionTypeRegistryMock } from '../action_type_registry.mock';
import { actionExecutorMock } from './action_executor.mock';
import { encryptedSavedObjectsMock } from '@kbn/encrypted-saved-objects-plugin/server/mocks';
import { savedObjectsClientMock, loggingSystemMock, httpServiceMock } from '@kbn/core/server/mocks';
import {
savedObjectsClientMock,
loggingSystemMock,
httpServiceMock,
savedObjectsRepositoryMock,
} from '@kbn/core/server/mocks';
import { eventLoggerMock } from '@kbn/event-log-plugin/server/mocks';
import { ActionTypeDisabledError } from './errors';
import { actionsClientMock } from '../mocks';
import { inMemoryMetricsMock } from '../monitoring/in_memory_metrics.mock';
import { IN_MEMORY_METRICS } from '../monitoring';
import { pick } from 'lodash';
import { isRetryableError } from '@kbn/task-manager-plugin/server/task_running';
import {
isRetryableError,
isUnrecoverableError,
} from '@kbn/task-manager-plugin/server/task_running';

const executeParamsFields = [
'actionId',
Expand Down Expand Up @@ -86,15 +94,12 @@ const taskRunnerFactoryInitializerParams = {
logger: loggingSystemMock.create().get(),
encryptedSavedObjectsClient: mockedEncryptedSavedObjectsClient,
basePathService: httpServiceMock.createBasePath(),
getUnsecuredSavedObjectsClient: jest.fn().mockReturnValue(services.savedObjectsClient),
savedObjectsRepository: savedObjectsRepositoryMock.create(),
};

beforeEach(() => {
jest.resetAllMocks();
actionExecutorInitializerParams.getServices.mockReturnValue(services);
taskRunnerFactoryInitializerParams.getUnsecuredSavedObjectsClient.mockReturnValue(
services.savedObjectsClient
);
});

test(`throws an error if factory isn't initialized`, () => {
Expand Down Expand Up @@ -410,36 +415,18 @@ test('executes the task by calling the executor with proper parameters when noti
);
});

test('cleans up action_task_params object', async () => {
test('cleans up action_task_params object through the cleanup runner method', async () => {
const taskRunner = taskRunnerFactory.create({
taskInstance: mockedTaskInstance,
});

mockedActionExecutor.execute.mockResolvedValueOnce({ status: 'ok', actionId: '2' });
spaceIdToNamespace.mockReturnValueOnce('namespace-test');
mockedEncryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValueOnce({
id: '3',
type: 'action_task_params',
attributes: {
actionId: '2',
params: { baz: true },
executionId: '123abc',
apiKey: Buffer.from('123:abc').toString('base64'),
},
references: [
{
id: '2',
name: 'actionRef',
type: 'action',
},
],
});

await taskRunner.run();
await taskRunner.cleanup();

expect(services.savedObjectsClient.delete).toHaveBeenCalledWith('action_task_params', '3', {
refresh: false,
});
expect(taskRunnerFactoryInitializerParams.savedObjectsRepository.delete).toHaveBeenCalledWith(
'action_task_params',
'3',
{ refresh: false }
);
});

test('task runner should implement CancellableTask cancel method with logging warning message', async () => {
Expand Down Expand Up @@ -474,37 +461,22 @@ test('task runner should implement CancellableTask cancel method with logging wa
);
});

test('runs successfully when cleanup fails and logs the error', async () => {
test('cleanup runs successfully when action_task_params cleanup fails and logs the error', async () => {
const taskRunner = taskRunnerFactory.create({
taskInstance: mockedTaskInstance,
});

mockedActionExecutor.execute.mockResolvedValueOnce({ status: 'ok', actionId: '2' });
spaceIdToNamespace.mockReturnValueOnce('namespace-test');
mockedEncryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValueOnce({
id: '3',
type: 'action_task_params',
attributes: {
actionId: '2',
params: { baz: true },
executionId: '123abc',
apiKey: Buffer.from('123:abc').toString('base64'),
},
references: [
{
id: '2',
name: 'actionRef',
type: 'action',
},
],
});
services.savedObjectsClient.delete.mockRejectedValueOnce(new Error('Fail'));
taskRunnerFactoryInitializerParams.savedObjectsRepository.delete.mockRejectedValueOnce(
new Error('Fail')
);

await taskRunner.run();
await taskRunner.cleanup();

expect(services.savedObjectsClient.delete).toHaveBeenCalledWith('action_task_params', '3', {
refresh: false,
});
expect(taskRunnerFactoryInitializerParams.savedObjectsRepository.delete).toHaveBeenCalledWith(
'action_task_params',
'3',
{ refresh: false }
);
expect(taskRunnerFactoryInitializerParams.logger.error).toHaveBeenCalledWith(
'Failed to cleanup action_task_params object [id="3"]: Fail'
);
Expand Down Expand Up @@ -814,15 +786,12 @@ test(`doesn't use API key when not provided`, async () => {
});

test(`throws an error when license doesn't support the action type`, async () => {
const taskRunner = taskRunnerFactory.create(
{
taskInstance: {
...mockedTaskInstance,
attempts: 1,
},
const taskRunner = taskRunnerFactory.create({
taskInstance: {
...mockedTaskInstance,
attempts: 1,
},
2
);
});

mockedEncryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValueOnce({
id: '3',
Expand All @@ -849,7 +818,7 @@ test(`throws an error when license doesn't support the action type`, async () =>
await taskRunner.run();
throw new Error('Should have thrown');
} catch (e) {
expect(isRetryableError(e)).toEqual(true);
expect(isUnrecoverableError(e)).toEqual(true);
}
});

Expand Down Expand Up @@ -895,56 +864,11 @@ test(`will throw an error with retry: false if the task is not retryable`, async
expect(err).toBeDefined();
expect(isRetryableError(err)).toEqual(false);
expect(taskRunnerFactoryInitializerParams.logger.error as jest.Mock).toHaveBeenCalledWith(
`Action '2' failed and will not retry: Error message`
);
});

test(`treats errors as successes if the task is not retryable`, async () => {
const taskRunner = taskRunnerFactory.create({
taskInstance: {
...mockedTaskInstance,
attempts: 1,
},
});

mockedEncryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValueOnce({
id: '3',
type: 'action_task_params',
attributes: {
actionId: '2',
params: { baz: true },
executionId: '123abc',
apiKey: Buffer.from('123:abc').toString('base64'),
},
references: [
{
id: '2',
name: 'actionRef',
type: 'action',
},
],
});
mockedActionExecutor.execute.mockResolvedValueOnce({
status: 'error',
actionId: '2',
message: 'Error message',
data: { foo: true },
retry: false,
});

let err;
try {
await taskRunner.run();
} catch (e) {
err = e;
}
expect(err).toBeUndefined();
expect(taskRunnerFactoryInitializerParams.logger.error as jest.Mock).toHaveBeenCalledWith(
`Action '2' failed and will not retry: Error message`
`Action '2' failed: Error message`
);
});

test('will throw a retry error if the error is thrown instead of returned', async () => {
test('will rethrow the error if the error is thrown instead of returned', async () => {
const taskRunner = taskRunnerFactory.create({
taskInstance: {
...mockedTaskInstance,
Expand All @@ -969,7 +893,8 @@ test('will throw a retry error if the error is thrown instead of returned', asyn
},
],
});
mockedActionExecutor.execute.mockRejectedValueOnce({});
const thrownError = new Error('Fail');
mockedActionExecutor.execute.mockRejectedValueOnce(thrownError);

let err;
try {
Expand All @@ -978,10 +903,10 @@ test('will throw a retry error if the error is thrown instead of returned', asyn
err = e;
}
expect(err).toBeDefined();
expect(isRetryableError(err)).toEqual(true);
expect(taskRunnerFactoryInitializerParams.logger.error as jest.Mock).toHaveBeenCalledWith(
`Action '2' failed and will retry: undefined`
`Action '2' failed: Fail`
);
expect(thrownError).toEqual(err);
});

test('increments monitoring metrics after execution', async () => {
Expand Down
Loading

0 comments on commit 676aec7

Please sign in to comment.