Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[eventLog] retry resource creation at initialization time #136363

Merged
merged 3 commits into from
Jul 19, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions x-pack/plugins/event_log/server/es/context.mock.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import { namesMock } from './names.mock';
import { IClusterClientAdapter } from './cluster_client_adapter';
import { clusterClientAdapterMock } from './cluster_client_adapter.mock';

export const MOCK_RETRY_DELAY = 20;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The mock uses a retry of 20ms, so we can run the retries without special jest clock stuff and have the tests run in a short amount of time, but the REAL context object uses 2000ms.

I did take a stab at using the special jest clock stuff, but looks like it's going to be difficult to get working, as we need to "run all the timers" at points where we currently have no control.


const createContextMock = () => {
const mock: jest.Mocked<EsContext> & {
esAdapter: jest.Mocked<IClusterClientAdapter>;
Expand All @@ -23,6 +25,7 @@ const createContextMock = () => {
waitTillReady: jest.fn(async () => true),
esAdapter: clusterClientAdapterMock.create(),
initialized: true,
retryDelay: MOCK_RETRY_DELAY,
};
return mock;
};
Expand Down
13 changes: 9 additions & 4 deletions x-pack/plugins/event_log/server/es/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,17 @@ import { initializeEs } from './init';
import { ClusterClientAdapter, IClusterClientAdapter } from './cluster_client_adapter';
import { createReadySignal, ReadySignal } from '../lib/ready_signal';

export const RETRY_DELAY = 2000;

export interface EsContext {
logger: Logger;
esNames: EsNames;
esAdapter: IClusterClientAdapter;
readonly logger: Logger;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just some clean-up I noticed while editing the file. This structure is not used outside of the event log, but the fields should be read-only as they are populated via a function call which creates an object that returns this interface. Figured it would be helpful to make sure we don't accidentally update these fields in our code ...

Same with initialized a few lines below, and the new retryDelay uses the same pattern.

readonly esNames: EsNames;
readonly esAdapter: IClusterClientAdapter;
initialize(): void;
shutdown(): Promise<void>;
waitTillReady(): Promise<boolean>;
initialized: boolean;
readonly initialized: boolean;
readonly retryDelay: number;
}

export interface EsError {
Expand All @@ -44,12 +47,14 @@ class EsContextImpl implements EsContext {
public esAdapter: IClusterClientAdapter;
private readonly readySignal: ReadySignal<boolean>;
public initialized: boolean;
public readonly retryDelay: number;

constructor(params: EsContextCtorParams) {
this.logger = params.logger;
this.esNames = getEsNames(params.indexNameRoot, params.kibanaVersion);
this.readySignal = createReadySignal();
this.initialized = false;
this.retryDelay = RETRY_DELAY;
this.esAdapter = new ClusterClientAdapter({
logger: params.logger,
elasticsearchClientPromise: params.elasticsearchClientPromise,
Expand Down
90 changes: 89 additions & 1 deletion x-pack/plugins/event_log/server/es/init.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* 2.0.
*/

import { contextMock } from './context.mock';
import { contextMock, MOCK_RETRY_DELAY } from './context.mock';
import { initializeEs, parseIndexAliases } from './init';

describe('initializeEs', () => {
Expand Down Expand Up @@ -454,3 +454,91 @@ describe('parseIndexAliases', () => {
]);
});
});

describe('retries', () => {
let esContext = contextMock.create();
// set up context APIs to return defaults indicating already created
beforeEach(() => {
esContext = contextMock.create();
esContext.esAdapter.getExistingLegacyIndexTemplates.mockResolvedValue({});
esContext.esAdapter.getExistingIndices.mockResolvedValue({});
esContext.esAdapter.getExistingIndexAliases.mockResolvedValue({});
esContext.esAdapter.doesIlmPolicyExist.mockResolvedValue(true);
esContext.esAdapter.doesIndexTemplateExist.mockResolvedValue(true);
esContext.esAdapter.doesAliasExist.mockResolvedValue(true);
});

test('createIlmPolicyIfNotExists with 1 retry', async () => {
esContext.esAdapter.doesIlmPolicyExist.mockRejectedValueOnce(new Error('retry 1'));

const timeStart = Date.now();
await initializeEs(esContext);
const timeElapsed = Date.now() - timeStart;

expect(timeElapsed).toBeGreaterThanOrEqual(MOCK_RETRY_DELAY);

expect(esContext.esAdapter.getExistingLegacyIndexTemplates).toHaveBeenCalledTimes(1);
expect(esContext.esAdapter.doesIlmPolicyExist).toHaveBeenCalledTimes(2);
expect(esContext.esAdapter.doesIndexTemplateExist).toHaveBeenCalledTimes(1);
expect(esContext.esAdapter.doesAliasExist).toHaveBeenCalledTimes(1);

const prefix = `eventLog initialization operation failed and will be retried: createIlmPolicyIfNotExists`;
expect(esContext.logger.warn).toHaveBeenCalledTimes(1);
expect(esContext.logger.warn).toHaveBeenCalledWith(`${prefix}; 4 more times; error: retry 1`);
});

test('createIndexTemplateIfNotExists with 2 retries', async () => {
esContext.esAdapter.doesIndexTemplateExist.mockRejectedValueOnce(new Error('retry 2a'));
esContext.esAdapter.doesIndexTemplateExist.mockRejectedValueOnce(new Error('retry 2b'));

const timeStart = Date.now();
await initializeEs(esContext);
const timeElapsed = Date.now() - timeStart;

expect(timeElapsed).toBeGreaterThanOrEqual(MOCK_RETRY_DELAY * (1 + 2));

expect(esContext.esAdapter.getExistingLegacyIndexTemplates).toHaveBeenCalledTimes(1);
expect(esContext.esAdapter.doesIlmPolicyExist).toHaveBeenCalledTimes(1);
expect(esContext.esAdapter.doesIndexTemplateExist).toHaveBeenCalledTimes(3);
expect(esContext.esAdapter.doesAliasExist).toHaveBeenCalledTimes(1);

const prefix = `eventLog initialization operation failed and will be retried: createIndexTemplateIfNotExists`;
expect(esContext.logger.warn).toHaveBeenCalledTimes(2);
expect(esContext.logger.warn).toHaveBeenCalledWith(`${prefix}; 4 more times; error: retry 2a`);
expect(esContext.logger.warn).toHaveBeenCalledWith(`${prefix}; 3 more times; error: retry 2b`);
});

test('createInitialIndexIfNotExists', async () => {
esContext.esAdapter.doesAliasExist.mockRejectedValueOnce(new Error('retry 5a'));
esContext.esAdapter.doesAliasExist.mockRejectedValueOnce(new Error('retry 5b'));
esContext.esAdapter.doesAliasExist.mockRejectedValueOnce(new Error('retry 5c'));
esContext.esAdapter.doesAliasExist.mockRejectedValueOnce(new Error('retry 5d'));
esContext.esAdapter.doesAliasExist.mockRejectedValueOnce(new Error('retry 5e'));
// make sure it only tries 5 times - this one should not be reported
esContext.esAdapter.doesAliasExist.mockRejectedValueOnce(new Error('retry 5f'));

const timeStart = Date.now();
await initializeEs(esContext);
const timeElapsed = Date.now() - timeStart;

expect(timeElapsed).toBeGreaterThanOrEqual(MOCK_RETRY_DELAY * (1 + 2 + 4 + 8));

expect(esContext.esAdapter.getExistingLegacyIndexTemplates).toHaveBeenCalledTimes(1);
expect(esContext.esAdapter.doesIlmPolicyExist).toHaveBeenCalledTimes(1);
expect(esContext.esAdapter.doesIndexTemplateExist).toHaveBeenCalledTimes(1);
expect(esContext.esAdapter.doesAliasExist).toHaveBeenCalledTimes(5);

const prefix = `eventLog initialization operation failed and will be retried: createInitialIndexIfNotExists`;
expect(esContext.logger.warn).toHaveBeenCalledTimes(5);
expect(esContext.logger.warn).toHaveBeenCalledWith(`${prefix}; 4 more times; error: retry 5a`);
expect(esContext.logger.warn).toHaveBeenCalledWith(`${prefix}; 3 more times; error: retry 5b`);
expect(esContext.logger.warn).toHaveBeenCalledWith(`${prefix}; 2 more times; error: retry 5c`);
expect(esContext.logger.warn).toHaveBeenCalledWith(`${prefix}; 1 more times; error: retry 5d`);
expect(esContext.logger.warn).toHaveBeenCalledWith(`${prefix}; 0 more times; error: retry 5e`);

expect(esContext.logger.error).toHaveBeenCalledTimes(1);
expect(esContext.logger.error).toHaveBeenCalledWith(
`error initializing elasticsearch resources: retry 5e`
);
});
});
34 changes: 30 additions & 4 deletions x-pack/plugins/event_log/server/es/init.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { asyncForEach } from '@kbn/std';
import { groupBy } from 'lodash';
import pRetry, { FailedAttemptError } from 'p-retry';
import { getIlmPolicy, getIndexTemplate } from './documents';
import { EsContext } from './context';

const MAX_RETRY_DELAY = 30000;

export async function initializeEs(esContext: EsContext): Promise<boolean> {
esContext.logger.debug('initializing elasticsearch resources starting');

Expand All @@ -28,10 +31,33 @@ export async function initializeEs(esContext: EsContext): Promise<boolean> {
async function initializeEsResources(esContext: EsContext) {
const steps = new EsInitializationSteps(esContext);

await steps.setExistingAssetsToHidden();
await steps.createIlmPolicyIfNotExists();
await steps.createIndexTemplateIfNotExists();
await steps.createInitialIndexIfNotExists();
// today, setExistingAssetsToHidden() never throws, but just in case ...
await retry(steps.setExistingAssetsToHidden);
await retry(steps.createIlmPolicyIfNotExists);
await retry(steps.createIndexTemplateIfNotExists);
await retry(steps.createInitialIndexIfNotExists);

async function retry(stepMethod: () => Promise<void>): Promise<void> {
// call the step method with retry options via p-retry
await pRetry(() => stepMethod.call(steps), getRetryOptions(esContext, stepMethod.name));
}
}

function getRetryOptions(esContext: EsContext, operation: string) {
const logger = esContext.logger;
// should retry on the order of 2s, 4s, 8s, 16s
// see: https://github.com/tim-kos/node-retry#retryoperationoptions
return {
minTimeout: esContext.retryDelay,
maxTimeout: MAX_RETRY_DELAY,
retries: 4,
factor: 2,
randomize: true,
onFailedAttempt: (err: FailedAttemptError) => {
const message = `eventLog initialization operation failed and will be retried: ${operation}; ${err.retriesLeft} more times; error: ${err.message}`;
logger.warn(message);
},
};
}

export interface ParsedIndexAlias extends estypes.IndicesAliasDefinition {
Expand Down