Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(logs): log retention fails with OperationAbortedException #17688

Merged
merged 2 commits into from
Dec 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 32 additions & 10 deletions packages/@aws-cdk/aws-logs/lib/log-retention-provider/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@ async function createLogGroupSafe(logGroupName: string, region?: string, options
throw new Error('Out of attempts to create a logGroup');
}
}
// Any other error
console.error(error);
throw error;
}
} while (true); // exit happens on retry count check
Expand All @@ -62,12 +60,36 @@ async function createLogGroupSafe(logGroupName: string, region?: string, options
* @param retentionInDays the number of days to retain the log events in the specified log group.
*/
async function setRetentionPolicy(logGroupName: string, region?: string, options?: SdkRetryOptions, retentionInDays?: number) {
  // The same as in createLogGroupSafe(), here we could end up with the race
  // condition where a log group is either already being created or its retention
  // policy is being updated. This would result in an OperationAbortedException,
  // which we catch and retry a number of times before failing.
  //
  // `maxRetries` is the number of retries performed AFTER the first attempt and
  // `retryOptions.base` is the pause between attempts in milliseconds; both
  // default to 10 when not supplied. An explicit 0 is honoured.
  let retriesLeft = options?.maxRetries ?? 10;
  const delay = options?.retryOptions?.base ?? 10;

  // The client configuration does not change between attempts, so build it once.
  const cloudwatchlogs = new AWS.CloudWatchLogs({ apiVersion: '2014-03-28', region, ...options });

  for (;;) { // exit happens on success, on a non-retryable error or when out of retries
    try {
      // A missing or zero retention means "never expire": remove the policy.
      if (!retentionInDays) {
        await cloudwatchlogs.deleteRetentionPolicy({ logGroupName }).promise();
      } else {
        await cloudwatchlogs.putRetentionPolicy({ logGroupName, retentionInDays }).promise();
      }
      return;
    } catch (error) {
      // Works whether the catch variable is typed `any` or `unknown`.
      const code = (error as { code?: string } | null | undefined)?.code;
      if (code !== 'OperationAbortedException') {
        // Any other error is not retryable here; surface it unchanged.
        throw error;
      }
      if (retriesLeft <= 0) {
        // A conflicting operation is still in progress but we are out of retries
        throw new Error('Out of attempts to set the retention policy of a logGroup');
      }
      retriesLeft--;
      await new Promise(resolve => setTimeout(resolve, delay));
    }
  }
}

export async function handler(event: AWSLambda.CloudFormationCustomResourceEvent, context: AWSLambda.Context) {
Expand All @@ -92,10 +114,10 @@ export async function handler(event: AWSLambda.CloudFormationCustomResourceEvent
// Set a retention policy of 1 day on the logs of this very function.
// Due to the async nature of the log group creation, the log group for this function might
// still be not created yet at this point. Therefore we attempt to create it.
// In case it is being created, createLogGroupSafe will handle the conflic.
// In case it is being created, createLogGroupSafe will handle the conflict.
const region = process.env.AWS_REGION;
await createLogGroupSafe(`/aws/lambda/${context.functionName}`, region, retryOptions);
// If createLogGroupSafe fails, the log group is not created even after multiple attempts
// If createLogGroupSafe fails, the log group is not created even after multiple attempts.
// In this case we have nothing to set the retention policy on but an exception will skip
// the next line.
await setRetentionPolicy(`/aws/lambda/${context.functionName}`, region, retryOptions, 1);
Expand Down
204 changes: 201 additions & 3 deletions packages/@aws-cdk/aws-logs/test/log-retention-provider.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ describe('log retention provider', () => {

});

test('does not if when operations on provider log group fails', async () => {
test('succeeds when createLogGroup for provider log group returns OperationAbortedException twice', async () => {
let attempt = 2;
const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
if (params.logGroupName === '/aws/lambda/provider') {
Expand Down Expand Up @@ -280,7 +280,7 @@ describe('log retention provider', () => {

});

test('does not fail if operations on CDK lambda log group fails twice', async () => {
test('succeeds when createLogGroup for CDK lambda log group returns OperationAbortedException twice', async () => {
let attempt = 2;
const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
if (params.logGroupName === 'group') {
Expand Down Expand Up @@ -322,7 +322,7 @@ describe('log retention provider', () => {

});

test('does fail if operations on CDK lambda log group fails indefinitely', async () => {
test('fails when createLogGroup for CDK lambda log group fails with OperationAbortedException indefinitely', async () => {
const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
if (params.logGroupName === 'group') {
return Promise.reject(new MyError(
Expand Down Expand Up @@ -356,6 +356,204 @@ describe('log retention provider', () => {
expect(request.isDone()).toEqual(true);


});

test('succeeds when putRetentionPolicy for provider log group returns OperationAbortedException twice', async () => {
  // Reject the first two putRetentionPolicy calls that target the provider's own
  // log group with a conflict error, then let every later call succeed.
  let remainingFailures = 2;
  const putRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.PutRetentionPolicyRequest) => {
    if (params.logGroupName === '/aws/lambda/provider' && remainingFailures > 0) {
      remainingFailures--;
      return Promise.reject(new MyError(
        'A conflicting operation is currently in progress against this resource. Please try again.',
        'OperationAbortedException'));
    }
    return Promise.resolve({});
  };

  const createLogGroupFake = sinon.fake.resolves({});
  const deleteRetentionPolicyFake = sinon.fake.resolves({});

  AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
  AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
  AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);

  const event = {
    ...eventCommon,
    RequestType: 'Create',
    ResourceProperties: {
      ServiceToken: 'token',
      RetentionInDays: '30',
      LogGroupName: 'group',
    },
  };

  // The handler is expected to retry past both conflicts and report SUCCESS.
  const request = createRequest('SUCCESS');

  await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);

  expect(request.isDone()).toEqual(true);
});

test('succeeds when putRetentionPolicy for CDK lambda log group returns OperationAbortedException twice', async () => {
  // Reject the first two putRetentionPolicy calls that target the requested
  // log group ('group') with a conflict error, then let every later call succeed.
  let remainingFailures = 2;
  const putRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.PutRetentionPolicyRequest) => {
    if (params.logGroupName === 'group' && remainingFailures > 0) {
      remainingFailures--;
      return Promise.reject(new MyError(
        'A conflicting operation is currently in progress against this resource. Please try again.',
        'OperationAbortedException'));
    }
    return Promise.resolve({});
  };

  const createLogGroupFake = sinon.fake.resolves({});
  const deleteRetentionPolicyFake = sinon.fake.resolves({});

  AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
  AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
  AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);

  const event = {
    ...eventCommon,
    RequestType: 'Create',
    ResourceProperties: {
      ServiceToken: 'token',
      RetentionInDays: '30',
      LogGroupName: 'group',
    },
  };

  // The handler is expected to retry past both conflicts and report SUCCESS.
  const request = createRequest('SUCCESS');

  await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);

  expect(request.isDone()).toEqual(true);
});

test('fails when putRetentionPolicy for CDK lambda log group fails with OperationAbortedException indefinitely', async () => {
  // Every putRetentionPolicy call that targets the requested log group ('group')
  // is rejected with a conflict error; calls for any other group succeed.
  const putRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.PutRetentionPolicyRequest) => {
    if (params.logGroupName === 'group') {
      return Promise.reject(new MyError(
        'A conflicting operation is currently in progress against this resource. Please try again.',
        'OperationAbortedException'));
    }
    return Promise.resolve({});
  };

  const createLogGroupFake = sinon.fake.resolves({});
  const deleteRetentionPolicyFake = sinon.fake.resolves({});

  AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
  AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
  AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);

  const event = {
    ...eventCommon,
    RequestType: 'Create',
    ResourceProperties: {
      ServiceToken: 'token',
      RetentionInDays: '30',
      LogGroupName: 'group',
    },
  };

  // Once the retries are exhausted the handler must report FAILED.
  const request = createRequest('FAILED');

  await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);

  expect(request.isDone()).toEqual(true);
});

test('succeeds when deleteRetentionPolicy for CDK lambda log group returns OperationAbortedException twice', async () => {
  // deleteRetentionPolicy is only reached for the requested log group ('group'):
  // the provider's own log group is always given a 1-day policy through
  // putRetentionPolicy. The fake therefore has to target 'group', otherwise the
  // rejection branch is never exercised.
  let remainingFailures = 2;
  const deleteRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.DeleteRetentionPolicyRequest) => {
    if (params.logGroupName === 'group' && remainingFailures > 0) {
      remainingFailures--;
      return Promise.reject(new MyError(
        'A conflicting operation is currently in progress against this resource. Please try again.',
        'OperationAbortedException'));
    }
    return Promise.resolve({});
  };

  const createLogGroupFake = sinon.fake.resolves({});
  const putRetentionPolicyFake = sinon.fake.resolves({});

  AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
  AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
  AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);

  const event = {
    ...eventCommon,
    RequestType: 'Create',
    ResourceProperties: {
      ServiceToken: 'token',
      RetentionInDays: '0', // Setting this to 0 triggers the call to deleteRetentionPolicy
      LogGroupName: 'group',
    },
  };

  // The handler is expected to retry past both conflicts and report SUCCESS.
  const request = createRequest('SUCCESS');

  await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);

  expect(request.isDone()).toEqual(true);
});

test('fails when deleteRetentionPolicy for CDK lambda log group fails with OperationAbortedException indefinitely', async () => {
  // Every deleteRetentionPolicy call that targets the requested log group ('group')
  // is rejected with a conflict error; calls for any other group succeed.
  const deleteRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.DeleteRetentionPolicyRequest) => {
    if (params.logGroupName === 'group') {
      return Promise.reject(new MyError(
        'A conflicting operation is currently in progress against this resource. Please try again.',
        'OperationAbortedException'));
    }
    return Promise.resolve({});
  };

  const createLogGroupFake = sinon.fake.resolves({});
  const putRetentionPolicyFake = sinon.fake.resolves({});

  AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
  AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
  AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);

  const event = {
    ...eventCommon,
    RequestType: 'Create',
    ResourceProperties: {
      ServiceToken: 'token',
      RetentionInDays: '0', // Setting this to 0 triggers the call to deleteRetentionPolicy
      LogGroupName: 'group',
    },
  };

  // Once the retries are exhausted the handler must report FAILED.
  const request = createRequest('FAILED');

  await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);

  expect(request.isDone()).toEqual(true);
});

test('response data contains the log group name', async () => {
Expand Down