feat(s3-deployment): enable efs support for handling large files in lambda (#15220)

Allows attaching EFS storage to the worker Lambda function to handle large file uploads. It also refactors unused imports.

Verified that the changes work by uploading a large file (~350 MB) with EFS support.
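
A minimal usage sketch of the new option (illustrative only; `destinationBucket`, `vpc`, and the construct ID are assumed rather than taken from this diff):

```ts
import * as path from 'path';
import * as ec2 from '@aws-cdk/aws-ec2';
import * as s3 from '@aws-cdk/aws-s3';
import * as s3deploy from '@aws-cdk/aws-s3-deployment';

declare const destinationBucket: s3.Bucket; // assumed to exist elsewhere in the stack
declare const vpc: ec2.IVpc;                // a VPC is required whenever useEfs is set

new s3deploy.BucketDeployment(this, 'DeployLargeAssets', {
  sources: [s3deploy.Source.asset(path.join(__dirname, 'large-assets'))],
  destinationBucket,
  useEfs: true, // mount an EFS file system so the handler is not limited to 512 MB of /tmp
  vpc,
  retainOnDelete: false,
});
```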


----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
keshav0891 authored Sep 15, 2021
1 parent 23d9b6a commit 2737119
Showing 9 changed files with 1,423 additions and 73 deletions.
@@ -1219,7 +1219,7 @@
"Properties": {
"Code": {
"S3Bucket": {
"Ref": "AssetParameters1f6de40da10b415b255c07df709f791e772ffb9f7bdd14ad81fb75643aad24eaS3BucketD8D20B9A"
"Ref": "AssetParametersa3058ccb468d757ebb89df5363a1c20f5307c6911136f29d00e1a68c9b2aa7e8S3BucketD1AD544E"
},
"S3Key": {
"Fn::Join": [
@@ -1232,7 +1232,7 @@
"Fn::Split": [
"||",
{
"Ref": "AssetParameters1f6de40da10b415b255c07df709f791e772ffb9f7bdd14ad81fb75643aad24eaS3VersionKeyCD2774D3"
"Ref": "AssetParametersa3058ccb468d757ebb89df5363a1c20f5307c6911136f29d00e1a68c9b2aa7e8S3VersionKey93A19D70"
}
]
}
@@ -1245,7 +1245,7 @@
"Fn::Split": [
"||",
{
"Ref": "AssetParameters1f6de40da10b415b255c07df709f791e772ffb9f7bdd14ad81fb75643aad24eaS3VersionKeyCD2774D3"
"Ref": "AssetParametersa3058ccb468d757ebb89df5363a1c20f5307c6911136f29d00e1a68c9b2aa7e8S3VersionKey93A19D70"
}
]
}
@@ -1348,17 +1348,17 @@
"Type": "String",
"Description": "Artifact hash for asset \"e9882ab123687399f934da0d45effe675ecc8ce13b40cb946f3e1d6141fe8d68\""
},
"AssetParameters1f6de40da10b415b255c07df709f791e772ffb9f7bdd14ad81fb75643aad24eaS3BucketD8D20B9A": {
"AssetParametersa3058ccb468d757ebb89df5363a1c20f5307c6911136f29d00e1a68c9b2aa7e8S3BucketD1AD544E": {
"Type": "String",
"Description": "S3 bucket for asset \"1f6de40da10b415b255c07df709f791e772ffb9f7bdd14ad81fb75643aad24ea\""
"Description": "S3 bucket for asset \"a3058ccb468d757ebb89df5363a1c20f5307c6911136f29d00e1a68c9b2aa7e8\""
},
"AssetParameters1f6de40da10b415b255c07df709f791e772ffb9f7bdd14ad81fb75643aad24eaS3VersionKeyCD2774D3": {
"AssetParametersa3058ccb468d757ebb89df5363a1c20f5307c6911136f29d00e1a68c9b2aa7e8S3VersionKey93A19D70": {
"Type": "String",
"Description": "S3 key for asset version \"1f6de40da10b415b255c07df709f791e772ffb9f7bdd14ad81fb75643aad24ea\""
"Description": "S3 key for asset version \"a3058ccb468d757ebb89df5363a1c20f5307c6911136f29d00e1a68c9b2aa7e8\""
},
"AssetParameters1f6de40da10b415b255c07df709f791e772ffb9f7bdd14ad81fb75643aad24eaArtifactHash3943F7F3": {
"AssetParametersa3058ccb468d757ebb89df5363a1c20f5307c6911136f29d00e1a68c9b2aa7e8ArtifactHash238275D6": {
"Type": "String",
"Description": "Artifact hash for asset \"1f6de40da10b415b255c07df709f791e772ffb9f7bdd14ad81fb75643aad24ea\""
"Description": "Artifact hash for asset \"a3058ccb468d757ebb89df5363a1c20f5307c6911136f29d00e1a68c9b2aa7e8\""
},
"AssetParameters972240f9dd6e036a93d5f081af9a24315b2053828ac049b3b19b2fa12d7ae64aS3Bucket1F1A8472": {
"Type": "String",
20 changes: 20 additions & 0 deletions packages/@aws-cdk/aws-s3-deployment/README.md
@@ -241,6 +241,26 @@ size of the AWS Lambda resource handler.
> NOTE: a new AWS Lambda handler will be created in your stack for each memory
> limit configuration.
## EFS Support

If your workflow needs more than the default 512 MB of disk space, you may attach an EFS file system to the underlying
Lambda function. To enable EFS support, set both the `useEfs` and `vpc` props on `BucketDeployment`.

Sample usage is shown below. Note that creating the VPC inline, as done here for simplicity, may cause stack deletion
failures. To avoid this, keep your network infrastructure (VPC) in a separate stack and pass it in as a prop, as in the
sketch after this example.

```ts
new s3deploy.BucketDeployment(this, 'DeployMeWithEfsStorage', {
sources: [s3deploy.Source.asset(path.join(__dirname, 'my-website'))],
destinationBucket,
destinationKeyPrefix: 'efs/',
useEfs: true,
vpc: new ec2.Vpc(this, 'Vpc'),
retainOnDelete: false,
});
```
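
For instance, a minimal sketch of that separate-stack pattern (the `NetworkStack` and `AppStack` classes and the `WebsiteBucket` are illustrative assumptions, not part of this PR):

```ts
import * as path from 'path';
import * as cdk from '@aws-cdk/core';
import * as ec2 from '@aws-cdk/aws-ec2';
import * as s3 from '@aws-cdk/aws-s3';
import * as s3deploy from '@aws-cdk/aws-s3-deployment';

// Keep the VPC in its own stack so deleting the deployment stack never tears down the network.
class NetworkStack extends cdk.Stack {
  public readonly vpc: ec2.IVpc;
  constructor(scope: cdk.Construct, id: string, props?: cdk.StackProps) {
    super(scope, id, props);
    this.vpc = new ec2.Vpc(this, 'Vpc');
  }
}

interface AppStackProps extends cdk.StackProps {
  readonly vpc: ec2.IVpc;
}

class AppStack extends cdk.Stack {
  constructor(scope: cdk.Construct, id: string, props: AppStackProps) {
    super(scope, id, props);
    const destinationBucket = new s3.Bucket(this, 'WebsiteBucket');
    new s3deploy.BucketDeployment(this, 'DeployMeWithEfsStorage', {
      sources: [s3deploy.Source.asset(path.join(__dirname, 'my-website'))],
      destinationBucket,
      destinationKeyPrefix: 'efs/',
      useEfs: true,
      vpc: props.vpc, // the VPC is owned by NetworkStack, not created inline
      retainOnDelete: false,
    });
  }
}

const app = new cdk.App();
const network = new NetworkStack(app, 'Network');
new AppStack(app, 'App', { vpc: network.vpc });
```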

## Notes

- This library uses an AWS CloudFormation custom resource which is about 10MiB in
75 changes: 73 additions & 2 deletions packages/@aws-cdk/aws-s3-deployment/lib/bucket-deployment.ts
@@ -1,6 +1,7 @@
import * as path from 'path';
import * as cloudfront from '@aws-cdk/aws-cloudfront';
import * as ec2 from '@aws-cdk/aws-ec2';
import * as efs from '@aws-cdk/aws-efs';
import * as iam from '@aws-cdk/aws-iam';
import * as lambda from '@aws-cdk/aws-lambda';
import * as s3 from '@aws-cdk/aws-s3';
@@ -107,6 +108,14 @@ export interface BucketDeploymentProps {
*/
readonly memoryLimit?: number;

/**
* Mount an EFS file system. Enable this if your assets are large and you encounter disk space errors.
* Enabling this option will require a VPC to be specified.
*
* @default - No EFS. Lambda has access only to 512MB of disk space.
*/
readonly useEfs?: boolean

/**
* Execution role associated with this function
*
@@ -197,6 +206,7 @@ export interface BucketDeploymentProps {

/**
* The VPC network to place the deployment lambda handler in.
* This is required if `useEfs` is set.
*
* @default None
*/
@@ -230,18 +240,59 @@ export class BucketDeployment extends CoreConstruct {
}
}

if (props.useEfs && !props.vpc) {
throw new Error('Vpc must be specified if useEfs is set');
}

const accessPointPath = '/lambda';
let accessPoint;
if (props.useEfs && props.vpc) {
const accessMode = '0777';
const fileSystem = this.getOrCreateEfsFileSystem(scope, {
vpc: props.vpc,
removalPolicy: cdk.RemovalPolicy.DESTROY,
});
accessPoint = fileSystem.addAccessPoint('AccessPoint', {
path: accessPointPath,
createAcl: {
ownerUid: '1001',
ownerGid: '1001',
permissions: accessMode,
},
posixUser: {
uid: '1001',
gid: '1001',
},
});
accessPoint.node.addDependency(fileSystem.mountTargetsAvailable);
}

// Making VPC dependent on BucketDeployment so that CFN stack deletion is smooth.
// Refer comments on https://github.com/aws/aws-cdk/pull/15220 for more details.
if (props.vpc) {
this.node.addDependency(props.vpc);
}

const mountPath = `/mnt${accessPointPath}`;
const handler = new lambda.SingletonFunction(this, 'CustomResourceHandler', {
uuid: this.renderSingletonUuid(props.memoryLimit),
uuid: this.renderSingletonUuid(props.memoryLimit, props.vpc),
code: lambda.Code.fromAsset(path.join(__dirname, 'lambda')),
layers: [new AwsCliLayer(this, 'AwsCliLayer')],
runtime: lambda.Runtime.PYTHON_3_6,
environment: props.useEfs ? {
MOUNT_PATH: mountPath,
} : undefined,
handler: 'index.handler',
lambdaPurpose: 'Custom::CDKBucketDeployment',
timeout: cdk.Duration.minutes(15),
role: props.role,
memorySize: props.memoryLimit,
vpc: props.vpc,
vpcSubnets: props.vpcSubnets,
filesystem: accessPoint ? lambda.FileSystem.fromEfsAccessPoint(
accessPoint,
mountPath,
): undefined,
});

const handlerRole = handler.role;
@@ -279,7 +330,7 @@

}

private renderSingletonUuid(memoryLimit?: number) {
private renderSingletonUuid(memoryLimit?: number, vpc?: ec2.IVpc) {
let uuid = '8693BB64-9689-44B6-9AAF-B0CC9EB8756C';

// if user specify a custom memory limit, define another singleton handler
@@ -293,8 +344,28 @@
uuid += `-${memoryLimit.toString()}MiB`;
}

// if user specify to use VPC, define another singleton handler
// with this configuration. otherwise, it won't be possible to use multiple
// configurations since we have a singleton.
// A VPC is a must if EFS storage is used and that's why we are only using VPC in uuid.
if (vpc) {
uuid += `-${vpc.node.addr}`;
}

return uuid;
}

/**
* Function to get/create a stack singleton instance of EFS FileSystem per vpc.
*
* @param scope Construct
* @param fileSystemProps EFS FileSystemProps
*/
private getOrCreateEfsFileSystem(scope: Construct, fileSystemProps: efs.FileSystemProps): efs.FileSystem {
const stack = cdk.Stack.of(scope);
const uuid = `BucketDeploymentEFS-VPC-${fileSystemProps.vpc.node.addr}`;
return stack.node.tryFindChild(uuid) as efs.FileSystem ?? new efs.FileSystem(scope, uuid, fileSystemProps);
}
}

/**
86 changes: 45 additions & 41 deletions packages/@aws-cdk/aws-s3-deployment/lib/lambda/index.py
@@ -1,26 +1,24 @@
import subprocess
import os
import tempfile
import json
import contextlib
import json
import traceback
import logging
import os
import shutil
import boto3
import contextlib
from datetime import datetime
from uuid import uuid4

import subprocess
import tempfile
from urllib.request import Request, urlopen
from uuid import uuid4
from zipfile import ZipFile

import boto3

logger = logging.getLogger()
logger.setLevel(logging.INFO)

cloudfront = boto3.client('cloudfront')

CFN_SUCCESS = "SUCCESS"
CFN_FAILED = "FAILED"
ENV_KEY_MOUNT_PATH = "MOUNT_PATH"

def handler(event, context):

@@ -117,43 +115,49 @@ def cfn_error(message=None):
#---------------------------------------------------------------------------------------------------
# populate all files from s3_source_zips to a destination bucket
def s3_deploy(s3_source_zips, s3_dest, user_metadata, system_metadata, prune, exclude, include):
# create a temporary working directory
workdir=tempfile.mkdtemp()
# create a temporary working directory in /tmp or if enabled an attached efs volume
if ENV_KEY_MOUNT_PATH in os.environ:
workdir = os.getenv(ENV_KEY_MOUNT_PATH) + "/" + str(uuid4())
os.mkdir(workdir)
else:
workdir = tempfile.mkdtemp()

logger.info("| workdir: %s" % workdir)

# create a directory into which we extract the contents of the zip file
contents_dir=os.path.join(workdir, 'contents')
os.mkdir(contents_dir)

# download the archive from the source and extract to "contents"
for s3_source_zip in s3_source_zips:
archive=os.path.join(workdir, str(uuid4()))
logger.info("archive: %s" % archive)
aws_command("s3", "cp", s3_source_zip, archive)
logger.info("| extracting archive to: %s\n" % contents_dir)
with ZipFile(archive, "r") as zip:
zip.extractall(contents_dir)

# sync from "contents" to destination

s3_command = ["s3", "sync"]

if prune:
s3_command.append("--delete")

if exclude:
for filter in exclude:
s3_command.extend(["--exclude", filter])

if include:
for filter in include:
s3_command.extend(["--include", filter])

s3_command.extend([contents_dir, s3_dest])
s3_command.extend(create_metadata_args(user_metadata, system_metadata))
aws_command(*s3_command)

shutil.rmtree(workdir)
try:
# download the archive from the source and extract to "contents"
for s3_source_zip in s3_source_zips:
archive=os.path.join(workdir, str(uuid4()))
logger.info("archive: %s" % archive)
aws_command("s3", "cp", s3_source_zip, archive)
logger.info("| extracting archive to: %s\n" % contents_dir)
with ZipFile(archive, "r") as zip:
zip.extractall(contents_dir)

# sync from "contents" to destination

s3_command = ["s3", "sync"]

if prune:
s3_command.append("--delete")

if exclude:
for filter in exclude:
s3_command.extend(["--exclude", filter])

if include:
for filter in include:
s3_command.extend(["--include", filter])

s3_command.extend([contents_dir, s3_dest])
s3_command.extend(create_metadata_args(user_metadata, system_metadata))
aws_command(*s3_command)
finally:
shutil.rmtree(workdir)

#---------------------------------------------------------------------------------------------------
# invalidate files in the CloudFront distribution edge caches
2 changes: 2 additions & 0 deletions packages/@aws-cdk/aws-s3-deployment/package.json
@@ -90,6 +90,7 @@
"dependencies": {
"@aws-cdk/aws-cloudfront": "0.0.0",
"@aws-cdk/aws-ec2": "0.0.0",
"@aws-cdk/aws-efs": "0.0.0",
"@aws-cdk/aws-iam": "0.0.0",
"@aws-cdk/aws-lambda": "0.0.0",
"@aws-cdk/aws-s3": "0.0.0",
@@ -102,6 +103,7 @@
"homepage": "https://github.com/aws/aws-cdk",
"peerDependencies": {
"@aws-cdk/aws-cloudfront": "0.0.0",
"@aws-cdk/aws-efs": "0.0.0",
"@aws-cdk/aws-ec2": "0.0.0",
"@aws-cdk/aws-iam": "0.0.0",
"@aws-cdk/aws-lambda": "0.0.0",