diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts index d56b5606aac5e..eed65bf3970d2 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts @@ -39,6 +39,13 @@ export interface PySparkEtlJobProps extends JobProperties { */ readonly extraFiles?: Code[]; + /** + * Extra Jars S3 URL (optional) + * S3 URL where additional jar dependencies are located + * @default - no extra jar files + */ + readonly extraJars?: Code[]; + /** * Specifies whether job run queuing is enabled for the job runs for this job. * A value of true means job run queuing is enabled for the job runs. @@ -159,6 +166,9 @@ export class PySparkEtlJob extends Job { if (props.extraFiles && props.extraFiles.length > 0) { args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); } + if (props.extraJars && props.extraJars?.length > 0) { + args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); + } return args; } diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts index 603cf7d14592e..6819602094838 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts @@ -48,6 +48,13 @@ export interface PySparkFlexEtlJobProps extends JobProperties { */ readonly extraFiles?: Code[]; + /** + * Extra Jars S3 URL (optional) + * S3 URL where additional jar dependencies are located + * @default - no extra jar files + */ + readonly extraJars?: Code[]; + } /** @@ -160,6 +167,9 @@ export class PySparkFlexEtlJob extends Job { if (props.extraFiles && props.extraFiles.length > 0) { args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); } + if (props.extraJars && props.extraJars?.length > 0) { + args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); + } return args; } diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts index 7079897f7d352..f444ba72c56a7 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts @@ -39,6 +39,13 @@ export interface PySparkStreamingJobProps extends JobProperties { */ readonly extraFiles?: Code[]; + /** + * Extra Jars S3 URL (optional) + * S3 URL where additional jar dependencies are located + * @default - no extra jar files + */ + readonly extraJars?: Code[]; + /** * Specifies whether job run queuing is enabled for the job runs for this job. * A value of true means job run queuing is enabled for the job runs. @@ -159,6 +166,9 @@ export class PySparkStreamingJob extends Job { if (props.extraFiles && props.extraFiles.length > 0) { args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); } + if (props.extraJars && props.extraJars?.length > 0) { + args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); + } return args; } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts index 5a8d5b67f7956..2017b44d9795f 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts @@ -365,7 +365,7 @@ describe('Job', () => { }); }); - describe('Create PySpark ETL Job with extraPythonFiles and extraFiles', () => { + describe('Create PySpark ETL Job with extraPythonFiles, extraFiles and extraJars', () => { beforeEach(() => { job = new glue.PySparkEtlJob(stack, 'PySparkETLJob', { role, @@ -381,6 +381,11 @@ describe('Job', () => { s3.Bucket.fromBucketName(stack, 'extraFilesBucket', 'extra-files-bucket'), 'prefix/file.txt'), ], + extraJars: [ + glue.Code.fromBucket( + s3.Bucket.fromBucketName(stack, 'extraJarsBucket', 'extra-jars-bucket'), + 'prefix/file.jar'), + ], }); }); @@ -408,6 +413,7 @@ describe('Job', () => { '--enable-continuous-cloudwatch-log': 'true', '--extra-py-files': 's3://extra-python-files-bucket/prefix/file.py', '--extra-files': 's3://extra-files-bucket/prefix/file.txt', + '--extra-jars': 's3://extra-jars-bucket/prefix/file.jar', }), }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts index b47095d61e5e8..e96ad46f4d715 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts @@ -377,7 +377,7 @@ describe('Job', () => { }); }); - describe('Create PySpark Streaming Job with extraPythonFiles and extraFiles', () => { + describe('Create PySpark Streaming Job with extraPythonFiles, extraFiles and extraJars', () => { beforeEach(() => { job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', { role, @@ -393,6 +393,11 @@ describe('Job', () => { s3.Bucket.fromBucketName(stack, 'extraFilesBucket', 'extra-files-bucket'), 'prefix/file.txt'), ], + extraJars: [ + glue.Code.fromBucket( + s3.Bucket.fromBucketName(stack, 'extraJarsBucket', 'extra-jars-bucket'), + 'prefix/file.jar'), + ], }); }); @@ -420,6 +425,7 @@ describe('Job', () => { '--enable-continuous-cloudwatch-log': 'true', '--extra-py-files': 's3://extra-python-files-bucket/prefix/file.py', '--extra-files': 's3://extra-files-bucket/prefix/file.txt', + '--extra-jars': 's3://extra-jars-bucket/prefix/file.jar', }), }); });