Skip to content

Commit

Permalink
feat(glue-alpha): include extra jars parameter in pyspark jobs (aws#33238)
Browse files Browse the repository at this point in the history

### Issue # (if applicable)

Closes aws#33225.

### Reason for this change

PySpark jobs with extra JAR dependencies cannot be defined with the new L2 constructs introduced in [v2.177.0](https://github.com/aws/aws-cdk/releases/tag/v2.177.0).

### Description of changes

Add the `extraJars` parameter in the PySpark job L2 constructs.

### Checklist
- [x] My code adheres to the [CONTRIBUTING GUIDE](https://github.com/aws/aws-cdk/blob/main/CONTRIBUTING.md) and [DESIGN GUIDELINES](https://github.com/aws/aws-cdk/blob/main/docs/DESIGN_GUIDELINES.md)

----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
  • Loading branch information
gontzalm authored Feb 19, 2025
1 parent 6f1aa80 commit be3bce3
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 2 deletions.
10 changes: 10 additions & 0 deletions packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ export interface PySparkEtlJobProps extends JobProperties {
*/
readonly extraFiles?: Code[];

/**
* Extra JARs S3 URLs (optional).
* S3 URLs where additional JAR dependencies are located.
* @default - no extra JAR files
*/
readonly extraJars?: Code[];

/**
* Specifies whether job run queuing is enabled for the job runs for this job.
* A value of true means job run queuing is enabled for the job runs.
Expand Down Expand Up @@ -159,6 +166,9 @@ export class PySparkEtlJob extends Job {
if (props.extraFiles && props.extraFiles.length > 0) {
args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(',');
}
if (props.extraJars && props.extraJars?.length > 0) {
args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(',');
}

return args;
}
Expand Down
10 changes: 10 additions & 0 deletions packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ export interface PySparkFlexEtlJobProps extends JobProperties {
*/
readonly extraFiles?: Code[];

/**
* Extra JARs S3 URLs (optional).
* S3 URLs where additional JAR dependencies are located.
* @default - no extra JAR files
*/
readonly extraJars?: Code[];

}

/**
Expand Down Expand Up @@ -160,6 +167,9 @@ export class PySparkFlexEtlJob extends Job {
if (props.extraFiles && props.extraFiles.length > 0) {
args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(',');
}
if (props.extraJars && props.extraJars?.length > 0) {
args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(',');
}

return args;
}
Expand Down
10 changes: 10 additions & 0 deletions packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ export interface PySparkStreamingJobProps extends JobProperties {
*/
readonly extraFiles?: Code[];

/**
* Extra JARs S3 URLs (optional).
* S3 URLs where additional JAR dependencies are located.
* @default - no extra JAR files
*/
readonly extraJars?: Code[];

/**
* Specifies whether job run queuing is enabled for the job runs for this job.
* A value of true means job run queuing is enabled for the job runs.
Expand Down Expand Up @@ -159,6 +166,9 @@ export class PySparkStreamingJob extends Job {
if (props.extraFiles && props.extraFiles.length > 0) {
args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(',');
}
if (props.extraJars && props.extraJars?.length > 0) {
args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(',');
}

return args;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ describe('Job', () => {
});
});

describe('Create PySpark ETL Job with extraPythonFiles and extraFiles', () => {
describe('Create PySpark ETL Job with extraPythonFiles, extraFiles and extraJars', () => {
beforeEach(() => {
job = new glue.PySparkEtlJob(stack, 'PySparkETLJob', {
role,
Expand All @@ -381,6 +381,11 @@ describe('Job', () => {
s3.Bucket.fromBucketName(stack, 'extraFilesBucket', 'extra-files-bucket'),
'prefix/file.txt'),
],
extraJars: [
glue.Code.fromBucket(
s3.Bucket.fromBucketName(stack, 'extraJarsBucket', 'extra-jars-bucket'),
'prefix/file.jar'),
],
});
});

Expand Down Expand Up @@ -408,6 +413,7 @@ describe('Job', () => {
'--enable-continuous-cloudwatch-log': 'true',
'--extra-py-files': 's3://extra-python-files-bucket/prefix/file.py',
'--extra-files': 's3://extra-files-bucket/prefix/file.txt',
'--extra-jars': 's3://extra-jars-bucket/prefix/file.jar',
}),
});
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ describe('Job', () => {
});
});

describe('Create PySpark Streaming Job with extraPythonFiles and extraFiles', () => {
describe('Create PySpark Streaming Job with extraPythonFiles, extraFiles and extraJars', () => {
beforeEach(() => {
job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', {
role,
Expand All @@ -393,6 +393,11 @@ describe('Job', () => {
s3.Bucket.fromBucketName(stack, 'extraFilesBucket', 'extra-files-bucket'),
'prefix/file.txt'),
],
extraJars: [
glue.Code.fromBucket(
s3.Bucket.fromBucketName(stack, 'extraJarsBucket', 'extra-jars-bucket'),
'prefix/file.jar'),
],
});
});

Expand Down Expand Up @@ -420,6 +425,7 @@ describe('Job', () => {
'--enable-continuous-cloudwatch-log': 'true',
'--extra-py-files': 's3://extra-python-files-bucket/prefix/file.py',
'--extra-files': 's3://extra-files-bucket/prefix/file.txt',
'--extra-jars': 's3://extra-jars-bucket/prefix/file.jar',
}),
});
});
Expand Down

0 comments on commit be3bce3

Please sign in to comment.