From bfd21d6d6b73d08125202e34f319c13f9f47aaea Mon Sep 17 00:00:00 2001 From: Nicholas Thomson Date: Mon, 11 May 2020 22:06:21 -0700 Subject: [PATCH] [AWS SageMaker] Specify component input types (#3683) * Replace all string types with Python types * Update HPO yaml * Update Batch YAML * Update Deploy YAML * Update GroundTruth YAML * Update Model YAML * Update Train YAML * Update WorkTeam YAML * Updated samples to remove strings * Update to temporary image * Remove unnecessary imports * Update image to newer image * Update components to python3 * Update bool parser type * Remove empty ContentType in samples * Update to temporary image * Update to version 0.3.1 * Update deploy to login * Update deploy load config path * Fix export environment variable in deploy * Fix env name * Update deploy reflow env paths * Add debug config line * Use username and password directly * Updated to 0.3.1 * Update field types to JsonObject and JsonArray --- .../aws/sagemaker/THIRD-PARTY-LICENSES.txt | 2 +- .../sagemaker/batch_transform/component.yaml | 28 ++++++++- .../batch_transform/src/batch_transform.py | 46 +++++++------- .../sagemaker/codebuild/deploy.buildspec.yml | 6 +- .../aws/sagemaker/codebuild/scripts/deploy.sh | 7 ++- components/aws/sagemaker/common/_utils.py | 49 +++++---------- .../aws/sagemaker/deploy/component.yaml | 29 ++++++++- components/aws/sagemaker/deploy/src/deploy.py | 46 +++++++------- .../aws/sagemaker/ground_truth/component.yaml | 35 ++++++++++- .../ground_truth/src/ground_truth.py | 52 ++++++++-------- .../hyperparameter_tuning/component.yaml | 39 +++++++++++- .../src/hyperparameter_tuning.py | 62 +++++++++---------- components/aws/sagemaker/model/component.yaml | 18 +++++- .../aws/sagemaker/model/src/create_model.py | 22 +++---- components/aws/sagemaker/train/component.yaml | 29 ++++++++- components/aws/sagemaker/train/src/train.py | 40 ++++++------ .../aws/sagemaker/workteam/component.yaml | 13 +++- .../aws/sagemaker/workteam/src/workteam.py | 14 ++--- 
.../mini-image-classification-pipeline.py | 16 ++--- .../kmeans-hpo-pipeline.py | 37 ++++++----- .../mnist-classification-pipeline.py | 43 +++++++------ .../training-pipeline.py | 21 +++---- 22 files changed, 398 insertions(+), 256 deletions(-) diff --git a/components/aws/sagemaker/THIRD-PARTY-LICENSES.txt b/components/aws/sagemaker/THIRD-PARTY-LICENSES.txt index 179d9b3ec3f2..fc268748af99 100644 --- a/components/aws/sagemaker/THIRD-PARTY-LICENSES.txt +++ b/components/aws/sagemaker/THIRD-PARTY-LICENSES.txt @@ -1,4 +1,4 @@ -** Amazon SageMaker Components for Kubeflow Pipelines; version 0.3.0 -- +** Amazon SageMaker Components for Kubeflow Pipelines; version 0.3.1 -- https://github.com/kubeflow/pipelines/tree/master/components/aws/sagemaker Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. ** boto3; version 1.12.33 -- https://github.com/boto/boto3/ diff --git a/components/aws/sagemaker/batch_transform/component.yaml b/components/aws/sagemaker/batch_transform/component.yaml index 6e0ad637ac81..b585ecbcf569 100644 --- a/components/aws/sagemaker/batch_transform/component.yaml +++ b/components/aws/sagemaker/batch_transform/component.yaml @@ -4,78 +4,102 @@ description: | inputs: - name: region description: 'The region where the cluster launches.' + type: String - name: job_name description: 'The name of the batch transform job.' default: '' + type: String - name: model_name description: 'The name of the model that you want to use for the transform job.' + type: String - name: max_concurrent description: 'The maximum number of parallel requests that can be sent to each instance in a transform job.' default: '0' + type: Integer - name: max_payload description: 'The maximum allowed size of the payload, in MB.' default: '6' + type: Integer - name: batch_strategy description: 'The number of records to include in a mini-batch for an HTTP inference request.' 
default: '' + type: String - name: environment description: 'The environment variables to set in the Docker container. Up to 16 key-value entries in the map.' default: '{}' + type: JsonObject - name: input_location description: 'The S3 location of the data source that is associated with a channel.' + type: String - name: data_type description: 'Data type of the input. Can be ManifestFile, S3Prefix, or AugmentedManifestFile.' default: 'S3Prefix' + type: String - name: content_type description: 'The multipurpose internet mail extension (MIME) type of the data.' default: '' + type: String - name: split_type description: 'The method to use to split the transform job data files into smaller batches.' default: 'None' + type: String - name: compression_type description: 'If the transform data is compressed, the specification of the compression type.' default: 'None' + type: String - name: output_location description: 'The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.' + type: String - name: accept description: 'The MIME type used to specify the output data.' default: '' + type: String - name: assemble_with description: 'Defines how to assemble the results of the transform job as a single S3 object. Either None or Line.' default: '' + type: String - name: output_encryption_key description: 'The AWS Key Management Service ID of the key used to encrypt the output data.' default: '' + type: String - name: input_filter description: 'A JSONPath expression used to select a portion of the input data to pass to the algorithm.' default: '' + type: String - name: output_filter description: 'A JSONPath expression used to select a portion of the joined dataset to save in the output file for a batch transform job.' default: '' + type: String - name: join_source description: 'Specifies the source of the data to join with the transformed data.' default: 'None' + type: String - name: instance_type description: 'The ML compute instance type.' 
default: 'ml.m4.xlarge' + type: String - name: instance_count description: 'The number of ML compute instances to use in each training job.' default: '1' + type: Integer - name: resource_encryption_key description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).' default: '' + type: String - name: endpoint_url description: 'The endpoint URL for the private link VPC endpoint.' default: '' + type: String - name: tags description: 'Key-value pairs to categorize AWS resources.' default: '{}' + type: JsonObject outputs: - {name: output_location, description: 'S3 URI of the transform job results.'} implementation: container: - image: amazon/aws-sagemaker-kfp-components:0.3.0 - command: ['python'] + image: amazon/aws-sagemaker-kfp-components:0.3.1 + command: ['python3'] args: [ batch_transform.py, --region, {inputValue: region}, diff --git a/components/aws/sagemaker/batch_transform/src/batch_transform.py b/components/aws/sagemaker/batch_transform/src/batch_transform.py index 9ff2b85fcb97..b658dad730be 100644 --- a/components/aws/sagemaker/batch_transform/src/batch_transform.py +++ b/components/aws/sagemaker/batch_transform/src/batch_transform.py @@ -26,31 +26,31 @@ def create_parser(): parser = argparse.ArgumentParser(description='SageMaker Batch Transformation Job') _utils.add_default_client_arguments(parser) - parser.add_argument('--job_name', type=str.strip, required=False, help='The name of the transform job.', default='') - parser.add_argument('--model_name', type=str.strip, required=True, help='The name of the model that you want to use for the transform job.') - parser.add_argument('--max_concurrent', type=_utils.str_to_int, required=False, help='The maximum number of parallel requests that can be sent to each instance in a transform job.', default='0') - parser.add_argument('--max_payload', type=_utils.str_to_int, required=False, help='The maximum allowed size of the payload, in MB.', 
default='6') - parser.add_argument('--batch_strategy', choices=['MultiRecord', 'SingleRecord', ''], type=str.strip, required=False, help='The number of records to include in a mini-batch for an HTTP inference request.', default='') - parser.add_argument('--environment', type=_utils.str_to_json_dict, required=False, help='The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.', default='{}') - parser.add_argument('--input_location', type=str.strip, required=True, help='The S3 location of the data source that is associated with a channel.') - parser.add_argument('--data_type', choices=['ManifestFile', 'S3Prefix', 'AugmentedManifestFile', ''], type=str.strip, required=False, help='Data type of the input. Can be ManifestFile, S3Prefix, or AugmentedManifestFile.', default='S3Prefix') - parser.add_argument('--content_type', type=str.strip, required=False, help='The multipurpose internet mail extension (MIME) type of the data.', default='') - parser.add_argument('--split_type', choices=['None', 'Line', 'RecordIO', 'TFRecord', ''], type=str.strip, required=False, help='The method to use to split the transform job data files into smaller batches.', default='None') - parser.add_argument('--compression_type', choices=['None', 'Gzip', ''], type=str.strip, required=False, help='If the transform data is compressed, the specification of the compression type.', default='None') - parser.add_argument('--output_location', type=str.strip, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.') - parser.add_argument('--accept', type=str.strip, required=False, help='The MIME type used to specify the output data.') - parser.add_argument('--assemble_with', choices=['None', 'Line', ''], type=str.strip, required=False, help='Defines how to assemble the results of the transform job as a single S3 object. 
Either None or Line.') - parser.add_argument('--output_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='') - parser.add_argument('--input_filter', type=str.strip, required=False, help='A JSONPath expression used to select a portion of the input data to pass to the algorithm.', default='') - parser.add_argument('--output_filter', type=str.strip, required=False, help='A JSONPath expression used to select a portion of the joined dataset to save in the output file for a batch transform job.', default='') - parser.add_argument('--join_source', choices=['None', 'Input', ''], type=str.strip, required=False, help='Specifies the source of the data to join with the transformed data.', default='None') + parser.add_argument('--job_name', type=str, required=False, help='The name of the transform job.', default='') + parser.add_argument('--model_name', type=str, required=True, help='The name of the model that you want to use for the transform job.') + parser.add_argument('--max_concurrent', type=int, required=False, help='The maximum number of parallel requests that can be sent to each instance in a transform job.', default='0') + parser.add_argument('--max_payload', type=int, required=False, help='The maximum allowed size of the payload, in MB.', default='6') + parser.add_argument('--batch_strategy', choices=['MultiRecord', 'SingleRecord', ''], type=str, required=False, help='The number of records to include in a mini-batch for an HTTP inference request.', default='') + parser.add_argument('--environment', type=_utils.yaml_or_json_str, required=False, help='The dictionary of the environment variables to set in the Docker container. 
Up to 16 key-value entries in the map.', default={}) + parser.add_argument('--input_location', type=str, required=True, help='The S3 location of the data source that is associated with a channel.') + parser.add_argument('--data_type', choices=['ManifestFile', 'S3Prefix', 'AugmentedManifestFile', ''], type=str, required=False, help='Data type of the input. Can be ManifestFile, S3Prefix, or AugmentedManifestFile.', default='S3Prefix') + parser.add_argument('--content_type', type=str, required=False, help='The multipurpose internet mail extension (MIME) type of the data.', default='') + parser.add_argument('--split_type', choices=['None', 'Line', 'RecordIO', 'TFRecord', ''], type=str, required=False, help='The method to use to split the transform job data files into smaller batches.', default='None') + parser.add_argument('--compression_type', choices=['None', 'Gzip', ''], type=str, required=False, help='If the transform data is compressed, the specification of the compression type.', default='None') + parser.add_argument('--output_location', type=str, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.') + parser.add_argument('--accept', type=str, required=False, help='The MIME type used to specify the output data.') + parser.add_argument('--assemble_with', choices=['None', 'Line', ''], type=str, required=False, help='Defines how to assemble the results of the transform job as a single S3 object. 
Either None or Line.') + parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='') + parser.add_argument('--input_filter', type=str, required=False, help='A JSONPath expression used to select a portion of the input data to pass to the algorithm.', default='') + parser.add_argument('--output_filter', type=str, required=False, help='A JSONPath expression used to select a portion of the joined dataset to save in the output file for a batch transform job.', default='') + parser.add_argument('--join_source', choices=['None', 'Input', ''], type=str, required=False, help='Specifies the source of the data to join with the transformed data.', default='None') parser.add_argument('--instance_type', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge', - 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str.strip, required=True, help='The ML compute instance type for the transform job.', default='ml.m4.xlarge') - parser.add_argument('--instance_count', type=_utils.str_to_int, required=False, help='The number of ML compute instances to use in the transform job.') - parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='') - parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') - parser.add_argument('--output_location_file', type=str.strip, required=True, 
help='File path where the program will write the Amazon S3 URI of the transform job results.') + 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, required=True, help='The ML compute instance type for the transform job.', default='ml.m4.xlarge') + parser.add_argument('--instance_count', type=int, required=False, help='The number of ML compute instances to use in the transform job.') + parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='') + parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={}) + parser.add_argument('--output_location_file', type=str, required=True, help='File path where the program will write the Amazon S3 URI of the transform job results.') return parser diff --git a/components/aws/sagemaker/codebuild/deploy.buildspec.yml b/components/aws/sagemaker/codebuild/deploy.buildspec.yml index f2acff30a45c..4c3c7534f0fe 100644 --- a/components/aws/sagemaker/codebuild/deploy.buildspec.yml +++ b/components/aws/sagemaker/codebuild/deploy.buildspec.yml @@ -1,10 +1,10 @@ -version: 0.2 +version: 0.2 + phases: pre_build: commands: # Log in to Dockerhub - - mkdir -p ~/.docker - - echo $DOCKER_CONFIG > ~/.docker/config.json + - docker login -u $DOCKER_CONFIG_USERNAME -p $DOCKER_CONFIG_PASSWORD build: commands: diff --git a/components/aws/sagemaker/codebuild/scripts/deploy.sh b/components/aws/sagemaker/codebuild/scripts/deploy.sh index aa023f00512f..09bf596b1d2d 100755 --- a/components/aws/sagemaker/codebuild/scripts/deploy.sh +++ b/components/aws/sagemaker/codebuild/scripts/deploy.sh @@ -5,6 +5,7 @@ set -e REMOTE_REPOSITORY="amazon/aws-sagemaker-kfp-components" DRYRUN="true" FULL_VERSION_TAG="" +DOCKER_CONFIG_PATH=${DOCKER_CONFIG_PATH:-"/root/.docker"} while 
getopts ":d:v:" opt; do case ${opt} in @@ -64,13 +65,13 @@ echo "Tagged image with ${MAJOR_VERSION_IMAGE}" # Push to the remote repository if [ "${DRYRUN}" == "false" ]; then - docker push "${FULL_VERSION_IMAGE}" + docker --config "$DOCKER_CONFIG_PATH" push "${FULL_VERSION_IMAGE}" echo "Successfully pushed tag ${FULL_VERSION_IMAGE} to Docker Hub" - docker push "${MINOR_VERSION_IMAGE}" + docker --config "$DOCKER_CONFIG_PATH" push "${MINOR_VERSION_IMAGE}" echo "Successfully pushed tag ${MINOR_VERSION_IMAGE} to Docker Hub" - docker push "${MAJOR_VERSION_IMAGE}" + docker --config "$DOCKER_CONFIG_PATH" push "${MAJOR_VERSION_IMAGE}" echo "Successfully pushed tag ${MAJOR_VERSION_IMAGE} to Docker Hub" else echo "Dry run detected. Not pushing images." diff --git a/components/aws/sagemaker/common/_utils.py b/components/aws/sagemaker/common/_utils.py index bc58e1c0c697..69ae36748aec 100644 --- a/components/aws/sagemaker/common/_utils.py +++ b/components/aws/sagemaker/common/_utils.py @@ -13,6 +13,7 @@ import os import argparse from time import gmtime, strftime +from distutils.util import strtobool import time import string import random @@ -63,7 +64,7 @@ def nullable_string_argument(value): def add_default_client_arguments(parser): - parser.add_argument('--region', type=str.strip, required=True, help='The region where the training job launches.') + parser.add_argument('--region', type=str, required=True, help='The region where the training job launches.') parser.add_argument('--endpoint_url', type=nullable_string_argument, required=False, help='The URL to use when communicating with the Sagemaker service.') @@ -71,7 +72,7 @@ def get_component_version(): """Get component version from the first line of License file""" component_version = 'NULL' - with open('/THIRD-PARTY-LICENSES.txt', 'r') as license_file: + with open('THIRD-PARTY-LICENSES.txt', 'r') as license_file: version_match = re.search('Amazon SageMaker Components for Kubeflow Pipelines; version (([0-9]+[.])+[0-9]+)', 
license_file.readline()) if version_match is not None: @@ -858,35 +859,15 @@ def enable_spot_instance_support(training_job_config, args): def id_generator(size=4, chars=string.ascii_uppercase + string.digits): return ''.join(random.choice(chars) for _ in range(size)) - -def str_to_bool(s): - if s.lower().strip() == 'true': - return True - elif s.lower().strip() == 'false': - return False - else: - raise argparse.ArgumentTypeError('"True" or "False" expected.') - -def str_to_int(s): - if s: - return int(s) - else: - return 0 - -def str_to_float(s): - if s: - return float(s) - else: - return 0.0 - -def str_to_json_dict(s): - if s != '': - return json.loads(s) - else: - return {} - -def str_to_json_list(s): - if s != '': - return json.loads(s) - else: - return [] +def yaml_or_json_str(str): + if str == "" or str == None: + return None + try: + return json.loads(str) + except: + return yaml.safe_load(str) + +def str_to_bool(str): + # This distutils function returns an integer representation of the boolean + # rather than a True/False value. This simply hard casts it. + return bool(strtobool(str)) \ No newline at end of file diff --git a/components/aws/sagemaker/deploy/component.yaml b/components/aws/sagemaker/deploy/component.yaml index 15ae51073f29..349bfb62a8e2 100644 --- a/components/aws/sagemaker/deploy/component.yaml +++ b/components/aws/sagemaker/deploy/component.yaml @@ -4,83 +4,108 @@ description: | inputs: - name: region description: 'The region to deploy your model endpoints.' + type: String - name: endpoint_config_name description: 'The name of the endpoint configuration.' default: '' + type: String - name: variant_name_1 description: 'The name of the production variant.' default: 'variant-name-1' + type: String - name: model_name_1 description: 'The model name used for endpoint deployment.' + type: String - name: initial_instance_count_1 description: 'Number of instances to launch initially.' 
default: '1' + type: Integer - name: instance_type_1 description: 'The ML compute instance type.' default: 'ml.m4.xlarge' + type: String - name: initial_variant_weight_1 description: 'Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.' default: '1.0' + type: Float - name: accelerator_type_1 description: 'The size of the Elastic Inference (EI) instance to use for the production variant.' default: '' + type: String - name: variant_name_2 description: 'The name of the production variant.' default: 'variant-name-2' + type: String - name: model_name_2 description: 'The model name used for endpoint deployment.' default: '' + type: String - name: initial_instance_count_2 description: 'Number of instances to launch initially.' default: '1' + type: Integer - name: instance_type_2 description: 'The ML compute instance type.' default: 'ml.m4.xlarge' + type: String - name: initial_variant_weight_2 description: 'Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.' default: '1.0' + type: Float - name: accelerator_type_2 description: 'The size of the Elastic Inference (EI) instance to use for the production variant.' default: '' + type: String - name: variant_name_3 description: 'The name of the production variant.' default: 'variant-name-3' + type: String - name: model_name_3 description: 'The model name used for endpoint deployment' default: '' + type: String - name: initial_instance_count_3 description: 'Number of instances to launch initially.' default: '1' + type: Integer - name: instance_type_3 description: 'The ML compute instance type.' default: 'ml.m4.xlarge' + type: String - name: initial_variant_weight_3 description: 'Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.' 
default: '1.0' + type: Float - name: accelerator_type_3 description: 'The size of the Elastic Inference (EI) instance to use for the production variant.' default: '' + type: String - name: resource_encryption_key description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.' default: '' + type: String - name: endpoint_url description: 'The endpoint URL for the private link VPC endpoint.' default: '' + type: String - name: endpoint_config_tags description: 'Key-value pairs to categorize AWS resources.' default: '{}' + type: JsonObject - name: endpoint_name description: 'The name of the endpoint.' default: '' + type: String - name: endpoint_tags description: 'Key-value pairs to categorize AWS resources.' default: '{}' + type: JsonObject outputs: - {name: endpoint_name, description: 'Endpoint name'} implementation: container: - image: amazon/aws-sagemaker-kfp-components:0.3.0 - command: ['python'] + image: amazon/aws-sagemaker-kfp-components:0.3.1 + command: ['python3'] args: [ deploy.py, --region, {inputValue: region}, diff --git a/components/aws/sagemaker/deploy/src/deploy.py b/components/aws/sagemaker/deploy/src/deploy.py index 519b3f5d0b2f..1888e1b45d2c 100644 --- a/components/aws/sagemaker/deploy/src/deploy.py +++ b/components/aws/sagemaker/deploy/src/deploy.py @@ -19,36 +19,36 @@ def create_parser(): parser = argparse.ArgumentParser(description='SageMaker Training Job') _utils.add_default_client_arguments(parser) - parser.add_argument('--endpoint_config_name', type=str.strip, required=False, help='The name of the endpoint configuration.', default='') - parser.add_argument('--variant_name_1', type=str.strip, required=False, help='The name of the production variant.', default='variant-name-1') - parser.add_argument('--model_name_1', type=str.strip, required=True, help='The model name used for endpoint deployment.') - parser.add_argument('--initial_instance_count_1', 
type=_utils.str_to_int, required=False, help='Number of instances to launch initially.', default=1) + parser.add_argument('--endpoint_config_name', type=str, required=False, help='The name of the endpoint configuration.', default='') + parser.add_argument('--variant_name_1', type=str, required=False, help='The name of the production variant.', default='variant-name-1') + parser.add_argument('--model_name_1', type=str, required=True, help='The model name used for endpoint deployment.') + parser.add_argument('--initial_instance_count_1', type=int, required=False, help='Number of instances to launch initially.', default=1) parser.add_argument('--instance_type_1', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge', - 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str.strip, required=False, help='The ML compute instance type.', default='ml.m4.xlarge') - parser.add_argument('--initial_variant_weight_1', type=_utils.str_to_float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0) - parser.add_argument('--accelerator_type_1', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str.strip, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='') - parser.add_argument('--variant_name_2', type=str.strip, required=False, help='The name of the production variant.', default='variant-name-2') - parser.add_argument('--model_name_2', type=str.strip, required=False, help='The model name used for endpoint deployment.', default='') - 
parser.add_argument('--initial_instance_count_2', type=_utils.str_to_int, required=False, help='Number of instances to launch initially.', default=1) + 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge') + parser.add_argument('--initial_variant_weight_1', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0) + parser.add_argument('--accelerator_type_1', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='') + parser.add_argument('--variant_name_2', type=str, required=False, help='The name of the production variant.', default='variant-name-2') + parser.add_argument('--model_name_2', type=str, required=False, help='The model name used for endpoint deployment.', default='') + parser.add_argument('--initial_instance_count_2', type=int, required=False, help='Number of instances to launch initially.', default=1) parser.add_argument('--instance_type_2', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge', - 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str.strip, required=False, help='The ML compute instance type.', default='ml.m4.xlarge') - parser.add_argument('--initial_variant_weight_2', type=_utils.str_to_float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', 
default=1.0) - parser.add_argument('--accelerator_type_2', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str.strip, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='') - parser.add_argument('--variant_name_3', type=str.strip, required=False, help='The name of the production variant.', default='variant-name-3') - parser.add_argument('--model_name_3', type=str.strip, required=False, help='The model name used for endpoint deployment.', default='') - parser.add_argument('--initial_instance_count_3', type=_utils.str_to_int, required=False, help='Number of instances to launch initially.', default=1) + 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge') + parser.add_argument('--initial_variant_weight_2', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0) + parser.add_argument('--accelerator_type_2', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='') + parser.add_argument('--variant_name_3', type=str, required=False, help='The name of the production variant.', default='variant-name-3') + parser.add_argument('--model_name_3', type=str, required=False, help='The model name used for endpoint deployment.', default='') + parser.add_argument('--initial_instance_count_3', type=int, required=False, help='Number of instances to launch initially.', default=1) parser.add_argument('--instance_type_3', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 
'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge', - 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str.strip, required=False, help='The ML compute instance type.', default='ml.m4.xlarge') - parser.add_argument('--initial_variant_weight_3', type=_utils.str_to_float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0) - parser.add_argument('--accelerator_type_3', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str.strip, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='') - parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='') - parser.add_argument('--endpoint_config_tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') + 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge') + parser.add_argument('--initial_variant_weight_3', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0) + parser.add_argument('--accelerator_type_3', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='') + parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the 
storage volume attached to the ML compute instance(s).', default='') + parser.add_argument('--endpoint_config_tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={}) - parser.add_argument('--endpoint_name', type=str.strip, required=False, help='The name of the endpoint.', default='') - parser.add_argument('--endpoint_tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') + parser.add_argument('--endpoint_name', type=str, required=False, help='The name of the endpoint.', default='') + parser.add_argument('--endpoint_tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={}) return parser diff --git a/components/aws/sagemaker/ground_truth/component.yaml b/components/aws/sagemaker/ground_truth/component.yaml index 06823416e140..3a143cc77521 100644 --- a/components/aws/sagemaker/ground_truth/component.yaml +++ b/components/aws/sagemaker/ground_truth/component.yaml @@ -4,92 +4,123 @@ description: | inputs: - name: region description: 'The region where the cluster launches.' + type: String - name: role description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.' + type: String - name: job_name description: 'The name of the labeling job.' + type: String - name: label_attribute_name description: 'The attribute name to use for the label in the output manifest file. Default is the job name.' default: '' + type: String - name: manifest_location description: 'The Amazon S3 location of the manifest file that describes the input data objects.' + type: String - name: output_location description: 'The Amazon S3 location to write output data.' + type: String - name: output_encryption_key description: 'The AWS Key Management Service ID of the key used to encrypt the output data.' 
default: '' + type: String - name: task_type description: 'Built in image classification, bounding box, text classification, or semantic segmentation, or custom. If custom, please provide pre- and post-labeling task lambda functions.' + type: String - name: worker_type description: 'The workteam for data labeling, either public, private, or vendor.' + type: String - name: workteam_arn description: 'The ARN of the work team assigned to complete the tasks.' default: '' + type: String - name: no_adult_content description: 'If true, your data is free of adult content.' default: 'False' + type: Bool - name: no_ppi description: 'If true, your data is free of personally identifiable information.' default: 'False' + type: Bool - name: label_category_config description: 'The S3 URL of the JSON structured file that defines the categories used to label the data objects.' default: '' + type: String - name: max_human_labeled_objects description: 'The maximum number of objects that can be labeled by human workers.' default: '' + type: Integer - name: max_percent_objects description: 'The maximum number of input data objects that should be labeled.' default: '' + type: Integer - name: enable_auto_labeling description: 'Enables auto-labeling, only for bounding box, text classification, and image classification.' default: 'False' + type: Bool - name: initial_model_arn description: 'The ARN of the final model used for a previous auto-labeling job.' default: '' + type: String - name: resource_encryption_key description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).' default: '' + type: String - name: ui_template description: 'The Amazon S3 bucket location of the UI template.' + type: String - name: pre_human_task_function description: 'The ARN of a Lambda function that is run before a data object is sent to a human worker.' 
default: '' + type: String - name: post_human_task_function description: 'The ARN of a Lambda function implements the logic for annotation consolidation.' default: '' + type: String - name: task_keywords description: 'Keywords used to describe the task so that workers on Amazon Mechanical Turk can discover the task.' default: '' + type: String - name: title description: 'A title for the task for your human workers.' + type: String - name: description description: 'A description of the task for your human workers.' + type: String - name: num_workers_per_object description: 'The number of human workers that will label an object.' + type: Integer - name: time_limit description: 'The amount of time that a worker has to complete a task in seconds' + type: Integer - name: task_availibility description: 'The length of time that a task remains available for labeling by human workers.' default: '' + type: Integer - name: max_concurrent_tasks description: 'The maximum number of data objects that can be labeled by human workers at the same time.' default: '' + type: Integer - name: workforce_task_price description: 'The price that you pay for each task performed by a public worker in USD. Specify to the tenth fractions of a cent. Format as "0.000".' default: '0.000' + type: Float - name: endpoint_url description: 'The endpoint URL for the private link VPC endpoint.' default: '' + type: String - name: tags description: 'Key-value pairs to categorize AWS resources.' 
default: '{}' + type: JsonObject outputs: - {name: output_manifest_location, description: 'The Amazon S3 bucket location of the manifest file for labeled data.'} - {name: active_learning_model_arn, description: 'The ARN for the most recent Amazon SageMaker model trained as part of automated data labeling.'} implementation: container: - image: amazon/aws-sagemaker-kfp-components:0.3.0 - command: ['python'] + image: amazon/aws-sagemaker-kfp-components:0.3.1 + command: ['python3'] args: [ ground_truth.py, --region, {inputValue: region}, diff --git a/components/aws/sagemaker/ground_truth/src/ground_truth.py b/components/aws/sagemaker/ground_truth/src/ground_truth.py index 8f6db7d544a8..68f7a557dd2e 100644 --- a/components/aws/sagemaker/ground_truth/src/ground_truth.py +++ b/components/aws/sagemaker/ground_truth/src/ground_truth.py @@ -19,35 +19,35 @@ def create_parser(): parser = argparse.ArgumentParser(description='SageMaker Ground Truth Job') _utils.add_default_client_arguments(parser) - parser.add_argument('--role', type=str.strip, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') - parser.add_argument('--job_name', type=str.strip, required=True, help='The name of the labeling job.') - parser.add_argument('--label_attribute_name', type=str.strip, required=False, help='The attribute name to use for the label in the output manifest file. 
Default is the job name.', default='') - parser.add_argument('--manifest_location', type=str.strip, required=True, help='The Amazon S3 location of the manifest file that describes the input data objects.') - parser.add_argument('--output_location', type=str.strip, required=True, help='The Amazon S3 location to write output data.') - parser.add_argument('--output_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='') - parser.add_argument('--task_type', type=str.strip, required=True, help='Built in image classification, bounding box, text classification, or semantic segmentation, or custom. If custom, please provide pre- and post-labeling task lambda functions.') - parser.add_argument('--worker_type', type=str.strip, required=True, help='The workteam for data labeling, either public, private, or vendor.') - parser.add_argument('--workteam_arn', type=str.strip, required=False, help='The ARN of the work team assigned to complete the tasks.') + parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') + parser.add_argument('--job_name', type=str, required=True, help='The name of the labeling job.') + parser.add_argument('--label_attribute_name', type=str, required=False, help='The attribute name to use for the label in the output manifest file. 
Default is the job name.', default='') + parser.add_argument('--manifest_location', type=str, required=True, help='The Amazon S3 location of the manifest file that describes the input data objects.') + parser.add_argument('--output_location', type=str, required=True, help='The Amazon S3 location to write output data.') + parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='') + parser.add_argument('--task_type', type=str, required=True, help='Built in image classification, bounding box, text classification, or semantic segmentation, or custom. If custom, please provide pre- and post-labeling task lambda functions.') + parser.add_argument('--worker_type', type=str, required=True, help='The workteam for data labeling, either public, private, or vendor.') + parser.add_argument('--workteam_arn', type=str, required=False, help='The ARN of the work team assigned to complete the tasks.') parser.add_argument('--no_adult_content', type=_utils.str_to_bool, required=False, help='If true, your data is free of adult content.', default='False') parser.add_argument('--no_ppi', type=_utils.str_to_bool, required=False, help='If true, your data is free of personally identifiable information.', default='False') - parser.add_argument('--label_category_config', type=str.strip, required=False, help='The S3 URL of the JSON structured file that defines the categories used to label the data objects.', default='') - parser.add_argument('--max_human_labeled_objects', type=_utils.str_to_int, required=False, help='The maximum number of objects that can be labeled by human workers.', default=0) - parser.add_argument('--max_percent_objects', type=_utils.str_to_int, required=False, help='The maximum percentatge of input data objects that should be labeled.', default=0) + parser.add_argument('--label_category_config', type=str, required=False, help='The S3 URL of the JSON structured file 
that defines the categories used to label the data objects.', default='') + parser.add_argument('--max_human_labeled_objects', type=int, required=False, help='The maximum number of objects that can be labeled by human workers.', default=0) + parser.add_argument('--max_percent_objects', type=int, required=False, help='The maximum percentage of input data objects that should be labeled.', default=0) parser.add_argument('--enable_auto_labeling', type=_utils.str_to_bool, required=False, help='Enables auto-labeling, only for bounding box, text classification, and image classification.', default=False) - parser.add_argument('--initial_model_arn', type=str.strip, required=False, help='The ARN of the final model used for a previous auto-labeling job.', default='') - parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='') - parser.add_argument('--ui_template', type=str.strip, required=True, help='The Amazon S3 bucket location of the UI template.') - parser.add_argument('--pre_human_task_function', type=str.strip, required=False, help='The ARN of a Lambda function that is run before a data object is sent to a human worker.', default='') - parser.add_argument('--post_human_task_function', type=str.strip, required=False, help='The ARN of a Lambda function implements the logic for annotation consolidation.', default='') - parser.add_argument('--task_keywords', type=str.strip, required=False, help='Keywords used to
type=_utils.str_to_int, required=True, help='The number of human workers that will label an object.') - parser.add_argument('--time_limit', type=_utils.str_to_int, required=True, help='The amount of time that a worker has to complete a task in seconds') - parser.add_argument('--task_availibility', type=_utils.str_to_int, required=False, help='The length of time that a task remains available for labelling by human workers.', default=0) - parser.add_argument('--max_concurrent_tasks', type=_utils.str_to_int, required=False, help='The maximum number of data objects that can be labeled by human workers at the same time.', default=0) - parser.add_argument('--workforce_task_price', type=_utils.str_to_float, required=False, help='The price that you pay for each task performed by a public worker in USD. Specify to the tenth fractions of a cent. Format as "0.000".', default=0.000) - parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') + parser.add_argument('--initial_model_arn', type=str, required=False, help='The ARN of the final model used for a previous auto-labeling job.', default='') + parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='') + parser.add_argument('--ui_template', type=str, required=True, help='The Amazon S3 bucket location of the UI template.') + parser.add_argument('--pre_human_task_function', type=str, required=False, help='The ARN of a Lambda function that is run before a data object is sent to a human worker.', default='') + parser.add_argument('--post_human_task_function', type=str, required=False, help='The ARN of a Lambda function implements the logic for annotation consolidation.', default='') + parser.add_argument('--task_keywords', type=str, required=False, help='Keywords used to 
describe the task so that workers on Amazon Mechanical Turk can discover the task.', default='') + parser.add_argument('--title', type=str, required=True, help='A title for the task for your human workers.') + parser.add_argument('--description', type=str, required=True, help='A description of the task for your human workers.') + parser.add_argument('--num_workers_per_object', type=int, required=True, help='The number of human workers that will label an object.') + parser.add_argument('--time_limit', type=int, required=True, help='The amount of time that a worker has to complete a task in seconds') + parser.add_argument('--task_availibility', type=int, required=False, help='The length of time that a task remains available for labelling by human workers.', default=0) + parser.add_argument('--max_concurrent_tasks', type=int, required=False, help='The maximum number of data objects that can be labeled by human workers at the same time.', default=0) + parser.add_argument('--workforce_task_price', type=float, required=False, help='The price that you pay for each task performed by a public worker in USD. Specify to the tenth fractions of a cent. Format as "0.000".', default=0.000) + parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={}) return parser diff --git a/components/aws/sagemaker/hyperparameter_tuning/component.yaml b/components/aws/sagemaker/hyperparameter_tuning/component.yaml index 7d34164a6a91..396495f0934f 100644 --- a/components/aws/sagemaker/hyperparameter_tuning/component.yaml +++ b/components/aws/sagemaker/hyperparameter_tuning/component.yaml @@ -7,101 +7,136 @@ inputs: - name: job_name description: 'The name of the tuning job. Must be unique within the same AWS account and AWS region.' default: '' + type: String - name: role description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.' 
+ type: String - name: image description: 'The registry path of the Docker image that contains the training algorithm.' default: '' + type: String - name: algorithm_name description: 'The name of the algorithm resource to use for the hyperparameter tuning job. Do not specify a value for this if using training image.' default: '' + type: String - name: training_input_mode description: 'The input mode that the algorithm supports. File or Pipe.' default: 'File' + type: String - name: metric_definitions description: 'The dictionary of name-regex pairs specify the metrics that the algorithm emits.' default: '{}' + type: JsonObject - name: strategy description: 'How hyperparameter tuning chooses the combinations of hyperparameter values to use for the training job it launches.' default: 'Bayesian' + type: String - name: metric_name description: 'The name of the metric to use for the objective metric.' + type: String - name: metric_type description: 'Whether to minimize or maximize the objective metric.' + type: String - name: early_stopping_type description: 'Whether to use early stopping for training jobs launched by the tuning job.' default: 'Off' + type: String - name: static_parameters description: 'The values of hyperparameters that do not change for the tuning job.' default: '{}' + type: JsonObject - name: integer_parameters description: 'The array of IntegerParameterRange objects that specify ranges of integer hyperparameters that you want to search.' default: '[]' + type: JsonArray - name: continuous_parameters description: 'The array of ContinuousParameterRange objects that specify ranges of continuous hyperparameters that you want to search.' default: '[]' + type: JsonArray - name: categorical_parameters description: 'The array of CategoricalParameterRange objects that specify ranges of categorical hyperparameters that you want to search.' default: '[]' + type: JsonArray - name: channels description: 'A list of dicts specifying the input channels.
Must have at least one.' + type: JsonArray - name: output_location description: 'The Amazon S3 path where you want Amazon SageMaker to store the model artifacts is from the best training job.' + type: String - name: output_encryption_key description: 'The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.' default: '' + type: String - name: instance_type description: 'The ML compute instance type.' default: 'ml.m4.xlarge' + type: String - name: instance_count description: 'The number of ML compute instances to use in each training job.' default: '1' + type: Integer - name: volume_size description: 'The size of the ML storage volume that you want to provision.' default: '30' + type: Integer - name: max_num_jobs description: 'The maximum number of training jobs that a hyperparameter tuning job can launch.' + type: Integer - name: max_parallel_jobs description: 'The maximum number of concurrent training jobs that a hyperparameter tuning job can launch.' + type: Integer - name: max_run_time description: 'The maximum run time in seconds per training job.' default: '86400' + type: Integer - name: resource_encryption_key description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).' default: '' + type: String - name: vpc_security_group_ids description: 'The VPC security group IDs, in the form sg-xxxxxxxx.' default: '' + type: String - name: vpc_subnets description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.' default: '' + type: String - name: network_isolation description: 'Isolates the training container.' default: 'True' + type: Bool - name: traffic_encryption description: 'Encrypts all communications between ML compute instances in distributed training.' default: 'False' + type: Bool - name: spot_instance description: 'Use managed spot training.' 
default: 'False' + type: Bool - name: max_wait_time description: 'The maximum time in seconds you are willing to wait for a managed spot training job to complete.' default: '86400' + type: Integer - name: checkpoint_config description: 'Dictionary of information about the output location for managed spot training checkpoint data.' default: '{}' + type: JsonObject - name: warm_start_type description: 'Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"' default: '' + type: String - name: parent_hpo_jobs description: 'List of previously completed or stopped hyperparameter tuning jobs to be used as a starting point.' default: '' + type: String - name: endpoint_url description: 'The endpoint URL for the private link VPC endpoint.' default: '' + type: String - name: tags description: 'Key-value pairs, to categorize AWS resources.' default: '{}' + type: JsonObject outputs: - name: hpo_job_name description: 'The name of the hyper parameter tuning job' @@ -115,8 +150,8 @@ outputs: description: 'The registry path of the Docker image that contains the training algorithm' implementation: container: - image: amazon/aws-sagemaker-kfp-components:0.3.0 - command: ['python'] + image: amazon/aws-sagemaker-kfp-components:0.3.1 + command: ['python3'] args: [ hyperparameter_tuning.py, --region, {inputValue: region}, diff --git a/components/aws/sagemaker/hyperparameter_tuning/src/hyperparameter_tuning.py b/components/aws/sagemaker/hyperparameter_tuning/src/hyperparameter_tuning.py index 68fe2fefa584..df44a4098a34 100644 --- a/components/aws/sagemaker/hyperparameter_tuning/src/hyperparameter_tuning.py +++ b/components/aws/sagemaker/hyperparameter_tuning/src/hyperparameter_tuning.py @@ -20,46 +20,46 @@ def create_parser(): parser = argparse.ArgumentParser(description='SageMaker Hyperparameter Tuning Job') _utils.add_default_client_arguments(parser) - parser.add_argument('--job_name', type=str.strip, required=False, help='The name of the tuning job. 
Must be unique within the same AWS account and AWS region.') - parser.add_argument('--role', type=str.strip, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') - parser.add_argument('--image', type=str.strip, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='') - parser.add_argument('--algorithm_name', type=str.strip, required=False, help='The name of the resource algorithm to use for the hyperparameter tuning job.', default='') - parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str.strip, required=False, help='The input mode that the algorithm supports. File or Pipe.', default='File') - parser.add_argument('--metric_definitions', type=_utils.str_to_json_dict, required=False, help='The dictionary of name-regex pairs specify the metrics that the algorithm emits.', default='{}') - parser.add_argument('--strategy', choices=['Bayesian', 'Random'], type=str.strip, required=False, help='How hyperparameter tuning chooses the combinations of hyperparameter values to use for the training job it launches.', default='Bayesian') - parser.add_argument('--metric_name', type=str.strip, required=True, help='The name of the metric to use for the objective metric.') - parser.add_argument('--metric_type', choices=['Maximize', 'Minimize'], type=str.strip, required=True, help='Whether to minimize or maximize the objective metric.') - parser.add_argument('--early_stopping_type', choices=['Off', 'Auto'], type=str.strip, required=False, help='Whether to minimize or maximize the objective metric.', default='Off') - parser.add_argument('--static_parameters', type=_utils.str_to_json_dict, required=False, help='The values of hyperparameters that do not change for the tuning job.', default='{}') - parser.add_argument('--integer_parameters', type=_utils.str_to_json_list, required=False, help='The array of IntegerParameterRange objects that 
specify ranges of integer hyperparameters that you want to search.', default='[]') - parser.add_argument('--continuous_parameters', type=_utils.str_to_json_list, required=False, help='The array of ContinuousParameterRange objects that specify ranges of continuous hyperparameters that you want to search.', default='[]') - parser.add_argument('--categorical_parameters', type=_utils.str_to_json_list, required=False, help='The array of CategoricalParameterRange objects that specify ranges of categorical hyperparameters that you want to search.', default='[]') - parser.add_argument('--channels', type=_utils.str_to_json_list, required=True, help='A list of dicts specifying the input channels. Must have at least one.') - parser.add_argument('--output_location', type=str.strip, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.') - parser.add_argument('--output_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='') + parser.add_argument('--job_name', type=str, required=False, help='The name of the tuning job. Must be unique within the same AWS account and AWS region.') + parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') + parser.add_argument('--image', type=str, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='') + parser.add_argument('--algorithm_name', type=str, required=False, help='The name of the resource algorithm to use for the hyperparameter tuning job.', default='') + parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str, required=False, help='The input mode that the algorithm supports. 
File or Pipe.', default='File') + parser.add_argument('--metric_definitions', type=_utils.yaml_or_json_str, required=False, help='The dictionary of name-regex pairs specify the metrics that the algorithm emits.', default={}) + parser.add_argument('--strategy', choices=['Bayesian', 'Random'], type=str, required=False, help='How hyperparameter tuning chooses the combinations of hyperparameter values to use for the training job it launches.', default='Bayesian') + parser.add_argument('--metric_name', type=str, required=True, help='The name of the metric to use for the objective metric.') + parser.add_argument('--metric_type', choices=['Maximize', 'Minimize'], type=str, required=True, help='Whether to minimize or maximize the objective metric.') + parser.add_argument('--early_stopping_type', choices=['Off', 'Auto'], type=str, required=False, help='Whether to use early stopping for training jobs launched by the tuning job.', default='Off') + parser.add_argument('--static_parameters', type=_utils.yaml_or_json_str, required=False, help='The values of hyperparameters that do not change for the tuning job.', default={}) + parser.add_argument('--integer_parameters', type=_utils.yaml_or_json_str, required=False, help='The array of IntegerParameterRange objects that specify ranges of integer hyperparameters that you want to search.', default=[]) + parser.add_argument('--continuous_parameters', type=_utils.yaml_or_json_str, required=False, help='The array of ContinuousParameterRange objects that specify ranges of continuous hyperparameters that you want to search.', default=[]) + parser.add_argument('--categorical_parameters', type=_utils.yaml_or_json_str, required=False, help='The array of CategoricalParameterRange objects that specify ranges of categorical hyperparameters that you want to search.', default=[]) + parser.add_argument('--channels', type=_utils.yaml_or_json_str, required=True, help='A list of dicts specifying the input channels.
Must have at least one.') + parser.add_argument('--output_location', type=str, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.') + parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='') parser.add_argument('--instance_type', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge', - 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str.strip, required=False, help='The ML compute instance type.', default='ml.m4.xlarge') - parser.add_argument('--instance_count', type=_utils.str_to_int, required=False, help='The number of ML compute instances to use in each training job.', default=1) - parser.add_argument('--volume_size', type=_utils.str_to_int, required=False, help='The size of the ML storage volume that you want to provision.', default=1) - parser.add_argument('--max_num_jobs', type=_utils.str_to_int, required=True, help='The maximum number of training jobs that a hyperparameter tuning job can launch.') - parser.add_argument('--max_parallel_jobs', type=_utils.str_to_int, required=True, help='The maximum number of concurrent training jobs that a hyperparameter tuning job can launch.') - parser.add_argument('--max_run_time', type=_utils.str_to_int, required=False, help='The maximum run time in seconds per training job.', default=86400) - parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', 
default='') - parser.add_argument('--vpc_security_group_ids', type=str.strip, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.') - parser.add_argument('--vpc_subnets', type=str.strip, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.') + 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge') + parser.add_argument('--instance_count', type=int, required=False, help='The number of ML compute instances to use in each training job.', default=1) + parser.add_argument('--volume_size', type=int, required=False, help='The size of the ML storage volume that you want to provision.', default=1) + parser.add_argument('--max_num_jobs', type=int, required=True, help='The maximum number of training jobs that a hyperparameter tuning job can launch.') + parser.add_argument('--max_parallel_jobs', type=int, required=True, help='The maximum number of concurrent training jobs that a hyperparameter tuning job can launch.') + parser.add_argument('--max_run_time', type=int, required=False, help='The maximum run time in seconds per training job.', default=86400) + parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='') + parser.add_argument('--vpc_security_group_ids', type=str, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.') + parser.add_argument('--vpc_subnets', type=str, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.') parser.add_argument('--network_isolation', type=_utils.str_to_bool, required=False, help='Isolates the training container.', default=True) parser.add_argument('--traffic_encryption', type=_utils.str_to_bool, required=False, help='Encrypts all 
communications between ML compute instances in distributed training.', default=False) - parser.add_argument('--warm_start_type', choices=['IdenticalDataAndAlgorithm', 'TransferLearning', ''], type=str.strip, required=False, help='Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"') - parser.add_argument('--parent_hpo_jobs', type=str.strip, required=False, help='List of previously completed or stopped hyperparameter tuning jobs to be used as a starting point.', default='') + parser.add_argument('--warm_start_type', choices=['IdenticalDataAndAlgorithm', 'TransferLearning', ''], type=str, required=False, help='Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"') + parser.add_argument('--parent_hpo_jobs', type=str, required=False, help='List of previously completed or stopped hyperparameter tuning jobs to be used as a starting point.', default='') ### Start spot instance support parser.add_argument('--spot_instance', type=_utils.str_to_bool, required=False, help='Use managed spot training.', default=False) - parser.add_argument('--max_wait_time', type=_utils.str_to_int, required=False, help='The maximum time in seconds you are willing to wait for a managed spot training job to complete.', default=86400) - parser.add_argument('--checkpoint_config', type=_utils.str_to_json_dict, required=False, help='Dictionary of information about the output location for managed spot training checkpoint data.', default='{}') + parser.add_argument('--max_wait_time', type=int, required=False, help='The maximum time in seconds you are willing to wait for a managed spot training job to complete.', default=86400) + parser.add_argument('--checkpoint_config', type=_utils.yaml_or_json_str, required=False, help='Dictionary of information about the output location for managed spot training checkpoint data.', default={}) ### End spot instance support - parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to 
categorize AWS resources.', default='{}') + parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={}) return parser diff --git a/components/aws/sagemaker/model/component.yaml b/components/aws/sagemaker/model/component.yaml index 52ddcc2d6635..31d66ef4cc99 100644 --- a/components/aws/sagemaker/model/component.yaml +++ b/components/aws/sagemaker/model/component.yaml @@ -4,49 +4,63 @@ description: | inputs: - name: region description: 'The region where the training job launches.' + type: String - name: model_name description: 'The name of the new model.' + type: String - name: role description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.' + type: String - name: container_host_name description: 'When a ContainerDefinition is part of an inference pipeline, this value uniquely identifies the container for the purposes of logging and metrics.' default: '' + type: String - name: image description: 'The Amazon EC2 Container Registry (Amazon ECR) path where inference code is stored.' default: '' + type: String - name: model_artifact_url description: 'S3 path where Amazon SageMaker to store the model artifacts.' default: '' + type: String - name: environment description: 'The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.' default: '{}' + type: JsonObject - name: model_package description: 'The name or Amazon Resource Name (ARN) of the model package to use to create the model.' default: '' + type: String - name: secondary_containers description: 'A list of dicts that specifies the additional containers in the inference pipeline.' default: '[]' + type: JsonArray - name: vpc_security_group_ids description: 'The VPC security group IDs, in the form sg-xxxxxxxx.' 
default: '' + type: String - name: vpc_subnets description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.' default: '' + type: String - name: network_isolation description: 'Isolates the training container.' default: 'True' + type: Bool - name: endpoint_url description: 'The endpoint URL for the private link VPC endpoint.' default: '' + type: String - name: tags description: 'Key-value pairs to categorize AWS resources.' default: '{}' + type: JsonObject outputs: - {name: model_name, description: 'The model name Sagemaker created'} implementation: container: - image: amazon/aws-sagemaker-kfp-components:0.3.0 - command: ['python'] + image: amazon/aws-sagemaker-kfp-components:0.3.1 + command: ['python3'] args: [ create_model.py, --region, {inputValue: region}, diff --git a/components/aws/sagemaker/model/src/create_model.py b/components/aws/sagemaker/model/src/create_model.py index 7fbe211d542b..c6fcebd246c9 100644 --- a/components/aws/sagemaker/model/src/create_model.py +++ b/components/aws/sagemaker/model/src/create_model.py @@ -19,18 +19,18 @@ def create_parser(): parser = argparse.ArgumentParser(description='SageMaker Training Job') _utils.add_default_client_arguments(parser) - parser.add_argument('--model_name', type=str.strip, required=True, help='The name of the new model.') - parser.add_argument('--role', type=str.strip, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') - parser.add_argument('--container_host_name', type=str.strip, required=False, help='When a ContainerDefinition is part of an inference pipeline, this value uniquely identifies the container for the purposes of logging and metrics.', default='') - parser.add_argument('--image', type=str.strip, required=False, help='The Amazon EC2 Container Registry (Amazon ECR) path where inference code is stored.', default='') - parser.add_argument('--model_artifact_url', type=str.strip, required=False, help='S3 
path where Amazon SageMaker to store the model artifacts.', default='') - parser.add_argument('--environment', type=_utils.str_to_json_dict, required=False, help='The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.', default='{}') - parser.add_argument('--model_package', type=str.strip, required=False, help='The name or Amazon Resource Name (ARN) of the model package to use to create the model.', default='') - parser.add_argument('--secondary_containers', type=_utils.str_to_json_list, required=False, help='A list of dicts that specifies the additional containers in the inference pipeline.', default='{}') - parser.add_argument('--vpc_security_group_ids', type=str.strip, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.', default='') - parser.add_argument('--vpc_subnets', type=str.strip, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.', default='') + parser.add_argument('--model_name', type=str, required=True, help='The name of the new model.') + parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') + parser.add_argument('--container_host_name', type=str, required=False, help='When a ContainerDefinition is part of an inference pipeline, this value uniquely identifies the container for the purposes of logging and metrics.', default='') + parser.add_argument('--image', type=str, required=False, help='The Amazon EC2 Container Registry (Amazon ECR) path where inference code is stored.', default='') + parser.add_argument('--model_artifact_url', type=str, required=False, help='S3 path where Amazon SageMaker to store the model artifacts.', default='') + parser.add_argument('--environment', type=_utils.yaml_or_json_str, required=False, help='The dictionary of the environment variables to set in the Docker container. 
Up to 16 key-value entries in the map.', default={}) + parser.add_argument('--model_package', type=str, required=False, help='The name or Amazon Resource Name (ARN) of the model package to use to create the model.', default='') + parser.add_argument('--secondary_containers', type=_utils.yaml_or_json_str, required=False, help='A list of dicts that specifies the additional containers in the inference pipeline.', default={}) + parser.add_argument('--vpc_security_group_ids', type=str, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.', default='') + parser.add_argument('--vpc_subnets', type=str, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.', default='') parser.add_argument('--network_isolation', type=_utils.str_to_bool, required=False, help='Isolates the training container.', default=True) - parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') + parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={}) return parser diff --git a/components/aws/sagemaker/train/component.yaml b/components/aws/sagemaker/train/component.yaml index e26f82d57338..d27a3864e834 100644 --- a/components/aws/sagemaker/train/component.yaml +++ b/components/aws/sagemaker/train/component.yaml @@ -4,83 +4,108 @@ description: | inputs: - name: region description: 'The region where the training job launches.' + type: String - name: job_name description: 'The name of the batch training job.' default: '' + type: String - name: role description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.' + type: String - name: image description: 'The registry path of the Docker image that contains the training algorithm.' 
default: '' + type: String - name: algorithm_name description: 'The name of the algorithm resource to use for the training job. Do not specify a value for this if using training image.' default: '' + type: String - name: metric_definitions description: 'The dictionary of name-regex pairs specify the metrics that the algorithm emits.' default: '{}' + type: JsonObject - name: training_input_mode description: 'The input mode that the algorithm supports. File or Pipe.' default: 'File' + type: String - name: hyperparameters description: 'Dictionary of hyperparameters for the the algorithm.' default: '{}' + type: JsonObject - name: channels description: 'A list of dicts specifying the input channels. Must have at least one.' + type: JsonArray - name: instance_type description: 'The ML compute instance type.' default: 'ml.m4.xlarge' + type: String - name: instance_count description: 'The number of ML compute instances to use in each training job.' default: '1' + type: Integer - name: volume_size description: 'The size of the ML storage volume that you want to provision.' default: '30' + type: Integer - name: resource_encryption_key description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).' default: '' + type: String - name: max_run_time description: 'The maximum run time in seconds for the training job.' default: '86400' + type: Integer - name: model_artifact_path description: 'Identifies the S3 path where you want Amazon SageMaker to store the model artifacts.' + type: String - name: output_encryption_key description: 'The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.' default: '' + type: String - name: vpc_security_group_ids description: 'The VPC security group IDs, in the form sg-xxxxxxxx.' default: '' + type: String - name: vpc_subnets description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.' 
default: '' + type: String - name: network_isolation description: 'Isolates the training container.' default: 'True' + type: Bool - name: traffic_encryption description: 'Encrypts all communications between ML compute instances in distributed training.' default: 'False' + type: Bool - name: spot_instance description: 'Use managed spot training.' default: 'False' + type: Bool - name: max_wait_time description: 'The maximum time in seconds you are willing to wait for a managed spot training job to complete.' default: '86400' + type: Integer - name: checkpoint_config description: 'Dictionary of information about the output location for managed spot training checkpoint data.' default: '{}' + type: JsonObject - name: endpoint_url description: 'The endpoint URL for the private link VPC endpoint.' default: '' + type: String - name: tags description: 'Key-value pairs, to categorize AWS resources.' default: '{}' + type: JsonObject outputs: - {name: model_artifact_url, description: 'Model artifacts url'} - {name: job_name, description: 'Training job name'} - {name: training_image, description: 'The registry path of the Docker image that contains the training algorithm'} implementation: container: - image: amazon/aws-sagemaker-kfp-components:0.3.0 - command: ['python'] + image: amazon/aws-sagemaker-kfp-components:0.3.1 + command: ['python3'] args: [ train.py, --region, {inputValue: region}, diff --git a/components/aws/sagemaker/train/src/train.py b/components/aws/sagemaker/train/src/train.py index 448e86341b3b..a6ae3bdbf11b 100644 --- a/components/aws/sagemaker/train/src/train.py +++ b/components/aws/sagemaker/train/src/train.py @@ -19,35 +19,35 @@ def create_parser(): parser = argparse.ArgumentParser(description='SageMaker Training Job') _utils.add_default_client_arguments(parser) - parser.add_argument('--job_name', type=str.strip, required=False, help='The name of the training job.', default='') - parser.add_argument('--role', type=str.strip, required=True, help='The Amazon 
Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') - parser.add_argument('--image', type=str.strip, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='') - parser.add_argument('--algorithm_name', type=str.strip, required=False, help='The name of the resource algorithm to use for the training job.', default='') - parser.add_argument('--metric_definitions', type=_utils.str_to_json_dict, required=False, help='The dictionary of name-regex pairs specify the metrics that the algorithm emits.', default='{}') - parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str.strip, help='The input mode that the algorithm supports. File or Pipe.', default='File') - parser.add_argument('--hyperparameters', type=_utils.str_to_json_dict, help='Dictionary of hyperparameters for the the algorithm.', default='{}') - parser.add_argument('--channels', type=_utils.str_to_json_list, required=True, help='A list of dicts specifying the input channels. Must have at least one.') + parser.add_argument('--job_name', type=str, required=False, help='The name of the training job.', default='') + parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') + parser.add_argument('--image', type=str, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='') + parser.add_argument('--algorithm_name', type=str, required=False, help='The name of the resource algorithm to use for the training job.', default='') + parser.add_argument('--metric_definitions', type=_utils.yaml_or_json_str, required=False, help='The dictionary of name-regex pairs specify the metrics that the algorithm emits.', default={}) + parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str, help='The input mode that the algorithm supports. 
File or Pipe.', default='File')
+    parser.add_argument('--hyperparameters', type=_utils.yaml_or_json_str, help='Dictionary of hyperparameters for the algorithm.', default={})
+    parser.add_argument('--channels', type=_utils.yaml_or_json_str, required=True, help='A list of dicts specifying the input channels. Must have at least one.')
     parser.add_argument('--instance_type', required=True, choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge',
         'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
-        'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str.strip, help='The ML compute instance type.', default='ml.m4.xlarge')
-    parser.add_argument('--instance_count', required=True, type=_utils.str_to_int, help='The registry path of the Docker image that contains the training algorithm.', default=1)
-    parser.add_argument('--volume_size', type=_utils.str_to_int, required=True, help='The size of the ML storage volume that you want to provision.', default=1)
-    parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
-    parser.add_argument('--max_run_time', type=_utils.str_to_int, required=True, help='The maximum run time in seconds for the training job.', default=86400)
-    parser.add_argument('--model_artifact_path', type=str.strip, required=True, help='Identifies the S3 path where you want Amazon SageMaker to store the model artifacts.')
-    parser.add_argument('--output_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
-    
parser.add_argument('--vpc_security_group_ids', type=str.strip, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.')
-    parser.add_argument('--vpc_subnets', type=str.strip, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.')
+        'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, help='The ML compute instance type.', default='ml.m4.xlarge')
+    parser.add_argument('--instance_count', required=True, type=int, help='The number of ML compute instances to use in each training job.', default=1)
+    parser.add_argument('--volume_size', type=int, required=True, help='The size of the ML storage volume that you want to provision.', default=1)
+    parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
+    parser.add_argument('--max_run_time', type=int, required=True, help='The maximum run time in seconds for the training job.', default=86400)
+    parser.add_argument('--model_artifact_path', type=str, required=True, help='Identifies the S3 path where you want Amazon SageMaker to store the model artifacts.')
+    parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
+    parser.add_argument('--vpc_security_group_ids', type=str, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.')
+    parser.add_argument('--vpc_subnets', type=str, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.')
     parser.add_argument('--network_isolation', type=_utils.str_to_bool, required=False, help='Isolates the training container.', default=True)
     parser.add_argument('--traffic_encryption', type=_utils.str_to_bool, required=False, help='Encrypts all 
communications between ML compute instances in distributed training.', default=False) ### Start spot instance support parser.add_argument('--spot_instance', type=_utils.str_to_bool, required=False, help='Use managed spot training.', default=False) - parser.add_argument('--max_wait_time', type=_utils.str_to_int, required=False, help='The maximum time in seconds you are willing to wait for a managed spot training job to complete.', default=86400) - parser.add_argument('--checkpoint_config', type=_utils.str_to_json_dict, required=False, help='Dictionary of information about the output location for managed spot training checkpoint data.', default='{}') + parser.add_argument('--max_wait_time', type=int, required=False, help='The maximum time in seconds you are willing to wait for a managed spot training job to complete.', default=86400) + parser.add_argument('--checkpoint_config', type=_utils.yaml_or_json_str, required=False, help='Dictionary of information about the output location for managed spot training checkpoint data.', default={}) ### End spot instance support - parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') + parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={}) return parser diff --git a/components/aws/sagemaker/workteam/component.yaml b/components/aws/sagemaker/workteam/component.yaml index af37ae614835..589f25d709c5 100644 --- a/components/aws/sagemaker/workteam/component.yaml +++ b/components/aws/sagemaker/workteam/component.yaml @@ -4,31 +4,40 @@ description: | inputs: - name: region description: 'The region where the cluster launches.' + type: String - name: team_name description: 'The name of your work team.' + type: String - name: description description: 'A description of the work team.' 
+ type: String - name: user_pool description: 'An identifier for a user pool. The user pool must be in the same region as the service that you are calling.' + type: String - name: user_groups description: 'An identifier for a user group.' + type: String - name: client_id description: 'An identifier for an application client. You must create the app client ID using Amazon Cognito.' + type: String - name: sns_topic description: 'The ARN for the SNS topic to which notifications should be published.' default: '' + type: String - name: endpoint_url description: 'The endpoint URL for the private link VPC endpoint.' default: '' + type: String - name: tags description: 'Key-value pairs to categorize AWS resources.' default: '{}' + type: JsonObject outputs: - {name: workteam_arn, description: 'The ARN of the workteam.'} implementation: container: - image: amazon/aws-sagemaker-kfp-components:0.3.0 - command: ['python'] + image: amazon/aws-sagemaker-kfp-components:0.3.1 + command: ['python3'] args: [ workteam.py, --region, {inputValue: region}, diff --git a/components/aws/sagemaker/workteam/src/workteam.py b/components/aws/sagemaker/workteam/src/workteam.py index cda4d0a7d501..5f9b6caaf4d7 100644 --- a/components/aws/sagemaker/workteam/src/workteam.py +++ b/components/aws/sagemaker/workteam/src/workteam.py @@ -19,13 +19,13 @@ def create_parser(): parser = argparse.ArgumentParser(description='SageMaker Hyperparameter Tuning Job') _utils.add_default_client_arguments(parser) - parser.add_argument('--team_name', type=str.strip, required=True, help='The name of your work team.') - parser.add_argument('--description', type=str.strip, required=True, help='A description of the work team.') - parser.add_argument('--user_pool', type=str.strip, required=False, help='An identifier for a user pool. 
The user pool must be in the same region as the service that you are calling.', default='') - parser.add_argument('--user_groups', type=str.strip, required=False, help='A list of identifiers for user groups separated by commas.', default='') - parser.add_argument('--client_id', type=str.strip, required=False, help='An identifier for an application client. You must create the app client ID using Amazon Cognito.', default='') - parser.add_argument('--sns_topic', type=str.strip, required=False, help='The ARN for the SNS topic to which notifications should be published.', default='') - parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') + parser.add_argument('--team_name', type=str, required=True, help='The name of your work team.') + parser.add_argument('--description', type=str, required=True, help='A description of the work team.') + parser.add_argument('--user_pool', type=str, required=False, help='An identifier for a user pool. The user pool must be in the same region as the service that you are calling.', default='') + parser.add_argument('--user_groups', type=str, required=False, help='A list of identifiers for user groups separated by commas.', default='') + parser.add_argument('--client_id', type=str, required=False, help='An identifier for an application client. 
You must create the app client ID using Amazon Cognito.', default='') + parser.add_argument('--sns_topic', type=str, required=False, help='The ARN for the SNS topic to which notifications should be published.', default='') + parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={}) return parser diff --git a/samples/contrib/aws-samples/ground_truth_pipeline_demo/mini-image-classification-pipeline.py b/samples/contrib/aws-samples/ground_truth_pipeline_demo/mini-image-classification-pipeline.py index efabfd7e637b..cba865328f65 100644 --- a/samples/contrib/aws-samples/ground_truth_pipeline_demo/mini-image-classification-pipeline.py +++ b/samples/contrib/aws-samples/ground_truth_pipeline_demo/mini-image-classification-pipeline.py @@ -51,18 +51,18 @@ def ground_truth_test(region='us-west-2', ground_truth_ui_template='s3://your-bucket-name/mini-image-classification/ground-truth-demo/instructions.template', ground_truth_title='Mini image classification', ground_truth_description='Test for Ground Truth KFP component', - ground_truth_num_workers_per_object='1', - ground_truth_time_limit='30', - ground_truth_task_availibility='3600', - ground_truth_max_concurrent_tasks='20', + ground_truth_num_workers_per_object=1, + ground_truth_time_limit=30, + ground_truth_task_availibility=3600, + ground_truth_max_concurrent_tasks=20, training_algorithm_name='image classification', training_input_mode='Pipe', - training_hyperparameters='{"num_classes": "2", "num_training_samples": "14", "mini_batch_size": "2"}', + training_hyperparameters={"num_classes": "2", "num_training_samples": "14", "mini_batch_size": "2"}, training_output_location='s3://your-bucket-name/mini-image-classification/training-output', training_instance_type='ml.p2.xlarge', - training_instance_count='1', - training_volume_size='50', - training_max_run_time='3600', + training_instance_count=1, + training_volume_size=50, + 
training_max_run_time=3600, role_arn='' ): diff --git a/samples/contrib/aws-samples/mnist-kmeans-sagemaker/kmeans-hpo-pipeline.py b/samples/contrib/aws-samples/mnist-kmeans-sagemaker/kmeans-hpo-pipeline.py index c3cf49f14d95..4b94a182c3fb 100644 --- a/samples/contrib/aws-samples/mnist-kmeans-sagemaker/kmeans-hpo-pipeline.py +++ b/samples/contrib/aws-samples/mnist-kmeans-sagemaker/kmeans-hpo-pipeline.py @@ -21,7 +21,6 @@ 'S3DataDistributionType': 'FullyReplicated' } }, - 'ContentType': '', 'CompressionType': 'None', 'RecordWrapperType': 'None', 'InputMode': 'File' @@ -44,37 +43,37 @@ def hpo_test(region='us-west-2', image='', algorithm_name='K-Means', training_input_mode='File', - metric_definitions='{}', + metric_definitions={}, strategy='Bayesian', metric_name='test:msd', metric_type='Minimize', early_stopping_type='Off', - static_parameters='{"k": "10", "feature_dim": "784"}', - integer_parameters='[{"Name": "mini_batch_size", "MinValue": "450", "MaxValue": "550"}, \ - {"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}]', - continuous_parameters='[]', - categorical_parameters='[{"Name": "init_method", "Values": ["random", "kmeans++"]}]', - channels=json.dumps(channelObjList), + static_parameters={"k": "10", "feature_dim": "784"}, + integer_parameters=[{"Name": "mini_batch_size", "MinValue": "450", "MaxValue": "550"}, \ + {"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}], + continuous_parameters=[], + categorical_parameters=[{"Name": "init_method", "Values": ["random", "kmeans++"]}], + channels=channelObjList, output_location='s3://kubeflow-pipeline-data/mnist_kmeans_example/output', output_encryption_key='', instance_type='ml.p2.16xlarge', - instance_count='1', - volume_size='50', - max_num_jobs='1', - max_parallel_jobs='1', + instance_count=1, + volume_size=50, + max_num_jobs=1, + max_parallel_jobs=1, resource_encryption_key='', - max_run_time='3600', + max_run_time=3600, vpc_security_group_ids='', vpc_subnets='', 
endpoint_url='', - network_isolation='True', - traffic_encryption='False', + network_isolation=True, + traffic_encryption=False, warm_start_type='', parent_hpo_jobs='', - spot_instance='False', - max_wait_time='3600', - checkpoint_config='{}', - tags='{}', + spot_instance=False, + max_wait_time=3600, + checkpoint_config={}, + tags={}, role_arn='', ): diff --git a/samples/contrib/aws-samples/mnist-kmeans-sagemaker/mnist-classification-pipeline.py b/samples/contrib/aws-samples/mnist-kmeans-sagemaker/mnist-classification-pipeline.py index f02d47b817d7..3b2003911f94 100644 --- a/samples/contrib/aws-samples/mnist-kmeans-sagemaker/mnist-classification-pipeline.py +++ b/samples/contrib/aws-samples/mnist-kmeans-sagemaker/mnist-classification-pipeline.py @@ -26,7 +26,6 @@ 'S3DataDistributionType': 'FullyReplicated' } }, - 'ContentType': '', 'CompressionType': 'None', 'RecordWrapperType': 'None', 'InputMode': 'File' @@ -52,37 +51,37 @@ def mnist_classification(region='us-west-2', hpo_metric_name='test:msd', hpo_metric_type='Minimize', hpo_early_stopping_type='Off', - hpo_static_parameters='{"k": "10", "feature_dim": "784"}', - hpo_integer_parameters='[{"Name": "mini_batch_size", "MinValue": "500", "MaxValue": "600"}, {"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}]', - hpo_continuous_parameters='[]', - hpo_categorical_parameters='[{"Name": "init_method", "Values": ["random", "kmeans++"]}]', - hpo_channels=json.dumps(hpoChannels), - hpo_spot_instance='False', - hpo_max_wait_time='3600', - hpo_checkpoint_config='{}', + hpo_static_parameters={"k": "10", "feature_dim": "784"}, + hpo_integer_parameters=[{"Name": "mini_batch_size", "MinValue": "500", "MaxValue": "600"}, {"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}], + hpo_continuous_parameters=[], + hpo_categorical_parameters=[{"Name": "init_method", "Values": ["random", "kmeans++"]}], + hpo_channels=hpoChannels, + hpo_spot_instance=False, + hpo_max_wait_time=3600, + 
hpo_checkpoint_config={}, output_location='s3://kubeflow-pipeline-data/mnist_kmeans_example/output', output_encryption_key='', instance_type='ml.p2.16xlarge', - instance_count='1', - volume_size='50', - hpo_max_num_jobs='9', - hpo_max_parallel_jobs='3', - max_run_time='3600', + instance_count=1, + volume_size=50, + hpo_max_num_jobs=9, + hpo_max_parallel_jobs=3, + max_run_time=3600, endpoint_url='', - network_isolation='True', - traffic_encryption='False', - train_channels=json.dumps(trainChannels), - train_spot_instance='False', - train_max_wait_time='3600', - train_checkpoint_config='{}', + network_isolation=True, + traffic_encryption=False, + train_channels=trainChannels, + train_spot_instance=False, + train_max_wait_time=3600, + train_checkpoint_config={}, batch_transform_instance_type='ml.m4.xlarge', batch_transform_input='s3://kubeflow-pipeline-data/mnist_kmeans_example/input', batch_transform_data_type='S3Prefix', batch_transform_content_type='text/csv', batch_transform_compression_type='None', batch_transform_ouput='s3://kubeflow-pipeline-data/mnist_kmeans_example/output', - batch_transform_max_concurrent='4', - batch_transform_max_payload='6', + batch_transform_max_concurrent=4, + batch_transform_max_payload=6, batch_strategy='MultiRecord', batch_transform_split_type='Line', role_arn='' diff --git a/samples/contrib/aws-samples/simple_train_pipeline/training-pipeline.py b/samples/contrib/aws-samples/simple_train_pipeline/training-pipeline.py index d21320fdf4a8..a07c087c85b7 100644 --- a/samples/contrib/aws-samples/simple_train_pipeline/training-pipeline.py +++ b/samples/contrib/aws-samples/simple_train_pipeline/training-pipeline.py @@ -20,7 +20,6 @@ 'S3DataDistributionType': 'FullyReplicated' } }, - 'ContentType': '', 'CompressionType': 'None', 'RecordWrapperType': 'None', 'InputMode': 'File' @@ -40,19 +39,19 @@ def training( endpoint_url='', image='382416733822.dkr.ecr.us-east-1.amazonaws.com/kmeans:1', training_input_mode='File', - hyperparameters='{"k": 
"10", "feature_dim": "784"}', - channels=json.dumps(channelObjList), + hyperparameters={"k": "10", "feature_dim": "784"}, + channels=channelObjList, instance_type='ml.p2.xlarge', - instance_count='1', - volume_size='50', - max_run_time='3600', + instance_count=1, + volume_size=50, + max_run_time=3600, model_artifact_path='s3://kubeflow-pipeline-data/mnist_kmeans_example/data', output_encryption_key='', - network_isolation='True', - traffic_encryption='False', - spot_instance='False', - max_wait_time='3600', - checkpoint_config='{}', + network_isolation=True, + traffic_encryption=False, + spot_instance=False, + max_wait_time=3600, + checkpoint_config={}, role='' ): training = sagemaker_train_op(