From bfd21d6d6b73d08125202e34f319c13f9f47aaea Mon Sep 17 00:00:00 2001 From: Nicholas Thomson Date: Mon, 11 May 2020 22:06:21 -0700 Subject: [PATCH] [AWS SageMaker] Specify component input types (#3683) * Replace all string types with Python types * Update HPO yaml * Update Batch YAML * Update Deploy YAML * Update GroundTruth YAML * Update Model YAML * Update Train YAML * Update WorkTeam YAML * Updated samples to remove strings * Update to temporary image * Remove unnecessary imports * Update image to newer image * Update components to python3 * Update bool parser type * Remove empty ContentType in samples * Update to temporary image * Update to version 0.3.1 * Update deploy to login * Update deploy load config path * Fix export environment variable in deploy * Fix env name * Update deploy reflow env paths * Add debug config line * Use username and password directly * Updated to 0.3.1 * Update field types to JsonObject and JsonArray --- .../aws/sagemaker/THIRD-PARTY-LICENSES.txt | 2 +- .../sagemaker/batch_transform/component.yaml | 28 ++++++++- .../batch_transform/src/batch_transform.py | 46 +++++++------- .../sagemaker/codebuild/deploy.buildspec.yml | 6 +- .../aws/sagemaker/codebuild/scripts/deploy.sh | 7 ++- components/aws/sagemaker/common/_utils.py | 49 +++++---------- .../aws/sagemaker/deploy/component.yaml | 29 ++++++++- components/aws/sagemaker/deploy/src/deploy.py | 46 +++++++------- .../aws/sagemaker/ground_truth/component.yaml | 35 ++++++++++- .../ground_truth/src/ground_truth.py | 52 ++++++++-------- .../hyperparameter_tuning/component.yaml | 39 +++++++++++- .../src/hyperparameter_tuning.py | 62 +++++++++---------- components/aws/sagemaker/model/component.yaml | 18 +++++- .../aws/sagemaker/model/src/create_model.py | 22 +++---- components/aws/sagemaker/train/component.yaml | 29 ++++++++- components/aws/sagemaker/train/src/train.py | 40 ++++++------ .../aws/sagemaker/workteam/component.yaml | 13 +++- .../aws/sagemaker/workteam/src/workteam.py | 14 ++--- 
.../mini-image-classification-pipeline.py | 16 ++--- .../kmeans-hpo-pipeline.py | 37 ++++++----- .../mnist-classification-pipeline.py | 43 +++++++------ .../training-pipeline.py | 21 +++---- 22 files changed, 398 insertions(+), 256 deletions(-) diff --git a/components/aws/sagemaker/THIRD-PARTY-LICENSES.txt b/components/aws/sagemaker/THIRD-PARTY-LICENSES.txt index 179d9b3ec3f2..fc268748af99 100644 --- a/components/aws/sagemaker/THIRD-PARTY-LICENSES.txt +++ b/components/aws/sagemaker/THIRD-PARTY-LICENSES.txt @@ -1,4 +1,4 @@ -** Amazon SageMaker Components for Kubeflow Pipelines; version 0.3.0 -- +** Amazon SageMaker Components for Kubeflow Pipelines; version 0.3.1 -- https://github.com/kubeflow/pipelines/tree/master/components/aws/sagemaker Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. ** boto3; version 1.12.33 -- https://github.com/boto/boto3/ diff --git a/components/aws/sagemaker/batch_transform/component.yaml b/components/aws/sagemaker/batch_transform/component.yaml index 6e0ad637ac81..b585ecbcf569 100644 --- a/components/aws/sagemaker/batch_transform/component.yaml +++ b/components/aws/sagemaker/batch_transform/component.yaml @@ -4,78 +4,102 @@ description: | inputs: - name: region description: 'The region where the cluster launches.' + type: String - name: job_name description: 'The name of the batch transform job.' default: '' + type: String - name: model_name description: 'The name of the model that you want to use for the transform job.' + type: String - name: max_concurrent description: 'The maximum number of parallel requests that can be sent to each instance in a transform job.' default: '0' + type: Integer - name: max_payload description: 'The maximum allowed size of the payload, in MB.' default: '6' + type: Integer - name: batch_strategy description: 'The number of records to include in a mini-batch for an HTTP inference request.' 
default: '' + type: String - name: environment description: 'The environment variables to set in the Docker container. Up to 16 key-value entries in the map.' default: '{}' + type: JsonObject - name: input_location description: 'The S3 location of the data source that is associated with a channel.' + type: String - name: data_type description: 'Data type of the input. Can be ManifestFile, S3Prefix, or AugmentedManifestFile.' default: 'S3Prefix' + type: String - name: content_type description: 'The multipurpose internet mail extension (MIME) type of the data.' default: '' + type: String - name: split_type description: 'The method to use to split the transform job data files into smaller batches.' default: 'None' + type: String - name: compression_type description: 'If the transform data is compressed, the specification of the compression type.' default: 'None' + type: String - name: output_location description: 'The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.' + type: String - name: accept description: 'The MIME type used to specify the output data.' default: '' + type: String - name: assemble_with description: 'Defines how to assemble the results of the transform job as a single S3 object. Either None or Line.' default: '' + type: String - name: output_encryption_key description: 'The AWS Key Management Service ID of the key used to encrypt the output data.' default: '' + type: String - name: input_filter description: 'A JSONPath expression used to select a portion of the input data to pass to the algorithm.' default: '' + type: String - name: output_filter description: 'A JSONPath expression used to select a portion of the joined dataset to save in the output file for a batch transform job.' default: '' + type: String - name: join_source description: 'Specifies the source of the data to join with the transformed data.' default: 'None' + type: String - name: instance_type description: 'The ML compute instance type.' 
default: 'ml.m4.xlarge' + type: String - name: instance_count description: 'The number of ML compute instances to use in each training job.' default: '1' + type: Integer - name: resource_encryption_key description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).' default: '' + type: String - name: endpoint_url description: 'The endpoint URL for the private link VPC endpoint.' default: '' + type: String - name: tags description: 'Key-value pairs to categorize AWS resources.' default: '{}' + type: JsonObject outputs: - {name: output_location, description: 'S3 URI of the transform job results.'} implementation: container: - image: amazon/aws-sagemaker-kfp-components:0.3.0 - command: ['python'] + image: amazon/aws-sagemaker-kfp-components:0.3.1 + command: ['python3'] args: [ batch_transform.py, --region, {inputValue: region}, diff --git a/components/aws/sagemaker/batch_transform/src/batch_transform.py b/components/aws/sagemaker/batch_transform/src/batch_transform.py index 9ff2b85fcb97..b658dad730be 100644 --- a/components/aws/sagemaker/batch_transform/src/batch_transform.py +++ b/components/aws/sagemaker/batch_transform/src/batch_transform.py @@ -26,31 +26,31 @@ def create_parser(): parser = argparse.ArgumentParser(description='SageMaker Batch Transformation Job') _utils.add_default_client_arguments(parser) - parser.add_argument('--job_name', type=str.strip, required=False, help='The name of the transform job.', default='') - parser.add_argument('--model_name', type=str.strip, required=True, help='The name of the model that you want to use for the transform job.') - parser.add_argument('--max_concurrent', type=_utils.str_to_int, required=False, help='The maximum number of parallel requests that can be sent to each instance in a transform job.', default='0') - parser.add_argument('--max_payload', type=_utils.str_to_int, required=False, help='The maximum allowed size of the payload, in MB.', 
default='6') - parser.add_argument('--batch_strategy', choices=['MultiRecord', 'SingleRecord', ''], type=str.strip, required=False, help='The number of records to include in a mini-batch for an HTTP inference request.', default='') - parser.add_argument('--environment', type=_utils.str_to_json_dict, required=False, help='The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.', default='{}') - parser.add_argument('--input_location', type=str.strip, required=True, help='The S3 location of the data source that is associated with a channel.') - parser.add_argument('--data_type', choices=['ManifestFile', 'S3Prefix', 'AugmentedManifestFile', ''], type=str.strip, required=False, help='Data type of the input. Can be ManifestFile, S3Prefix, or AugmentedManifestFile.', default='S3Prefix') - parser.add_argument('--content_type', type=str.strip, required=False, help='The multipurpose internet mail extension (MIME) type of the data.', default='') - parser.add_argument('--split_type', choices=['None', 'Line', 'RecordIO', 'TFRecord', ''], type=str.strip, required=False, help='The method to use to split the transform job data files into smaller batches.', default='None') - parser.add_argument('--compression_type', choices=['None', 'Gzip', ''], type=str.strip, required=False, help='If the transform data is compressed, the specification of the compression type.', default='None') - parser.add_argument('--output_location', type=str.strip, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.') - parser.add_argument('--accept', type=str.strip, required=False, help='The MIME type used to specify the output data.') - parser.add_argument('--assemble_with', choices=['None', 'Line', ''], type=str.strip, required=False, help='Defines how to assemble the results of the transform job as a single S3 object. 
Either None or Line.') - parser.add_argument('--output_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='') - parser.add_argument('--input_filter', type=str.strip, required=False, help='A JSONPath expression used to select a portion of the input data to pass to the algorithm.', default='') - parser.add_argument('--output_filter', type=str.strip, required=False, help='A JSONPath expression used to select a portion of the joined dataset to save in the output file for a batch transform job.', default='') - parser.add_argument('--join_source', choices=['None', 'Input', ''], type=str.strip, required=False, help='Specifies the source of the data to join with the transformed data.', default='None') + parser.add_argument('--job_name', type=str, required=False, help='The name of the transform job.', default='') + parser.add_argument('--model_name', type=str, required=True, help='The name of the model that you want to use for the transform job.') + parser.add_argument('--max_concurrent', type=int, required=False, help='The maximum number of parallel requests that can be sent to each instance in a transform job.', default='0') + parser.add_argument('--max_payload', type=int, required=False, help='The maximum allowed size of the payload, in MB.', default='6') + parser.add_argument('--batch_strategy', choices=['MultiRecord', 'SingleRecord', ''], type=str, required=False, help='The number of records to include in a mini-batch for an HTTP inference request.', default='') + parser.add_argument('--environment', type=_utils.yaml_or_json_str, required=False, help='The dictionary of the environment variables to set in the Docker container. 
Up to 16 key-value entries in the map.', default={}) + parser.add_argument('--input_location', type=str, required=True, help='The S3 location of the data source that is associated with a channel.') + parser.add_argument('--data_type', choices=['ManifestFile', 'S3Prefix', 'AugmentedManifestFile', ''], type=str, required=False, help='Data type of the input. Can be ManifestFile, S3Prefix, or AugmentedManifestFile.', default='S3Prefix') + parser.add_argument('--content_type', type=str, required=False, help='The multipurpose internet mail extension (MIME) type of the data.', default='') + parser.add_argument('--split_type', choices=['None', 'Line', 'RecordIO', 'TFRecord', ''], type=str, required=False, help='The method to use to split the transform job data files into smaller batches.', default='None') + parser.add_argument('--compression_type', choices=['None', 'Gzip', ''], type=str, required=False, help='If the transform data is compressed, the specification of the compression type.', default='None') + parser.add_argument('--output_location', type=str, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.') + parser.add_argument('--accept', type=str, required=False, help='The MIME type used to specify the output data.') + parser.add_argument('--assemble_with', choices=['None', 'Line', ''], type=str, required=False, help='Defines how to assemble the results of the transform job as a single S3 object. 
Either None or Line.') + parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='') + parser.add_argument('--input_filter', type=str, required=False, help='A JSONPath expression used to select a portion of the input data to pass to the algorithm.', default='') + parser.add_argument('--output_filter', type=str, required=False, help='A JSONPath expression used to select a portion of the joined dataset to save in the output file for a batch transform job.', default='') + parser.add_argument('--join_source', choices=['None', 'Input', ''], type=str, required=False, help='Specifies the source of the data to join with the transformed data.', default='None') parser.add_argument('--instance_type', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge', - 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str.strip, required=True, help='The ML compute instance type for the transform job.', default='ml.m4.xlarge') - parser.add_argument('--instance_count', type=_utils.str_to_int, required=False, help='The number of ML compute instances to use in the transform job.') - parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='') - parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') - parser.add_argument('--output_location_file', type=str.strip, required=True, 
help='File path where the program will write the Amazon S3 URI of the transform job results.') + 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, required=True, help='The ML compute instance type for the transform job.', default='ml.m4.xlarge') + parser.add_argument('--instance_count', type=int, required=False, help='The number of ML compute instances to use in the transform job.') + parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='') + parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={}) + parser.add_argument('--output_location_file', type=str, required=True, help='File path where the program will write the Amazon S3 URI of the transform job results.') return parser diff --git a/components/aws/sagemaker/codebuild/deploy.buildspec.yml b/components/aws/sagemaker/codebuild/deploy.buildspec.yml index f2acff30a45c..4c3c7534f0fe 100644 --- a/components/aws/sagemaker/codebuild/deploy.buildspec.yml +++ b/components/aws/sagemaker/codebuild/deploy.buildspec.yml @@ -1,10 +1,10 @@ -version: 0.2 +version: 0.2 + phases: pre_build: commands: # Log in to Dockerhub - - mkdir -p ~/.docker - - echo $DOCKER_CONFIG > ~/.docker/config.json + - docker login -u $DOCKER_CONFIG_USERNAME -p $DOCKER_CONFIG_PASSWORD build: commands: diff --git a/components/aws/sagemaker/codebuild/scripts/deploy.sh b/components/aws/sagemaker/codebuild/scripts/deploy.sh index aa023f00512f..09bf596b1d2d 100755 --- a/components/aws/sagemaker/codebuild/scripts/deploy.sh +++ b/components/aws/sagemaker/codebuild/scripts/deploy.sh @@ -5,6 +5,7 @@ set -e REMOTE_REPOSITORY="amazon/aws-sagemaker-kfp-components" DRYRUN="true" FULL_VERSION_TAG="" +DOCKER_CONFIG_PATH=${DOCKER_CONFIG_PATH:-"/root/.docker"} while 
getopts ":d:v:" opt; do case ${opt} in @@ -64,13 +65,13 @@ echo "Tagged image with ${MAJOR_VERSION_IMAGE}" # Push to the remote repository if [ "${DRYRUN}" == "false" ]; then - docker push "${FULL_VERSION_IMAGE}" + docker --config "$DOCKER_CONFIG_PATH" push "${FULL_VERSION_IMAGE}" echo "Successfully pushed tag ${FULL_VERSION_IMAGE} to Docker Hub" - docker push "${MINOR_VERSION_IMAGE}" + docker --config "$DOCKER_CONFIG_PATH" push "${MINOR_VERSION_IMAGE}" echo "Successfully pushed tag ${MINOR_VERSION_IMAGE} to Docker Hub" - docker push "${MAJOR_VERSION_IMAGE}" + docker --config "$DOCKER_CONFIG_PATH" push "${MAJOR_VERSION_IMAGE}" echo "Successfully pushed tag ${MAJOR_VERSION_IMAGE} to Docker Hub" else echo "Dry run detected. Not pushing images." diff --git a/components/aws/sagemaker/common/_utils.py b/components/aws/sagemaker/common/_utils.py index bc58e1c0c697..69ae36748aec 100644 --- a/components/aws/sagemaker/common/_utils.py +++ b/components/aws/sagemaker/common/_utils.py @@ -13,6 +13,7 @@ import os import argparse from time import gmtime, strftime +from distutils.util import strtobool import time import string import random @@ -63,7 +64,7 @@ def nullable_string_argument(value): def add_default_client_arguments(parser): - parser.add_argument('--region', type=str.strip, required=True, help='The region where the training job launches.') + parser.add_argument('--region', type=str, required=True, help='The region where the training job launches.') parser.add_argument('--endpoint_url', type=nullable_string_argument, required=False, help='The URL to use when communicating with the Sagemaker service.') @@ -71,7 +72,7 @@ def get_component_version(): """Get component version from the first line of License file""" component_version = 'NULL' - with open('/THIRD-PARTY-LICENSES.txt', 'r') as license_file: + with open('THIRD-PARTY-LICENSES.txt', 'r') as license_file: version_match = re.search('Amazon SageMaker Components for Kubeflow Pipelines; version (([0-9]+[.])+[0-9]+)', 
license_file.readline()) if version_match is not None: @@ -858,35 +859,15 @@ def enable_spot_instance_support(training_job_config, args): def id_generator(size=4, chars=string.ascii_uppercase + string.digits): return ''.join(random.choice(chars) for _ in range(size)) - -def str_to_bool(s): - if s.lower().strip() == 'true': - return True - elif s.lower().strip() == 'false': - return False - else: - raise argparse.ArgumentTypeError('"True" or "False" expected.') - -def str_to_int(s): - if s: - return int(s) - else: - return 0 - -def str_to_float(s): - if s: - return float(s) - else: - return 0.0 - -def str_to_json_dict(s): - if s != '': - return json.loads(s) - else: - return {} - -def str_to_json_list(s): - if s != '': - return json.loads(s) - else: - return [] +def yaml_or_json_str(str): + if str == "" or str == None: + return None + try: + return json.loads(str) + except: + return yaml.safe_load(str) + +def str_to_bool(str): + # This distutils function returns an integer representation of the boolean + # rather than a True/False value. This simply hard casts it. + return bool(strtobool(str)) \ No newline at end of file diff --git a/components/aws/sagemaker/deploy/component.yaml b/components/aws/sagemaker/deploy/component.yaml index 15ae51073f29..349bfb62a8e2 100644 --- a/components/aws/sagemaker/deploy/component.yaml +++ b/components/aws/sagemaker/deploy/component.yaml @@ -4,83 +4,108 @@ description: | inputs: - name: region description: 'The region to deploy your model endpoints.' + type: String - name: endpoint_config_name description: 'The name of the endpoint configuration.' default: '' + type: String - name: variant_name_1 description: 'The name of the production variant.' default: 'variant-name-1' + type: String - name: model_name_1 description: 'The model name used for endpoint deployment.' + type: String - name: initial_instance_count_1 description: 'Number of instances to launch initially.' 
default: '1' + type: Integer - name: instance_type_1 description: 'The ML compute instance type.' default: 'ml.m4.xlarge' + type: String - name: initial_variant_weight_1 description: 'Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.' default: '1.0' + type: Float - name: accelerator_type_1 description: 'The size of the Elastic Inference (EI) instance to use for the production variant.' default: '' + type: String - name: variant_name_2 description: 'The name of the production variant.' default: 'variant-name-2' + type: String - name: model_name_2 description: 'The model name used for endpoint deployment.' default: '' + type: String - name: initial_instance_count_2 description: 'Number of instances to launch initially.' default: '1' + type: Integer - name: instance_type_2 description: 'The ML compute instance type.' default: 'ml.m4.xlarge' + type: String - name: initial_variant_weight_2 description: 'Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.' default: '1.0' + type: Float - name: accelerator_type_2 description: 'The size of the Elastic Inference (EI) instance to use for the production variant.' default: '' + type: String - name: variant_name_3 description: 'The name of the production variant.' default: 'variant-name-3' + type: String - name: model_name_3 description: 'The model name used for endpoint deployment' default: '' + type: String - name: initial_instance_count_3 description: 'Number of instances to launch initially.' default: '1' + type: Integer - name: instance_type_3 description: 'The ML compute instance type.' default: 'ml.m4.xlarge' + type: String - name: initial_variant_weight_3 description: 'Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.' 
default: '1.0' + type: Float - name: accelerator_type_3 description: 'The size of the Elastic Inference (EI) instance to use for the production variant.' default: '' + type: String - name: resource_encryption_key description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.' default: '' + type: String - name: endpoint_url description: 'The endpoint URL for the private link VPC endpoint.' default: '' + type: String - name: endpoint_config_tags description: 'Key-value pairs to categorize AWS resources.' default: '{}' + type: JsonObject - name: endpoint_name description: 'The name of the endpoint.' default: '' + type: String - name: endpoint_tags description: 'Key-value pairs to categorize AWS resources.' default: '{}' + type: JsonObject outputs: - {name: endpoint_name, description: 'Endpoint name'} implementation: container: - image: amazon/aws-sagemaker-kfp-components:0.3.0 - command: ['python'] + image: amazon/aws-sagemaker-kfp-components:0.3.1 + command: ['python3'] args: [ deploy.py, --region, {inputValue: region}, diff --git a/components/aws/sagemaker/deploy/src/deploy.py b/components/aws/sagemaker/deploy/src/deploy.py index 519b3f5d0b2f..1888e1b45d2c 100644 --- a/components/aws/sagemaker/deploy/src/deploy.py +++ b/components/aws/sagemaker/deploy/src/deploy.py @@ -19,36 +19,36 @@ def create_parser(): parser = argparse.ArgumentParser(description='SageMaker Training Job') _utils.add_default_client_arguments(parser) - parser.add_argument('--endpoint_config_name', type=str.strip, required=False, help='The name of the endpoint configuration.', default='') - parser.add_argument('--variant_name_1', type=str.strip, required=False, help='The name of the production variant.', default='variant-name-1') - parser.add_argument('--model_name_1', type=str.strip, required=True, help='The model name used for endpoint deployment.') - parser.add_argument('--initial_instance_count_1', 
type=_utils.str_to_int, required=False, help='Number of instances to launch initially.', default=1) + parser.add_argument('--endpoint_config_name', type=str, required=False, help='The name of the endpoint configuration.', default='') + parser.add_argument('--variant_name_1', type=str, required=False, help='The name of the production variant.', default='variant-name-1') + parser.add_argument('--model_name_1', type=str, required=True, help='The model name used for endpoint deployment.') + parser.add_argument('--initial_instance_count_1', type=int, required=False, help='Number of instances to launch initially.', default=1) parser.add_argument('--instance_type_1', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge', - 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str.strip, required=False, help='The ML compute instance type.', default='ml.m4.xlarge') - parser.add_argument('--initial_variant_weight_1', type=_utils.str_to_float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0) - parser.add_argument('--accelerator_type_1', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str.strip, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='') - parser.add_argument('--variant_name_2', type=str.strip, required=False, help='The name of the production variant.', default='variant-name-2') - parser.add_argument('--model_name_2', type=str.strip, required=False, help='The model name used for endpoint deployment.', default='') - 
parser.add_argument('--initial_instance_count_2', type=_utils.str_to_int, required=False, help='Number of instances to launch initially.', default=1) + 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge') + parser.add_argument('--initial_variant_weight_1', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0) + parser.add_argument('--accelerator_type_1', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='') + parser.add_argument('--variant_name_2', type=str, required=False, help='The name of the production variant.', default='variant-name-2') + parser.add_argument('--model_name_2', type=str, required=False, help='The model name used for endpoint deployment.', default='') + parser.add_argument('--initial_instance_count_2', type=int, required=False, help='Number of instances to launch initially.', default=1) parser.add_argument('--instance_type_2', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge', - 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str.strip, required=False, help='The ML compute instance type.', default='ml.m4.xlarge') - parser.add_argument('--initial_variant_weight_2', type=_utils.str_to_float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', 
default=1.0) - parser.add_argument('--accelerator_type_2', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str.strip, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='') - parser.add_argument('--variant_name_3', type=str.strip, required=False, help='The name of the production variant.', default='variant-name-3') - parser.add_argument('--model_name_3', type=str.strip, required=False, help='The model name used for endpoint deployment.', default='') - parser.add_argument('--initial_instance_count_3', type=_utils.str_to_int, required=False, help='Number of instances to launch initially.', default=1) + 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge') + parser.add_argument('--initial_variant_weight_2', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0) + parser.add_argument('--accelerator_type_2', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='') + parser.add_argument('--variant_name_3', type=str, required=False, help='The name of the production variant.', default='variant-name-3') + parser.add_argument('--model_name_3', type=str, required=False, help='The model name used for endpoint deployment.', default='') + parser.add_argument('--initial_instance_count_3', type=int, required=False, help='Number of instances to launch initially.', default=1) parser.add_argument('--instance_type_3', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 
'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge', - 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str.strip, required=False, help='The ML compute instance type.', default='ml.m4.xlarge') - parser.add_argument('--initial_variant_weight_3', type=_utils.str_to_float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0) - parser.add_argument('--accelerator_type_3', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str.strip, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='') - parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='') - parser.add_argument('--endpoint_config_tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') + 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge', ''], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge') + parser.add_argument('--initial_variant_weight_3', type=float, required=False, help='Determines initial traffic distribution among all of the models that you specify in the endpoint configuration.', default=1.0) + parser.add_argument('--accelerator_type_3', choices=['ml.eia1.medium', 'ml.eia1.large', 'ml.eia1.xlarge', ''], type=str, required=False, help='The size of the Elastic Inference (EI) instance to use for the production variant.', default='') + parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the 
storage volume attached to the ML compute instance(s).', default='') + parser.add_argument('--endpoint_config_tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={}) - parser.add_argument('--endpoint_name', type=str.strip, required=False, help='The name of the endpoint.', default='') - parser.add_argument('--endpoint_tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') + parser.add_argument('--endpoint_name', type=str, required=False, help='The name of the endpoint.', default='') + parser.add_argument('--endpoint_tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={}) return parser diff --git a/components/aws/sagemaker/ground_truth/component.yaml b/components/aws/sagemaker/ground_truth/component.yaml index 06823416e140..3a143cc77521 100644 --- a/components/aws/sagemaker/ground_truth/component.yaml +++ b/components/aws/sagemaker/ground_truth/component.yaml @@ -4,92 +4,123 @@ description: | inputs: - name: region description: 'The region where the cluster launches.' + type: String - name: role description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.' + type: String - name: job_name description: 'The name of the labeling job.' + type: String - name: label_attribute_name description: 'The attribute name to use for the label in the output manifest file. Default is the job name.' default: '' + type: String - name: manifest_location description: 'The Amazon S3 location of the manifest file that describes the input data objects.' + type: String - name: output_location description: 'The Amazon S3 location to write output data.' + type: String - name: output_encryption_key description: 'The AWS Key Management Service ID of the key used to encrypt the output data.' 
default: '' + type: String - name: task_type description: 'Built in image classification, bounding box, text classification, or semantic segmentation, or custom. If custom, please provide pre- and post-labeling task lambda functions.' + type: String - name: worker_type description: 'The workteam for data labeling, either public, private, or vendor.' + type: String - name: workteam_arn description: 'The ARN of the work team assigned to complete the tasks.' default: '' + type: String - name: no_adult_content description: 'If true, your data is free of adult content.' default: 'False' + type: Bool - name: no_ppi description: 'If true, your data is free of personally identifiable information.' default: 'False' + type: Bool - name: label_category_config description: 'The S3 URL of the JSON structured file that defines the categories used to label the data objects.' default: '' + type: String - name: max_human_labeled_objects description: 'The maximum number of objects that can be labeled by human workers.' default: '' + type: Integer - name: max_percent_objects description: 'The maximum number of input data objects that should be labeled.' default: '' + type: Integer - name: enable_auto_labeling description: 'Enables auto-labeling, only for bounding box, text classification, and image classification.' default: 'False' + type: Bool - name: initial_model_arn description: 'The ARN of the final model used for a previous auto-labeling job.' default: '' + type: String - name: resource_encryption_key description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).' default: '' + type: String - name: ui_template description: 'The Amazon S3 bucket location of the UI template.' + type: String - name: pre_human_task_function description: 'The ARN of a Lambda function that is run before a data object is sent to a human worker.' 
default: '' + type: String - name: post_human_task_function description: 'The ARN of a Lambda function implements the logic for annotation consolidation.' default: '' + type: String - name: task_keywords description: 'Keywords used to describe the task so that workers on Amazon Mechanical Turk can discover the task.' default: '' + type: String - name: title description: 'A title for the task for your human workers.' + type: String - name: description description: 'A description of the task for your human workers.' + type: String - name: num_workers_per_object description: 'The number of human workers that will label an object.' + type: Integer - name: time_limit description: 'The amount of time that a worker has to complete a task in seconds' + type: Integer - name: task_availibility description: 'The length of time that a task remains available for labeling by human workers.' default: '' + type: Integer - name: max_concurrent_tasks description: 'The maximum number of data objects that can be labeled by human workers at the same time.' default: '' + type: Integer - name: workforce_task_price description: 'The price that you pay for each task performed by a public worker in USD. Specify to the tenth fractions of a cent. Format as "0.000".' default: '0.000' + type: Float - name: endpoint_url description: 'The endpoint URL for the private link VPC endpoint.' default: '' + type: String - name: tags description: 'Key-value pairs to categorize AWS resources.' 
default: '{}' + type: JsonObject outputs: - {name: output_manifest_location, description: 'The Amazon S3 bucket location of the manifest file for labeled data.'} - {name: active_learning_model_arn, description: 'The ARN for the most recent Amazon SageMaker model trained as part of automated data labeling.'} implementation: container: - image: amazon/aws-sagemaker-kfp-components:0.3.0 - command: ['python'] + image: amazon/aws-sagemaker-kfp-components:0.3.1 + command: ['python3'] args: [ ground_truth.py, --region, {inputValue: region}, diff --git a/components/aws/sagemaker/ground_truth/src/ground_truth.py b/components/aws/sagemaker/ground_truth/src/ground_truth.py index 8f6db7d544a8..68f7a557dd2e 100644 --- a/components/aws/sagemaker/ground_truth/src/ground_truth.py +++ b/components/aws/sagemaker/ground_truth/src/ground_truth.py @@ -19,35 +19,35 @@ def create_parser(): parser = argparse.ArgumentParser(description='SageMaker Ground Truth Job') _utils.add_default_client_arguments(parser) - parser.add_argument('--role', type=str.strip, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') - parser.add_argument('--job_name', type=str.strip, required=True, help='The name of the labeling job.') - parser.add_argument('--label_attribute_name', type=str.strip, required=False, help='The attribute name to use for the label in the output manifest file. 
Default is the job name.', default='') - parser.add_argument('--manifest_location', type=str.strip, required=True, help='The Amazon S3 location of the manifest file that describes the input data objects.') - parser.add_argument('--output_location', type=str.strip, required=True, help='The Amazon S3 location to write output data.') - parser.add_argument('--output_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='') - parser.add_argument('--task_type', type=str.strip, required=True, help='Built in image classification, bounding box, text classification, or semantic segmentation, or custom. If custom, please provide pre- and post-labeling task lambda functions.') - parser.add_argument('--worker_type', type=str.strip, required=True, help='The workteam for data labeling, either public, private, or vendor.') - parser.add_argument('--workteam_arn', type=str.strip, required=False, help='The ARN of the work team assigned to complete the tasks.') + parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') + parser.add_argument('--job_name', type=str, required=True, help='The name of the labeling job.') + parser.add_argument('--label_attribute_name', type=str, required=False, help='The attribute name to use for the label in the output manifest file. 
Default is the job name.', default='') + parser.add_argument('--manifest_location', type=str, required=True, help='The Amazon S3 location of the manifest file that describes the input data objects.') + parser.add_argument('--output_location', type=str, required=True, help='The Amazon S3 location to write output data.') + parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='') + parser.add_argument('--task_type', type=str, required=True, help='Built in image classification, bounding box, text classification, or semantic segmentation, or custom. If custom, please provide pre- and post-labeling task lambda functions.') + parser.add_argument('--worker_type', type=str, required=True, help='The workteam for data labeling, either public, private, or vendor.') + parser.add_argument('--workteam_arn', type=str, required=False, help='The ARN of the work team assigned to complete the tasks.') parser.add_argument('--no_adult_content', type=_utils.str_to_bool, required=False, help='If true, your data is free of adult content.', default='False') parser.add_argument('--no_ppi', type=_utils.str_to_bool, required=False, help='If true, your data is free of personally identifiable information.', default='False') - parser.add_argument('--label_category_config', type=str.strip, required=False, help='The S3 URL of the JSON structured file that defines the categories used to label the data objects.', default='') - parser.add_argument('--max_human_labeled_objects', type=_utils.str_to_int, required=False, help='The maximum number of objects that can be labeled by human workers.', default=0) - parser.add_argument('--max_percent_objects', type=_utils.str_to_int, required=False, help='The maximum percentatge of input data objects that should be labeled.', default=0) + parser.add_argument('--label_category_config', type=str, required=False, help='The S3 URL of the JSON structured file 
that defines the categories used to label the data objects.', default='') + parser.add_argument('--max_human_labeled_objects', type=int, required=False, help='The maximum number of objects that can be labeled by human workers.', default=0) + parser.add_argument('--max_percent_objects', type=int, required=False, help='The maximum percentage of input data objects that should be labeled.', default=0) parser.add_argument('--enable_auto_labeling', type=_utils.str_to_bool, required=False, help='Enables auto-labeling, only for bounding box, text classification, and image classification.', default=False) - parser.add_argument('--initial_model_arn', type=str.strip, required=False, help='The ARN of the final model used for a previous auto-labeling job.', default='') - parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='') - parser.add_argument('--ui_template', type=str.strip, required=True, help='The Amazon S3 bucket location of the UI template.') - parser.add_argument('--pre_human_task_function', type=str.strip, required=False, help='The ARN of a Lambda function that is run before a data object is sent to a human worker.', default='') - parser.add_argument('--post_human_task_function', type=str.strip, required=False, help='The ARN of a Lambda function implements the logic for annotation consolidation.', default='') - parser.add_argument('--task_keywords', type=str.strip, required=False, help='Keywords used to
type=_utils.str_to_int, required=True, help='The number of human workers that will label an object.') - parser.add_argument('--time_limit', type=_utils.str_to_int, required=True, help='The amount of time that a worker has to complete a task in seconds') - parser.add_argument('--task_availibility', type=_utils.str_to_int, required=False, help='The length of time that a task remains available for labelling by human workers.', default=0) - parser.add_argument('--max_concurrent_tasks', type=_utils.str_to_int, required=False, help='The maximum number of data objects that can be labeled by human workers at the same time.', default=0) - parser.add_argument('--workforce_task_price', type=_utils.str_to_float, required=False, help='The price that you pay for each task performed by a public worker in USD. Specify to the tenth fractions of a cent. Format as "0.000".', default=0.000) - parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') + parser.add_argument('--initial_model_arn', type=str, required=False, help='The ARN of the final model used for a previous auto-labeling job.', default='') + parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='') + parser.add_argument('--ui_template', type=str, required=True, help='The Amazon S3 bucket location of the UI template.') + parser.add_argument('--pre_human_task_function', type=str, required=False, help='The ARN of a Lambda function that is run before a data object is sent to a human worker.', default='') + parser.add_argument('--post_human_task_function', type=str, required=False, help='The ARN of a Lambda function implements the logic for annotation consolidation.', default='') + parser.add_argument('--task_keywords', type=str, required=False, help='Keywords used to 
describe the task so that workers on Amazon Mechanical Turk can discover the task.', default='') + parser.add_argument('--title', type=str, required=True, help='A title for the task for your human workers.') + parser.add_argument('--description', type=str, required=True, help='A description of the task for your human workers.') + parser.add_argument('--num_workers_per_object', type=int, required=True, help='The number of human workers that will label an object.') + parser.add_argument('--time_limit', type=int, required=True, help='The amount of time that a worker has to complete a task in seconds') + parser.add_argument('--task_availibility', type=int, required=False, help='The length of time that a task remains available for labelling by human workers.', default=0) + parser.add_argument('--max_concurrent_tasks', type=int, required=False, help='The maximum number of data objects that can be labeled by human workers at the same time.', default=0) + parser.add_argument('--workforce_task_price', type=float, required=False, help='The price that you pay for each task performed by a public worker in USD. Specify to the tenth fractions of a cent. Format as "0.000".', default=0.000) + parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={}) return parser diff --git a/components/aws/sagemaker/hyperparameter_tuning/component.yaml b/components/aws/sagemaker/hyperparameter_tuning/component.yaml index 7d34164a6a91..396495f0934f 100644 --- a/components/aws/sagemaker/hyperparameter_tuning/component.yaml +++ b/components/aws/sagemaker/hyperparameter_tuning/component.yaml @@ -7,101 +7,136 @@ inputs: - name: job_name description: 'The name of the tuning job. Must be unique within the same AWS account and AWS region.' default: '' + type: String - name: role description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.' 
+ type: String - name: image description: 'The registry path of the Docker image that contains the training algorithm.' default: '' + type: String - name: algorithm_name description: 'The name of the algorithm resource to use for the hyperparameter tuning job. Do not specify a value for this if using training image.' default: '' + type: String - name: training_input_mode description: 'The input mode that the algorithm supports. File or Pipe.' default: 'File' + type: String - name: metric_definitions description: 'The dictionary of name-regex pairs specify the metrics that the algorithm emits.' default: '{}' + type: JsonObject - name: strategy description: 'How hyperparameter tuning chooses the combinations of hyperparameter values to use for the training job it launches.' default: 'Bayesian' + type: String - name: metric_name description: 'The name of the metric to use for the objective metric.' + type: String - name: metric_type description: 'Whether to minimize or maximize the objective metric.' + type: String - name: early_stopping_type description: 'Whether to use early stopping for training jobs launched by the tuning job.' default: 'Off' + type: String - name: static_parameters description: 'The values of hyperparameters that do not change for the tuning job.' default: '{}' + type: JsonObject - name: integer_parameters description: 'The array of IntegerParameterRange objects that specify ranges of integer hyperparameters that you want to search.' default: '[]' + type: JsonArray - name: continuous_parameters description: 'The array of ContinuousParameterRange objects that specify ranges of continuous hyperparameters that you want to search.' default: '[]' + type: JsonArray - name: categorical_parameters description: 'The array of CategoricalParameterRange objects that specify ranges of categorical hyperparameters that you want to search.' default: '[]' + type: JsonArray - name: channels description: 'A list of dicts specifying the input channels.
Must have at least one.' + type: JsonArray - name: output_location description: 'The Amazon S3 path where you want Amazon SageMaker to store the model artifacts is from the best training job.' + type: String - name: output_encryption_key description: 'The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.' default: '' + type: String - name: instance_type description: 'The ML compute instance type.' default: 'ml.m4.xlarge' + type: String - name: instance_count description: 'The number of ML compute instances to use in each training job.' default: '1' + type: Integer - name: volume_size description: 'The size of the ML storage volume that you want to provision.' default: '30' + type: Integer - name: max_num_jobs description: 'The maximum number of training jobs that a hyperparameter tuning job can launch.' + type: Integer - name: max_parallel_jobs description: 'The maximum number of concurrent training jobs that a hyperparameter tuning job can launch.' + type: Integer - name: max_run_time description: 'The maximum run time in seconds per training job.' default: '86400' + type: Integer - name: resource_encryption_key description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).' default: '' + type: String - name: vpc_security_group_ids description: 'The VPC security group IDs, in the form sg-xxxxxxxx.' default: '' + type: String - name: vpc_subnets description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.' default: '' + type: String - name: network_isolation description: 'Isolates the training container.' default: 'True' + type: Bool - name: traffic_encryption description: 'Encrypts all communications between ML compute instances in distributed training.' default: 'False' + type: Bool - name: spot_instance description: 'Use managed spot training.' 
default: 'False' + type: Bool - name: max_wait_time description: 'The maximum time in seconds you are willing to wait for a managed spot training job to complete.' default: '86400' + type: Integer - name: checkpoint_config description: 'Dictionary of information about the output location for managed spot training checkpoint data.' default: '{}' + type: JsonObject - name: warm_start_type description: 'Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"' default: '' + type: String - name: parent_hpo_jobs description: 'List of previously completed or stopped hyperparameter tuning jobs to be used as a starting point.' default: '' + type: String - name: endpoint_url description: 'The endpoint URL for the private link VPC endpoint.' default: '' + type: String - name: tags description: 'Key-value pairs, to categorize AWS resources.' default: '{}' + type: JsonObject outputs: - name: hpo_job_name description: 'The name of the hyper parameter tuning job' @@ -115,8 +150,8 @@ outputs: description: 'The registry path of the Docker image that contains the training algorithm' implementation: container: - image: amazon/aws-sagemaker-kfp-components:0.3.0 - command: ['python'] + image: amazon/aws-sagemaker-kfp-components:0.3.1 + command: ['python3'] args: [ hyperparameter_tuning.py, --region, {inputValue: region}, diff --git a/components/aws/sagemaker/hyperparameter_tuning/src/hyperparameter_tuning.py b/components/aws/sagemaker/hyperparameter_tuning/src/hyperparameter_tuning.py index 68fe2fefa584..df44a4098a34 100644 --- a/components/aws/sagemaker/hyperparameter_tuning/src/hyperparameter_tuning.py +++ b/components/aws/sagemaker/hyperparameter_tuning/src/hyperparameter_tuning.py @@ -20,46 +20,46 @@ def create_parser(): parser = argparse.ArgumentParser(description='SageMaker Hyperparameter Tuning Job') _utils.add_default_client_arguments(parser) - parser.add_argument('--job_name', type=str.strip, required=False, help='The name of the tuning job. 
Must be unique within the same AWS account and AWS region.') - parser.add_argument('--role', type=str.strip, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') - parser.add_argument('--image', type=str.strip, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='') - parser.add_argument('--algorithm_name', type=str.strip, required=False, help='The name of the resource algorithm to use for the hyperparameter tuning job.', default='') - parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str.strip, required=False, help='The input mode that the algorithm supports. File or Pipe.', default='File') - parser.add_argument('--metric_definitions', type=_utils.str_to_json_dict, required=False, help='The dictionary of name-regex pairs specify the metrics that the algorithm emits.', default='{}') - parser.add_argument('--strategy', choices=['Bayesian', 'Random'], type=str.strip, required=False, help='How hyperparameter tuning chooses the combinations of hyperparameter values to use for the training job it launches.', default='Bayesian') - parser.add_argument('--metric_name', type=str.strip, required=True, help='The name of the metric to use for the objective metric.') - parser.add_argument('--metric_type', choices=['Maximize', 'Minimize'], type=str.strip, required=True, help='Whether to minimize or maximize the objective metric.') - parser.add_argument('--early_stopping_type', choices=['Off', 'Auto'], type=str.strip, required=False, help='Whether to minimize or maximize the objective metric.', default='Off') - parser.add_argument('--static_parameters', type=_utils.str_to_json_dict, required=False, help='The values of hyperparameters that do not change for the tuning job.', default='{}') - parser.add_argument('--integer_parameters', type=_utils.str_to_json_list, required=False, help='The array of IntegerParameterRange objects that 
specify ranges of integer hyperparameters that you want to search.', default='[]') - parser.add_argument('--continuous_parameters', type=_utils.str_to_json_list, required=False, help='The array of ContinuousParameterRange objects that specify ranges of continuous hyperparameters that you want to search.', default='[]') - parser.add_argument('--categorical_parameters', type=_utils.str_to_json_list, required=False, help='The array of CategoricalParameterRange objects that specify ranges of categorical hyperparameters that you want to search.', default='[]') - parser.add_argument('--channels', type=_utils.str_to_json_list, required=True, help='A list of dicts specifying the input channels. Must have at least one.') - parser.add_argument('--output_location', type=str.strip, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.') - parser.add_argument('--output_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='') + parser.add_argument('--job_name', type=str, required=False, help='The name of the tuning job. Must be unique within the same AWS account and AWS region.') + parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') + parser.add_argument('--image', type=str, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='') + parser.add_argument('--algorithm_name', type=str, required=False, help='The name of the resource algorithm to use for the hyperparameter tuning job.', default='') + parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str, required=False, help='The input mode that the algorithm supports. 
File or Pipe.', default='File') + parser.add_argument('--metric_definitions', type=_utils.yaml_or_json_str, required=False, help='The dictionary of name-regex pairs specify the metrics that the algorithm emits.', default={}) + parser.add_argument('--strategy', choices=['Bayesian', 'Random'], type=str, required=False, help='How hyperparameter tuning chooses the combinations of hyperparameter values to use for the training job it launches.', default='Bayesian') + parser.add_argument('--metric_name', type=str, required=True, help='The name of the metric to use for the objective metric.') + parser.add_argument('--metric_type', choices=['Maximize', 'Minimize'], type=str, required=True, help='Whether to minimize or maximize the objective metric.') + parser.add_argument('--early_stopping_type', choices=['Off', 'Auto'], type=str, required=False, help='Whether to use early stopping for training jobs launched by the tuning job.', default='Off') + parser.add_argument('--static_parameters', type=_utils.yaml_or_json_str, required=False, help='The values of hyperparameters that do not change for the tuning job.', default={}) + parser.add_argument('--integer_parameters', type=_utils.yaml_or_json_str, required=False, help='The array of IntegerParameterRange objects that specify ranges of integer hyperparameters that you want to search.', default=[]) + parser.add_argument('--continuous_parameters', type=_utils.yaml_or_json_str, required=False, help='The array of ContinuousParameterRange objects that specify ranges of continuous hyperparameters that you want to search.', default=[]) + parser.add_argument('--categorical_parameters', type=_utils.yaml_or_json_str, required=False, help='The array of CategoricalParameterRange objects that specify ranges of categorical hyperparameters that you want to search.', default=[]) + parser.add_argument('--channels', type=_utils.yaml_or_json_str, required=True, help='A list of dicts specifying the input channels.
Must have at least one.') + parser.add_argument('--output_location', type=str, required=True, help='The Amazon S3 path where you want Amazon SageMaker to store the results of the transform job.') + parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='') parser.add_argument('--instance_type', choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge', 'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge', - 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str.strip, required=False, help='The ML compute instance type.', default='ml.m4.xlarge') - parser.add_argument('--instance_count', type=_utils.str_to_int, required=False, help='The number of ML compute instances to use in each training job.', default=1) - parser.add_argument('--volume_size', type=_utils.str_to_int, required=False, help='The size of the ML storage volume that you want to provision.', default=1) - parser.add_argument('--max_num_jobs', type=_utils.str_to_int, required=True, help='The maximum number of training jobs that a hyperparameter tuning job can launch.') - parser.add_argument('--max_parallel_jobs', type=_utils.str_to_int, required=True, help='The maximum number of concurrent training jobs that a hyperparameter tuning job can launch.') - parser.add_argument('--max_run_time', type=_utils.str_to_int, required=False, help='The maximum run time in seconds per training job.', default=86400) - parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', 
default='') - parser.add_argument('--vpc_security_group_ids', type=str.strip, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.') - parser.add_argument('--vpc_subnets', type=str.strip, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.') + 'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, required=False, help='The ML compute instance type.', default='ml.m4.xlarge') + parser.add_argument('--instance_count', type=int, required=False, help='The number of ML compute instances to use in each training job.', default=1) + parser.add_argument('--volume_size', type=int, required=False, help='The size of the ML storage volume that you want to provision.', default=1) + parser.add_argument('--max_num_jobs', type=int, required=True, help='The maximum number of training jobs that a hyperparameter tuning job can launch.') + parser.add_argument('--max_parallel_jobs', type=int, required=True, help='The maximum number of concurrent training jobs that a hyperparameter tuning job can launch.') + parser.add_argument('--max_run_time', type=int, required=False, help='The maximum run time in seconds per training job.', default=86400) + parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='') + parser.add_argument('--vpc_security_group_ids', type=str, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.') + parser.add_argument('--vpc_subnets', type=str, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.') parser.add_argument('--network_isolation', type=_utils.str_to_bool, required=False, help='Isolates the training container.', default=True) parser.add_argument('--traffic_encryption', type=_utils.str_to_bool, required=False, help='Encrypts all 
communications between ML compute instances in distributed training.', default=False) - parser.add_argument('--warm_start_type', choices=['IdenticalDataAndAlgorithm', 'TransferLearning', ''], type=str.strip, required=False, help='Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"') - parser.add_argument('--parent_hpo_jobs', type=str.strip, required=False, help='List of previously completed or stopped hyperparameter tuning jobs to be used as a starting point.', default='') + parser.add_argument('--warm_start_type', choices=['IdenticalDataAndAlgorithm', 'TransferLearning', ''], type=str, required=False, help='Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"') + parser.add_argument('--parent_hpo_jobs', type=str, required=False, help='List of previously completed or stopped hyperparameter tuning jobs to be used as a starting point.', default='') ### Start spot instance support parser.add_argument('--spot_instance', type=_utils.str_to_bool, required=False, help='Use managed spot training.', default=False) - parser.add_argument('--max_wait_time', type=_utils.str_to_int, required=False, help='The maximum time in seconds you are willing to wait for a managed spot training job to complete.', default=86400) - parser.add_argument('--checkpoint_config', type=_utils.str_to_json_dict, required=False, help='Dictionary of information about the output location for managed spot training checkpoint data.', default='{}') + parser.add_argument('--max_wait_time', type=int, required=False, help='The maximum time in seconds you are willing to wait for a managed spot training job to complete.', default=86400) + parser.add_argument('--checkpoint_config', type=_utils.yaml_or_json_str, required=False, help='Dictionary of information about the output location for managed spot training checkpoint data.', default={}) ### End spot instance support - parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to 
categorize AWS resources.', default='{}') + parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={}) return parser diff --git a/components/aws/sagemaker/model/component.yaml b/components/aws/sagemaker/model/component.yaml index 52ddcc2d6635..31d66ef4cc99 100644 --- a/components/aws/sagemaker/model/component.yaml +++ b/components/aws/sagemaker/model/component.yaml @@ -4,49 +4,63 @@ description: | inputs: - name: region description: 'The region where the training job launches.' + type: String - name: model_name description: 'The name of the new model.' + type: String - name: role description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.' + type: String - name: container_host_name description: 'When a ContainerDefinition is part of an inference pipeline, this value uniquely identifies the container for the purposes of logging and metrics.' default: '' + type: String - name: image description: 'The Amazon EC2 Container Registry (Amazon ECR) path where inference code is stored.' default: '' + type: String - name: model_artifact_url description: 'S3 path where Amazon SageMaker to store the model artifacts.' default: '' + type: String - name: environment description: 'The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.' default: '{}' + type: JsonObject - name: model_package description: 'The name or Amazon Resource Name (ARN) of the model package to use to create the model.' default: '' + type: String - name: secondary_containers description: 'A list of dicts that specifies the additional containers in the inference pipeline.' default: '[]' + type: JsonArray - name: vpc_security_group_ids description: 'The VPC security group IDs, in the form sg-xxxxxxxx.' 
default: '' + type: String - name: vpc_subnets description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.' default: '' + type: String - name: network_isolation description: 'Isolates the training container.' default: 'True' + type: Bool - name: endpoint_url description: 'The endpoint URL for the private link VPC endpoint.' default: '' + type: String - name: tags description: 'Key-value pairs to categorize AWS resources.' default: '{}' + type: JsonObject outputs: - {name: model_name, description: 'The model name Sagemaker created'} implementation: container: - image: amazon/aws-sagemaker-kfp-components:0.3.0 - command: ['python'] + image: amazon/aws-sagemaker-kfp-components:0.3.1 + command: ['python3'] args: [ create_model.py, --region, {inputValue: region}, diff --git a/components/aws/sagemaker/model/src/create_model.py b/components/aws/sagemaker/model/src/create_model.py index 7fbe211d542b..c6fcebd246c9 100644 --- a/components/aws/sagemaker/model/src/create_model.py +++ b/components/aws/sagemaker/model/src/create_model.py @@ -19,18 +19,18 @@ def create_parser(): parser = argparse.ArgumentParser(description='SageMaker Training Job') _utils.add_default_client_arguments(parser) - parser.add_argument('--model_name', type=str.strip, required=True, help='The name of the new model.') - parser.add_argument('--role', type=str.strip, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') - parser.add_argument('--container_host_name', type=str.strip, required=False, help='When a ContainerDefinition is part of an inference pipeline, this value uniquely identifies the container for the purposes of logging and metrics.', default='') - parser.add_argument('--image', type=str.strip, required=False, help='The Amazon EC2 Container Registry (Amazon ECR) path where inference code is stored.', default='') - parser.add_argument('--model_artifact_url', type=str.strip, required=False, help='S3 
path where Amazon SageMaker to store the model artifacts.', default='') - parser.add_argument('--environment', type=_utils.str_to_json_dict, required=False, help='The dictionary of the environment variables to set in the Docker container. Up to 16 key-value entries in the map.', default='{}') - parser.add_argument('--model_package', type=str.strip, required=False, help='The name or Amazon Resource Name (ARN) of the model package to use to create the model.', default='') - parser.add_argument('--secondary_containers', type=_utils.str_to_json_list, required=False, help='A list of dicts that specifies the additional containers in the inference pipeline.', default='{}') - parser.add_argument('--vpc_security_group_ids', type=str.strip, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.', default='') - parser.add_argument('--vpc_subnets', type=str.strip, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.', default='') + parser.add_argument('--model_name', type=str, required=True, help='The name of the new model.') + parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') + parser.add_argument('--container_host_name', type=str, required=False, help='When a ContainerDefinition is part of an inference pipeline, this value uniquely identifies the container for the purposes of logging and metrics.', default='') + parser.add_argument('--image', type=str, required=False, help='The Amazon EC2 Container Registry (Amazon ECR) path where inference code is stored.', default='') + parser.add_argument('--model_artifact_url', type=str, required=False, help='S3 path where Amazon SageMaker to store the model artifacts.', default='') + parser.add_argument('--environment', type=_utils.yaml_or_json_str, required=False, help='The dictionary of the environment variables to set in the Docker container. 
Up to 16 key-value entries in the map.', default={}) + parser.add_argument('--model_package', type=str, required=False, help='The name or Amazon Resource Name (ARN) of the model package to use to create the model.', default='') + parser.add_argument('--secondary_containers', type=_utils.yaml_or_json_str, required=False, help='A list of dicts that specifies the additional containers in the inference pipeline.', default={}) + parser.add_argument('--vpc_security_group_ids', type=str, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.', default='') + parser.add_argument('--vpc_subnets', type=str, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.', default='') parser.add_argument('--network_isolation', type=_utils.str_to_bool, required=False, help='Isolates the training container.', default=True) - parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') + parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={}) return parser diff --git a/components/aws/sagemaker/train/component.yaml b/components/aws/sagemaker/train/component.yaml index e26f82d57338..d27a3864e834 100644 --- a/components/aws/sagemaker/train/component.yaml +++ b/components/aws/sagemaker/train/component.yaml @@ -4,83 +4,108 @@ description: | inputs: - name: region description: 'The region where the training job launches.' + type: String - name: job_name description: 'The name of the batch training job.' default: '' + type: String - name: role description: 'The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.' + type: String - name: image description: 'The registry path of the Docker image that contains the training algorithm.' 
default: '' + type: String - name: algorithm_name description: 'The name of the algorithm resource to use for the training job. Do not specify a value for this if using training image.' default: '' + type: String - name: metric_definitions description: 'The dictionary of name-regex pairs specify the metrics that the algorithm emits.' default: '{}' + type: JsonObject - name: training_input_mode description: 'The input mode that the algorithm supports. File or Pipe.' default: 'File' + type: String - name: hyperparameters description: 'Dictionary of hyperparameters for the the algorithm.' default: '{}' + type: JsonObject - name: channels description: 'A list of dicts specifying the input channels. Must have at least one.' + type: JsonArray - name: instance_type description: 'The ML compute instance type.' default: 'ml.m4.xlarge' + type: String - name: instance_count description: 'The number of ML compute instances to use in each training job.' default: '1' + type: Integer - name: volume_size description: 'The size of the ML storage volume that you want to provision.' default: '30' + type: Integer - name: resource_encryption_key description: 'The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).' default: '' + type: String - name: max_run_time description: 'The maximum run time in seconds for the training job.' default: '86400' + type: Integer - name: model_artifact_path description: 'Identifies the S3 path where you want Amazon SageMaker to store the model artifacts.' + type: String - name: output_encryption_key description: 'The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.' default: '' + type: String - name: vpc_security_group_ids description: 'The VPC security group IDs, in the form sg-xxxxxxxx.' default: '' + type: String - name: vpc_subnets description: 'The ID of the subnets in the VPC to which you want to connect your hpo job.' 
default: '' + type: String - name: network_isolation description: 'Isolates the training container.' default: 'True' + type: Bool - name: traffic_encryption description: 'Encrypts all communications between ML compute instances in distributed training.' default: 'False' + type: Bool - name: spot_instance description: 'Use managed spot training.' default: 'False' + type: Bool - name: max_wait_time description: 'The maximum time in seconds you are willing to wait for a managed spot training job to complete.' default: '86400' + type: Integer - name: checkpoint_config description: 'Dictionary of information about the output location for managed spot training checkpoint data.' default: '{}' + type: JsonObject - name: endpoint_url description: 'The endpoint URL for the private link VPC endpoint.' default: '' + type: String - name: tags description: 'Key-value pairs, to categorize AWS resources.' default: '{}' + type: JsonObject outputs: - {name: model_artifact_url, description: 'Model artifacts url'} - {name: job_name, description: 'Training job name'} - {name: training_image, description: 'The registry path of the Docker image that contains the training algorithm'} implementation: container: - image: amazon/aws-sagemaker-kfp-components:0.3.0 - command: ['python'] + image: amazon/aws-sagemaker-kfp-components:0.3.1 + command: ['python3'] args: [ train.py, --region, {inputValue: region}, diff --git a/components/aws/sagemaker/train/src/train.py b/components/aws/sagemaker/train/src/train.py index 448e86341b3b..a6ae3bdbf11b 100644 --- a/components/aws/sagemaker/train/src/train.py +++ b/components/aws/sagemaker/train/src/train.py @@ -19,35 +19,35 @@ def create_parser(): parser = argparse.ArgumentParser(description='SageMaker Training Job') _utils.add_default_client_arguments(parser) - parser.add_argument('--job_name', type=str.strip, required=False, help='The name of the training job.', default='') - parser.add_argument('--role', type=str.strip, required=True, help='The Amazon 
Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') - parser.add_argument('--image', type=str.strip, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='') - parser.add_argument('--algorithm_name', type=str.strip, required=False, help='The name of the resource algorithm to use for the training job.', default='') - parser.add_argument('--metric_definitions', type=_utils.str_to_json_dict, required=False, help='The dictionary of name-regex pairs specify the metrics that the algorithm emits.', default='{}') - parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str.strip, help='The input mode that the algorithm supports. File or Pipe.', default='File') - parser.add_argument('--hyperparameters', type=_utils.str_to_json_dict, help='Dictionary of hyperparameters for the the algorithm.', default='{}') - parser.add_argument('--channels', type=_utils.str_to_json_list, required=True, help='A list of dicts specifying the input channels. Must have at least one.') + parser.add_argument('--job_name', type=str, required=False, help='The name of the training job.', default='') + parser.add_argument('--role', type=str, required=True, help='The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform tasks on your behalf.') + parser.add_argument('--image', type=str, required=True, help='The registry path of the Docker image that contains the training algorithm.', default='') + parser.add_argument('--algorithm_name', type=str, required=False, help='The name of the resource algorithm to use for the training job.', default='') + parser.add_argument('--metric_definitions', type=_utils.yaml_or_json_str, required=False, help='The dictionary of name-regex pairs specify the metrics that the algorithm emits.', default={}) + parser.add_argument('--training_input_mode', choices=['File', 'Pipe'], type=str, help='The input mode that the algorithm supports. 
File or Pipe.', default='File')
+    parser.add_argument('--hyperparameters', type=_utils.yaml_or_json_str, help='Dictionary of hyperparameters for the algorithm.', default={})
+    parser.add_argument('--channels', type=_utils.yaml_or_json_str, required=True, help='A list of dicts specifying the input channels. Must have at least one.')
     parser.add_argument('--instance_type', required=True, choices=['ml.m4.xlarge', 'ml.m4.2xlarge', 'ml.m4.4xlarge', 'ml.m4.10xlarge', 'ml.m4.16xlarge', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge',
         'ml.m5.4xlarge', 'ml.m5.12xlarge', 'ml.m5.24xlarge', 'ml.c4.xlarge', 'ml.c4.2xlarge', 'ml.c4.4xlarge', 'ml.c4.8xlarge', 'ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge', 'ml.p3.2xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge',
-        'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str.strip, help='The ML compute instance type.', default='ml.m4.xlarge')
-    parser.add_argument('--instance_count', required=True, type=_utils.str_to_int, help='The registry path of the Docker image that contains the training algorithm.', default=1)
-    parser.add_argument('--volume_size', type=_utils.str_to_int, required=True, help='The size of the ML storage volume that you want to provision.', default=1)
-    parser.add_argument('--resource_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
-    parser.add_argument('--max_run_time', type=_utils.str_to_int, required=True, help='The maximum run time in seconds for the training job.', default=86400)
-    parser.add_argument('--model_artifact_path', type=str.strip, required=True, help='Identifies the S3 path where you want Amazon SageMaker to store the model artifacts.')
-    parser.add_argument('--output_encryption_key', type=str.strip, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
-    
parser.add_argument('--vpc_security_group_ids', type=str.strip, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.')
-    parser.add_argument('--vpc_subnets', type=str.strip, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.')
+        'ml.c5.xlarge', 'ml.c5.2xlarge', 'ml.c5.4xlarge', 'ml.c5.9xlarge', 'ml.c5.18xlarge'], type=str, help='The ML compute instance type.', default='ml.m4.xlarge')
+    parser.add_argument('--instance_count', required=True, type=int, help='The number of ML compute instances to use in each training job.', default=1)
+    parser.add_argument('--volume_size', type=int, required=True, help='The size of the ML storage volume that you want to provision.', default=1)
+    parser.add_argument('--resource_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance(s).', default='')
+    parser.add_argument('--max_run_time', type=int, required=True, help='The maximum run time in seconds for the training job.', default=86400)
+    parser.add_argument('--model_artifact_path', type=str, required=True, help='Identifies the S3 path where you want Amazon SageMaker to store the model artifacts.')
+    parser.add_argument('--output_encryption_key', type=str, required=False, help='The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.', default='')
+    parser.add_argument('--vpc_security_group_ids', type=str, required=False, help='The VPC security group IDs, in the form sg-xxxxxxxx.')
+    parser.add_argument('--vpc_subnets', type=str, required=False, help='The ID of the subnets in the VPC to which you want to connect your hpo job.')
     parser.add_argument('--network_isolation', type=_utils.str_to_bool, required=False, help='Isolates the training container.', default=True)
     parser.add_argument('--traffic_encryption', type=_utils.str_to_bool, required=False, help='Encrypts all 
communications between ML compute instances in distributed training.', default=False) ### Start spot instance support parser.add_argument('--spot_instance', type=_utils.str_to_bool, required=False, help='Use managed spot training.', default=False) - parser.add_argument('--max_wait_time', type=_utils.str_to_int, required=False, help='The maximum time in seconds you are willing to wait for a managed spot training job to complete.', default=86400) - parser.add_argument('--checkpoint_config', type=_utils.str_to_json_dict, required=False, help='Dictionary of information about the output location for managed spot training checkpoint data.', default='{}') + parser.add_argument('--max_wait_time', type=int, required=False, help='The maximum time in seconds you are willing to wait for a managed spot training job to complete.', default=86400) + parser.add_argument('--checkpoint_config', type=_utils.yaml_or_json_str, required=False, help='Dictionary of information about the output location for managed spot training checkpoint data.', default={}) ### End spot instance support - parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') + parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={}) return parser diff --git a/components/aws/sagemaker/workteam/component.yaml b/components/aws/sagemaker/workteam/component.yaml index af37ae614835..589f25d709c5 100644 --- a/components/aws/sagemaker/workteam/component.yaml +++ b/components/aws/sagemaker/workteam/component.yaml @@ -4,31 +4,40 @@ description: | inputs: - name: region description: 'The region where the cluster launches.' + type: String - name: team_name description: 'The name of your work team.' + type: String - name: description description: 'A description of the work team.' 
+ type: String - name: user_pool description: 'An identifier for a user pool. The user pool must be in the same region as the service that you are calling.' + type: String - name: user_groups description: 'An identifier for a user group.' + type: String - name: client_id description: 'An identifier for an application client. You must create the app client ID using Amazon Cognito.' + type: String - name: sns_topic description: 'The ARN for the SNS topic to which notifications should be published.' default: '' + type: String - name: endpoint_url description: 'The endpoint URL for the private link VPC endpoint.' default: '' + type: String - name: tags description: 'Key-value pairs to categorize AWS resources.' default: '{}' + type: JsonObject outputs: - {name: workteam_arn, description: 'The ARN of the workteam.'} implementation: container: - image: amazon/aws-sagemaker-kfp-components:0.3.0 - command: ['python'] + image: amazon/aws-sagemaker-kfp-components:0.3.1 + command: ['python3'] args: [ workteam.py, --region, {inputValue: region}, diff --git a/components/aws/sagemaker/workteam/src/workteam.py b/components/aws/sagemaker/workteam/src/workteam.py index cda4d0a7d501..5f9b6caaf4d7 100644 --- a/components/aws/sagemaker/workteam/src/workteam.py +++ b/components/aws/sagemaker/workteam/src/workteam.py @@ -19,13 +19,13 @@ def create_parser(): parser = argparse.ArgumentParser(description='SageMaker Hyperparameter Tuning Job') _utils.add_default_client_arguments(parser) - parser.add_argument('--team_name', type=str.strip, required=True, help='The name of your work team.') - parser.add_argument('--description', type=str.strip, required=True, help='A description of the work team.') - parser.add_argument('--user_pool', type=str.strip, required=False, help='An identifier for a user pool. 
The user pool must be in the same region as the service that you are calling.', default='') - parser.add_argument('--user_groups', type=str.strip, required=False, help='A list of identifiers for user groups separated by commas.', default='') - parser.add_argument('--client_id', type=str.strip, required=False, help='An identifier for an application client. You must create the app client ID using Amazon Cognito.', default='') - parser.add_argument('--sns_topic', type=str.strip, required=False, help='The ARN for the SNS topic to which notifications should be published.', default='') - parser.add_argument('--tags', type=_utils.str_to_json_dict, required=False, help='An array of key-value pairs, to categorize AWS resources.', default='{}') + parser.add_argument('--team_name', type=str, required=True, help='The name of your work team.') + parser.add_argument('--description', type=str, required=True, help='A description of the work team.') + parser.add_argument('--user_pool', type=str, required=False, help='An identifier for a user pool. The user pool must be in the same region as the service that you are calling.', default='') + parser.add_argument('--user_groups', type=str, required=False, help='A list of identifiers for user groups separated by commas.', default='') + parser.add_argument('--client_id', type=str, required=False, help='An identifier for an application client. 
You must create the app client ID using Amazon Cognito.', default='') + parser.add_argument('--sns_topic', type=str, required=False, help='The ARN for the SNS topic to which notifications should be published.', default='') + parser.add_argument('--tags', type=_utils.yaml_or_json_str, required=False, help='An array of key-value pairs, to categorize AWS resources.', default={}) return parser diff --git a/samples/contrib/aws-samples/ground_truth_pipeline_demo/mini-image-classification-pipeline.py b/samples/contrib/aws-samples/ground_truth_pipeline_demo/mini-image-classification-pipeline.py index efabfd7e637b..cba865328f65 100644 --- a/samples/contrib/aws-samples/ground_truth_pipeline_demo/mini-image-classification-pipeline.py +++ b/samples/contrib/aws-samples/ground_truth_pipeline_demo/mini-image-classification-pipeline.py @@ -51,18 +51,18 @@ def ground_truth_test(region='us-west-2', ground_truth_ui_template='s3://your-bucket-name/mini-image-classification/ground-truth-demo/instructions.template', ground_truth_title='Mini image classification', ground_truth_description='Test for Ground Truth KFP component', - ground_truth_num_workers_per_object='1', - ground_truth_time_limit='30', - ground_truth_task_availibility='3600', - ground_truth_max_concurrent_tasks='20', + ground_truth_num_workers_per_object=1, + ground_truth_time_limit=30, + ground_truth_task_availibility=3600, + ground_truth_max_concurrent_tasks=20, training_algorithm_name='image classification', training_input_mode='Pipe', - training_hyperparameters='{"num_classes": "2", "num_training_samples": "14", "mini_batch_size": "2"}', + training_hyperparameters={"num_classes": "2", "num_training_samples": "14", "mini_batch_size": "2"}, training_output_location='s3://your-bucket-name/mini-image-classification/training-output', training_instance_type='ml.p2.xlarge', - training_instance_count='1', - training_volume_size='50', - training_max_run_time='3600', + training_instance_count=1, + training_volume_size=50, + 
training_max_run_time=3600, role_arn='' ): diff --git a/samples/contrib/aws-samples/mnist-kmeans-sagemaker/kmeans-hpo-pipeline.py b/samples/contrib/aws-samples/mnist-kmeans-sagemaker/kmeans-hpo-pipeline.py index c3cf49f14d95..4b94a182c3fb 100644 --- a/samples/contrib/aws-samples/mnist-kmeans-sagemaker/kmeans-hpo-pipeline.py +++ b/samples/contrib/aws-samples/mnist-kmeans-sagemaker/kmeans-hpo-pipeline.py @@ -21,7 +21,6 @@ 'S3DataDistributionType': 'FullyReplicated' } }, - 'ContentType': '', 'CompressionType': 'None', 'RecordWrapperType': 'None', 'InputMode': 'File' @@ -44,37 +43,37 @@ def hpo_test(region='us-west-2', image='', algorithm_name='K-Means', training_input_mode='File', - metric_definitions='{}', + metric_definitions={}, strategy='Bayesian', metric_name='test:msd', metric_type='Minimize', early_stopping_type='Off', - static_parameters='{"k": "10", "feature_dim": "784"}', - integer_parameters='[{"Name": "mini_batch_size", "MinValue": "450", "MaxValue": "550"}, \ - {"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}]', - continuous_parameters='[]', - categorical_parameters='[{"Name": "init_method", "Values": ["random", "kmeans++"]}]', - channels=json.dumps(channelObjList), + static_parameters={"k": "10", "feature_dim": "784"}, + integer_parameters=[{"Name": "mini_batch_size", "MinValue": "450", "MaxValue": "550"}, \ + {"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}], + continuous_parameters=[], + categorical_parameters=[{"Name": "init_method", "Values": ["random", "kmeans++"]}], + channels=channelObjList, output_location='s3://kubeflow-pipeline-data/mnist_kmeans_example/output', output_encryption_key='', instance_type='ml.p2.16xlarge', - instance_count='1', - volume_size='50', - max_num_jobs='1', - max_parallel_jobs='1', + instance_count=1, + volume_size=50, + max_num_jobs=1, + max_parallel_jobs=1, resource_encryption_key='', - max_run_time='3600', + max_run_time=3600, vpc_security_group_ids='', vpc_subnets='', 
endpoint_url='', - network_isolation='True', - traffic_encryption='False', + network_isolation=True, + traffic_encryption=False, warm_start_type='', parent_hpo_jobs='', - spot_instance='False', - max_wait_time='3600', - checkpoint_config='{}', - tags='{}', + spot_instance=False, + max_wait_time=3600, + checkpoint_config={}, + tags={}, role_arn='', ): diff --git a/samples/contrib/aws-samples/mnist-kmeans-sagemaker/mnist-classification-pipeline.py b/samples/contrib/aws-samples/mnist-kmeans-sagemaker/mnist-classification-pipeline.py index f02d47b817d7..3b2003911f94 100644 --- a/samples/contrib/aws-samples/mnist-kmeans-sagemaker/mnist-classification-pipeline.py +++ b/samples/contrib/aws-samples/mnist-kmeans-sagemaker/mnist-classification-pipeline.py @@ -26,7 +26,6 @@ 'S3DataDistributionType': 'FullyReplicated' } }, - 'ContentType': '', 'CompressionType': 'None', 'RecordWrapperType': 'None', 'InputMode': 'File' @@ -52,37 +51,37 @@ def mnist_classification(region='us-west-2', hpo_metric_name='test:msd', hpo_metric_type='Minimize', hpo_early_stopping_type='Off', - hpo_static_parameters='{"k": "10", "feature_dim": "784"}', - hpo_integer_parameters='[{"Name": "mini_batch_size", "MinValue": "500", "MaxValue": "600"}, {"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}]', - hpo_continuous_parameters='[]', - hpo_categorical_parameters='[{"Name": "init_method", "Values": ["random", "kmeans++"]}]', - hpo_channels=json.dumps(hpoChannels), - hpo_spot_instance='False', - hpo_max_wait_time='3600', - hpo_checkpoint_config='{}', + hpo_static_parameters={"k": "10", "feature_dim": "784"}, + hpo_integer_parameters=[{"Name": "mini_batch_size", "MinValue": "500", "MaxValue": "600"}, {"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}], + hpo_continuous_parameters=[], + hpo_categorical_parameters=[{"Name": "init_method", "Values": ["random", "kmeans++"]}], + hpo_channels=hpoChannels, + hpo_spot_instance=False, + hpo_max_wait_time=3600, + 
hpo_checkpoint_config={}, output_location='s3://kubeflow-pipeline-data/mnist_kmeans_example/output', output_encryption_key='', instance_type='ml.p2.16xlarge', - instance_count='1', - volume_size='50', - hpo_max_num_jobs='9', - hpo_max_parallel_jobs='3', - max_run_time='3600', + instance_count=1, + volume_size=50, + hpo_max_num_jobs=9, + hpo_max_parallel_jobs=3, + max_run_time=3600, endpoint_url='', - network_isolation='True', - traffic_encryption='False', - train_channels=json.dumps(trainChannels), - train_spot_instance='False', - train_max_wait_time='3600', - train_checkpoint_config='{}', + network_isolation=True, + traffic_encryption=False, + train_channels=trainChannels, + train_spot_instance=False, + train_max_wait_time=3600, + train_checkpoint_config={}, batch_transform_instance_type='ml.m4.xlarge', batch_transform_input='s3://kubeflow-pipeline-data/mnist_kmeans_example/input', batch_transform_data_type='S3Prefix', batch_transform_content_type='text/csv', batch_transform_compression_type='None', batch_transform_ouput='s3://kubeflow-pipeline-data/mnist_kmeans_example/output', - batch_transform_max_concurrent='4', - batch_transform_max_payload='6', + batch_transform_max_concurrent=4, + batch_transform_max_payload=6, batch_strategy='MultiRecord', batch_transform_split_type='Line', role_arn='' diff --git a/samples/contrib/aws-samples/simple_train_pipeline/training-pipeline.py b/samples/contrib/aws-samples/simple_train_pipeline/training-pipeline.py index d21320fdf4a8..a07c087c85b7 100644 --- a/samples/contrib/aws-samples/simple_train_pipeline/training-pipeline.py +++ b/samples/contrib/aws-samples/simple_train_pipeline/training-pipeline.py @@ -20,7 +20,6 @@ 'S3DataDistributionType': 'FullyReplicated' } }, - 'ContentType': '', 'CompressionType': 'None', 'RecordWrapperType': 'None', 'InputMode': 'File' @@ -40,19 +39,19 @@ def training( endpoint_url='', image='382416733822.dkr.ecr.us-east-1.amazonaws.com/kmeans:1', training_input_mode='File', - hyperparameters='{"k": 
"10", "feature_dim": "784"}', - channels=json.dumps(channelObjList), + hyperparameters={"k": "10", "feature_dim": "784"}, + channels=channelObjList, instance_type='ml.p2.xlarge', - instance_count='1', - volume_size='50', - max_run_time='3600', + instance_count=1, + volume_size=50, + max_run_time=3600, model_artifact_path='s3://kubeflow-pipeline-data/mnist_kmeans_example/data', output_encryption_key='', - network_isolation='True', - traffic_encryption='False', - spot_instance='False', - max_wait_time='3600', - checkpoint_config='{}', + network_isolation=True, + traffic_encryption=False, + spot_instance=False, + max_wait_time=3600, + checkpoint_config={}, role='' ): training = sagemaker_train_op(