Integration tests for AWS SageMaker Components (#3654)
* integration tests for aws sagemaker components with comment

* address comment related to S3 dataset creation

* rev3: bug fix in conda env yaml and reuse sagemaker method to get image URI

* Add createModel test

	- reduce code duplication
	- add some utility methods
surajkota authored May 7, 2020
1 parent e5bd2df commit 6beab22
Showing 16 changed files with 622 additions and 0 deletions.
5 changes: 5 additions & 0 deletions components/aws/sagemaker/tests/integration_tests/.flake8
@@ -0,0 +1,5 @@
[flake8]
max-line-length = 120
extend-ignore =
# See https://github.com/PyCQA/pycodestyle/issues/373
E203,
42 changes: 42 additions & 0 deletions components/aws/sagemaker/tests/integration_tests/README.md
@@ -0,0 +1,42 @@
## Requirements
1. [Conda](https://docs.conda.io/en/latest/miniconda.html)
1. [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/)
1. Argo CLI: [Mac](https://github.com/argoproj/homebrew-tap), [Linux](https://eksworkshop.com/advanced/410_batch/install/)
1. K8s cluster with Kubeflow pipelines > 0.4.0 installed
1. [IAM role](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html) with SageMakerFullAccess and S3FullAccess permissions
1. IAM user credentials with SageMakerFullAccess permissions

## Creating S3 buckets with datasets

Change the bucket name in the Python script [`s3_sample_data_creator.py`](https://github.com/kubeflow/pipelines/tree/master/samples/contrib/aws-samples/mnist-kmeans-sagemaker#the-sample-dataset) and run it to create an S3 bucket with the MNIST dataset in the region where you want to run the tests. A rough sketch of what such a script does is shown below.
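
The following is a minimal, hypothetical sketch of the data-creation step, assuming the built-in KMeans algorithm's RecordIO-protobuf input format and the `mnist_kmeans_example/data` prefix used by the test configs; the bucket name and the local `mnist.pkl.gz` path are illustrative, and the linked `s3_sample_data_creator.py` remains the reference:

```python
import gzip
import io
import pickle

import boto3
import numpy as np
from sagemaker.amazon.common import write_numpy_to_dense_tensor

region = "us-west-2"                # region the tests will run in
bucket = "my-kfp-test-data-bucket"  # illustrative bucket name

s3 = boto3.client("s3", region_name=region)
s3.create_bucket(
    Bucket=bucket,
    # CreateBucketConfiguration must be omitted for us-east-1
    CreateBucketConfiguration={"LocationConstraint": region},
)

# MNIST pickle downloaded beforehand (see the linked sample for the source)
with gzip.open("mnist.pkl.gz", "rb") as f:
    train_set, _, _ = pickle.load(f, encoding="latin1")

# Convert to the RecordIO-protobuf format the built-in KMeans algorithm expects
buf = io.BytesIO()
write_numpy_to_dense_tensor(
    buf,
    np.array(train_set[0], dtype="float32"),
    np.array(train_set[1], dtype="float32"),
)
buf.seek(0)

# Upload under the prefix referenced by resources/config/*/config.yaml
s3.upload_fileobj(buf, bucket, "mnist_kmeans_example/data")
```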

## Steps to run the integration tests
1. Configure AWS credentials with access to the EKS cluster
1. Fetch the kubeconfig to `~/.kube/config` or set the `KUBECONFIG` environment variable to point to the kubeconfig of the cluster
1. Create a [secret](https://kubernetes.io/docs/tasks/inject-data-application/distribute-credentials-secure/) named `aws-secret` in the `kubeflow` namespace with the credentials of an IAM user that has SageMakerFullAccess permissions:
    ```yaml
    apiVersion: v1
    kind: Secret
    metadata:
      name: aws-secret
      namespace: kubeflow
    type: Opaque
    data:
      AWS_ACCESS_KEY_ID: YOUR_BASE64_ACCESS_KEY
      AWS_SECRET_ACCESS_KEY: YOUR_BASE64_SECRET_ACCESS
    ```
    > Note: To get the base64 string, run `echo -n $AWS_ACCESS_KEY_ID | base64`
1. Create the conda environment used to run the tests: `conda env create -f environment.yml`
1. Activate the conda environment: `conda activate kfp_test_env`
1. Port-forward the MinIO service in the background, for example: `kubectl port-forward svc/minio-service 9000:9000 -n kubeflow &`
1. Provide the following arguments to pytest:
    1. `region`: AWS region where the tests will run. Default: `us-west-2`
    1. `role-arn`: SageMaker execution IAM role ARN
    1. `s3-data-bucket`: Regional S3 bucket in which the test data is hosted
    1. `minio-service-port`: Localhost port to which the MinIO service is mapped. Default: `9000`
    1. `kfp-namespace`: Cluster namespace where Kubeflow Pipelines is installed. Default: `kubeflow`
1. `cd` into this directory and run (an illustrative invocation is shown after this list):
    ```
    pytest --region <> --role-arn <> --s3-data-bucket <> --minio-service-port <> --kfp-namespace <>
    ```
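
For reference, a filled-in invocation might look like this (the account ID, role name, and bucket name are placeholders for your own values):

```
pytest --region us-west-2 \
  --role-arn arn:aws:iam::123456789012:role/SageMakerExecutionRole \
  --s3-data-bucket my-kfp-test-data-bucket \
  --minio-service-port 9000 \
  --kfp-namespace kubeflow
```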
@@ -0,0 +1,49 @@
import pytest
import os
import utils

from utils import kfp_client_utils
from utils import minio_utils
from utils import sagemaker_utils


@pytest.mark.parametrize("test_file_dir", ["resources/config/kmeans-mnist-model"])
def test_createmodel(kfp_client, experiment_id, sagemaker_client, test_file_dir):

    download_dir = utils.mkdir(os.path.join(test_file_dir, "generated"))
    test_params = utils.load_params(
        utils.replace_placeholders(
            os.path.join(test_file_dir, "config.yaml"),
            os.path.join(download_dir, "config.yaml"),
        )
    )

    # Generate random prefix for model name to avoid errors if model with same name exists
    test_params["Arguments"]["model_name"] = input_model_name = (
        utils.generate_random_string(5) + "-" + test_params["Arguments"]["model_name"]
    )

    run_id, status, workflow_json = kfp_client_utils.compile_run_monitor_pipeline(
        kfp_client,
        experiment_id,
        test_params["PipelineDefinition"],
        test_params["Arguments"],
        download_dir,
        test_params["TestName"],
        test_params["Timeout"],
    )

    outputs = {"sagemaker-create-model": ["model_name"]}

    output_files = minio_utils.artifact_download_iterator(
        workflow_json, outputs, download_dir
    )

    output_model_name = utils.extract_information(
        output_files["sagemaker-create-model"]["model_name"], "model_name.txt"
    )
    print(f"model_name: {output_model_name.decode()}")
    assert output_model_name.decode() == input_model_name
    assert (
        sagemaker_utils.describe_model(sagemaker_client, input_model_name) is not None
    )
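
The `utils` module referenced above (`mkdir`, `replace_placeholders`, `load_params`, `generate_random_string`, `extract_information`) is part of this commit but not shown in this excerpt. As a rough, hypothetical sketch of how the `((REGION))`-style tokens in the config files could be resolved from the environment variables exported by the fixtures in `conftest.py` (names and the registry lookup are assumptions; per the commit message, the real code reuses a SageMaker SDK method to get the image URI):

```python
# Hypothetical sketch -- the real utils module in this commit may differ.
import os
import random
import string

import yaml

_PLACEHOLDERS = {
    "((REGION))": "AWS_REGION",
    "((ROLE_ARN))": "ROLE_ARN",
    "((DATA_BUCKET))": "S3_DATA_BUCKET",
    # Assumed: the KMeans registry account. Per the commit message, the real
    # implementation reuses a SageMaker SDK method to resolve the image URI.
    "((KMEANS_REGISTRY))": "KMEANS_REGISTRY",
}


def replace_placeholders(input_filename, output_filename):
    """Copy a config file, substituting ((TOKEN)) markers with environment values."""
    with open(input_filename) as f:
        content = f.read()
    for token, env_var in _PLACEHOLDERS.items():
        content = content.replace(token, os.environ.get(env_var, ""))
    with open(output_filename, "w") as f:
        f.write(content)
    return output_filename


def load_params(config_filename):
    """Load the resolved config.yaml into a dict."""
    with open(config_filename) as f:
        return yaml.safe_load(f)


def generate_random_string(length):
    """Random lowercase suffix used to avoid name collisions across test runs."""
    return "".join(random.choices(string.ascii_lowercase, k=length))
```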
@@ -0,0 +1,65 @@
import pytest
import os
import json
import utils
from utils import kfp_client_utils
from utils import minio_utils
from utils import sagemaker_utils


@pytest.mark.parametrize("test_file_dir", ["resources/config/simple-mnist-training"])
def test_trainingjob(kfp_client, experiment_id, sagemaker_client, test_file_dir):

    download_dir = utils.mkdir(os.path.join(test_file_dir, "generated"))
    test_params = utils.load_params(
        utils.replace_placeholders(
            os.path.join(test_file_dir, "config.yaml"),
            os.path.join(download_dir, "config.yaml"),
        )
    )

    test_params["Arguments"]["hyperparameters"] = json.dumps(
        test_params["Arguments"]["hyperparameters"]
    )
    test_params["Arguments"]["channels"] = json.dumps(
        test_params["Arguments"]["channels"]
    )
    run_id, status, workflow_json = kfp_client_utils.compile_run_monitor_pipeline(
        kfp_client,
        experiment_id,
        test_params["PipelineDefinition"],
        test_params["Arguments"],
        download_dir,
        test_params["TestName"],
        test_params["Timeout"],
    )

    outputs = {"sagemaker-training-job": ["job_name", "model_artifact_url"]}
    output_files = minio_utils.artifact_download_iterator(
        workflow_json, outputs, download_dir
    )

    # Verify Training job was successful on SageMaker
    training_job_name = utils.extract_information(
        output_files["sagemaker-training-job"]["job_name"], "job_name.txt"
    )
    print(f"training job name: {training_job_name}")
    train_response = sagemaker_utils.describe_training_job(
        sagemaker_client, training_job_name.decode()
    )
    assert train_response["TrainingJobStatus"] == "Completed"

    # Verify model artifacts output was generated from this run
    model_artifact_url = utils.extract_information(
        output_files["sagemaker-training-job"]["model_artifact_url"],
        "model_artifact_url.txt",
    )
    print(f"model_artifact_url: {model_artifact_url}")
    assert (
        model_artifact_url.decode()
        == train_response["ModelArtifacts"]["S3ModelArtifacts"]
    )
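
`sagemaker_utils.describe_training_job` and `sagemaker_utils.describe_model` are not shown in this excerpt; they are presumably thin wrappers over the boto3 SageMaker client, along the lines of this sketch:

```python
# Hypothetical sketch of the sagemaker_utils wrappers the tests call.


def describe_training_job(client, training_job_name):
    """Return the DescribeTrainingJob response for the given job name."""
    return client.describe_training_job(TrainingJobName=training_job_name)


def describe_model(client, model_name):
    """Return the DescribeModel response; boto3 raises if the model does not exist."""
    return client.describe_model(ModelName=model_name)
```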
97 changes: 97 additions & 0 deletions components/aws/sagemaker/tests/integration_tests/conftest.py
@@ -0,0 +1,97 @@
import pytest
import boto3
import kfp
import os
import utils

from datetime import datetime


def pytest_addoption(parser):
    parser.addoption(
        "--region",
        default="us-west-2",
        required=False,
        help="AWS region where test will run",
    )
    parser.addoption(
        "--role-arn", required=True, help="SageMaker execution IAM role ARN",
    )
    parser.addoption(
        "--s3-data-bucket",
        required=True,
        help="Regional S3 bucket name in which test data is hosted",
    )
    parser.addoption(
        "--minio-service-port",
        default="9000",
        required=False,
        help="Localhost port to which minio service is mapped to",
    )
    parser.addoption(
        "--kfp-namespace",
        default="kubeflow",
        required=False,
        help="Cluster namespace where kubeflow pipelines is installed",
    )


@pytest.fixture(scope="session", autouse=True)
def region(request):
    os.environ["AWS_REGION"] = request.config.getoption("--region")
    return request.config.getoption("--region")


@pytest.fixture(scope="session", autouse=True)
def role_arn(request):
    os.environ["ROLE_ARN"] = request.config.getoption("--role-arn")
    return request.config.getoption("--role-arn")


@pytest.fixture(scope="session", autouse=True)
def s3_data_bucket(request):
    os.environ["S3_DATA_BUCKET"] = request.config.getoption("--s3-data-bucket")
    return request.config.getoption("--s3-data-bucket")


@pytest.fixture(scope="session", autouse=True)
def minio_service_port(request):
    os.environ["MINIO_SERVICE_PORT"] = request.config.getoption("--minio-service-port")
    return request.config.getoption("--minio-service-port")


@pytest.fixture(scope="session", autouse=True)
def kfp_namespace(request):
    os.environ["NAMESPACE"] = request.config.getoption("--kfp-namespace")
    return request.config.getoption("--kfp-namespace")


@pytest.fixture(scope="session")
def boto3_session(region):
    return boto3.Session(region_name=region)


@pytest.fixture(scope="session")
def sagemaker_client(boto3_session):
    return boto3_session.client(service_name="sagemaker")


@pytest.fixture(scope="session")
def s3_client(boto3_session):
    return boto3_session.client(service_name="s3")


@pytest.fixture(scope="session")
def kfp_client():
    kfp_installed_namespace = utils.get_kfp_namespace()
    return kfp.Client(namespace=kfp_installed_namespace)


@pytest.fixture(scope="session")
def experiment_id(kfp_client):
    exp_name = datetime.now().strftime("%Y-%m-%d")
    try:
        experiment = kfp_client.get_experiment(experiment_name=exp_name)
    except ValueError:
        experiment = kfp_client.create_experiment(name=exp_name)
    return experiment.id
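
`kfp_client_utils.compile_run_monitor_pipeline`, used by both tests, is likewise part of the commit but not visible here. With the KFP 0.5 SDK pinned in `environment.yml`, a plausible sketch (function and variable names here are assumptions) is to compile the pipeline definition with `dsl-compile`, start a run in the test experiment, wait for completion, and return the run status plus the Argo workflow manifest:

```python
# Hypothetical sketch of kfp_client_utils.compile_run_monitor_pipeline.
import json
import os
import subprocess


def compile_run_monitor_pipeline(
    kfp_client, experiment_id, pipeline_definition, arguments, download_dir, test_name, timeout
):
    # Compile the pipeline .py definition into an Argo workflow package.
    package_path = os.path.join(download_dir, test_name + ".yaml")
    subprocess.run(
        ["dsl-compile", "--py", pipeline_definition, "--output", package_path],
        check=True,
    )

    # Start the run and block until it finishes or the timeout expires.
    run = kfp_client.run_pipeline(experiment_id, test_name, package_path, arguments)
    response = kfp_client.wait_for_run_completion(run.id, timeout)

    status = response.run.status
    workflow_json = json.loads(response.pipeline_runtime.workflow_manifest)
    return run.id, status, workflow_json
```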
20 changes: 20 additions & 0 deletions components/aws/sagemaker/tests/integration_tests/environment.yml
@@ -0,0 +1,20 @@
name: kfp_test_env
channels:
  - conda-forge
  - defaults
dependencies:
  - python 3.7.*
  - pip 20.0.*
  - awscli 1.18.*
  - boto3 1.12.*
  - pytest 5.*
  - pyyaml 5.3.*
  - flake8 3.7.*
  - flake8-black 0.1.*
  - pip:
      - kubernetes==11.0.*
      - kfp==0.5.*
      - minio==5.0.10
      - sagemaker==1.56.*


2 changes: 2 additions & 0 deletions components/aws/sagemaker/tests/integration_tests/pytest.ini
@@ -0,0 +1,2 @@
[pytest]
addopts = -rA
@@ -0,0 +1,11 @@
PipelineDefinition: resources/definition/create_model_pipeline.py
TestName: kmeans-create-model-test
Timeout: 300
Arguments:
  region: ((REGION))
  model_name: kmeans-mnist-model
  image: ((KMEANS_REGISTRY)).dkr.ecr.((REGION)).amazonaws.com/kmeans:1
  model_artifact_url: s3://((DATA_BUCKET))/mnist_kmeans_example/model/kmeans-mnist-model/model.tar.gz
  network_isolation: "True"
  role: ((ROLE_ARN))

@@ -0,0 +1,31 @@
PipelineDefinition: resources/definition/training_pipeline.py
TestName: simple-mnist-training
Timeout: 3600
Arguments:
  region: ((REGION))
  image: ((KMEANS_REGISTRY)).dkr.ecr.((REGION)).amazonaws.com/kmeans:1
  training_input_mode: File
  hyperparameters:
    k: "10"
    feature_dim: "784"
  channels:
    - ChannelName: train
      DataSource:
        S3DataSource:
          S3Uri: s3://((DATA_BUCKET))/mnist_kmeans_example/data
          S3DataType: S3Prefix
          S3DataDistributionType: FullyReplicated
      CompressionType: None
      RecordWrapperType: None
      InputMode: File
  instance_type: ml.p2.xlarge
  instance_count: 1
  volume_size: 50
  max_run_time: 3600
  model_artifact_path: s3://((DATA_BUCKET))/mnist_kmeans_example/output
  network_isolation: "True"
  traffic_encryption: "False"
  spot_instance: "False"
  max_wait_time: 3600
  checkpoint_config: "{}"
  role: ((ROLE_ARN))
@@ -0,0 +1,35 @@
import kfp
from kfp import components
from kfp import dsl
from kfp.aws import use_aws_secret

sagemaker_model_op = components.load_component_from_file("../../model/component.yaml")


@dsl.pipeline(
    name="Create Model in SageMaker", description="SageMaker model component test"
)
def create_model_pipeline(
    region="",
    endpoint_url="",
    image="",
    model_name="",
    model_artifact_url="",
    network_isolation="",
    role="",
):
    sagemaker_model_op(
        region=region,
        endpoint_url=endpoint_url,
        model_name=model_name,
        image=image,
        model_artifact_url=model_artifact_url,
        network_isolation=network_isolation,
        role=role,
    ).apply(use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"))


if __name__ == "__main__":
    kfp.compiler.Compiler().compile(
        create_model_pipeline, "SageMaker_create_model_pipeline" + ".yaml"
    )
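
The training test's config points at `resources/definition/training_pipeline.py`, which is also part of this commit but not shown in this excerpt. Modeled on `create_model_pipeline.py` above, it plausibly looks something like the following sketch; the parameter names mirror the training `config.yaml`, and the component path is an assumption:

```python
# Hypothetical sketch of resources/definition/training_pipeline.py,
# modeled on create_model_pipeline.py above.
import kfp
from kfp import components
from kfp import dsl
from kfp.aws import use_aws_secret

sagemaker_train_op = components.load_component_from_file("../../train/component.yaml")


@dsl.pipeline(
    name="SageMaker training job", description="SageMaker training component test"
)
def training_pipeline(
    region="",
    image="",
    training_input_mode="",
    hyperparameters="",
    channels="",
    instance_type="",
    instance_count="",
    volume_size="",
    max_run_time="",
    model_artifact_path="",
    network_isolation="",
    traffic_encryption="",
    spot_instance="",
    max_wait_time="",
    checkpoint_config="{}",
    role="",
):
    sagemaker_train_op(
        region=region,
        image=image,
        training_input_mode=training_input_mode,
        hyperparameters=hyperparameters,
        channels=channels,
        instance_type=instance_type,
        instance_count=instance_count,
        volume_size=volume_size,
        max_run_time=max_run_time,
        model_artifact_path=model_artifact_path,
        network_isolation=network_isolation,
        traffic_encryption=traffic_encryption,
        spot_instance=spot_instance,
        max_wait_time=max_wait_time,
        checkpoint_config=checkpoint_config,
        role=role,
    ).apply(use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"))


if __name__ == "__main__":
    kfp.compiler.Compiler().compile(
        training_pipeline, "SageMaker_training_pipeline" + ".yaml"
    )
```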