diff --git a/.gitignore b/.gitignore
index b1d84f8597aa..a016121ec31f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -66,3 +66,5 @@ _artifacts
 # Generated Python SDK documentation
 docs/_build
 
+# Any environment variable files
+**/*/.env
diff --git a/components/aws/sagemaker/codebuild/integration-test.buildspec.yml b/components/aws/sagemaker/codebuild/integration-test.buildspec.yml
index 0ca12b06c618..e8d6a66975db 100644
--- a/components/aws/sagemaker/codebuild/integration-test.buildspec.yml
+++ b/components/aws/sagemaker/codebuild/integration-test.buildspec.yml
@@ -8,7 +8,7 @@ phases:
       # Run the container and copy the results to /tmp
       # Passes all host environment variables through to the container
       - docker run --name integration-test-container $(env | cut -f1 -d= | sed 's/^/-e /') amazon/integration-test-image
-      - docker cp integration-test-container:/app/tests/integration_tests/integration_tests.log /tmp/results.xml
+      - docker cp integration-test-container:/tests/integration_tests/integration_tests.log /tmp/results.xml
       - docker rm -f integration-test-container
 
 reports:
diff --git a/components/aws/sagemaker/tests/integration_tests/.env.example b/components/aws/sagemaker/tests/integration_tests/.env.example
new file mode 100644
index 000000000000..fbef162e38df
--- /dev/null
+++ b/components/aws/sagemaker/tests/integration_tests/.env.example
@@ -0,0 +1,12 @@
+# If you would like to override the credentials for the container
+# AWS_ACCESS_KEY_ID=
+# AWS_SECRET_ACCESS_KEY=
+# AWS_SESSION_TOKEN=
+
+REGION=us-east-1
+
+SAGEMAKER_EXECUTION_ROLE_ARN=arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole-Example
+S3_DATA_BUCKET=my-data-bucket
+
+# If you want to use an existing EKS cluster rather than creating a new one
+# EKS_EXISTING_CLUSTER=sagemaker-kfp-2020-05-13-21-12-56-eks-cluster
\ No newline at end of file
diff --git a/components/aws/sagemaker/tests/integration_tests/Dockerfile b/components/aws/sagemaker/tests/integration_tests/Dockerfile
new file mode 100644
index 000000000000..39bbd2993337
--- /dev/null
+++ b/components/aws/sagemaker/tests/integration_tests/Dockerfile
@@ -0,0 +1,50 @@
+FROM continuumio/miniconda:4.7.12
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl \
+    wget \
+    git \
+    python \
+    python-pip \
+    vim \
+    sudo \
+    jq
+
+# Install AWSCLI
+RUN pip install awscli
+
+# Install eksctl
+RUN curl --location "https://github.com/weaveworks/eksctl/releases/download/0.19.0/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp \
+    && mv /tmp/eksctl /usr/local/bin
+
+# Install aws-iam-authenticator
+RUN curl -S -o /usr/local/bin/aws-iam-authenticator https://amazon-eks.s3.us-west-2.amazonaws.com/1.16.8/2020-04-16/bin/linux/amd64/aws-iam-authenticator \
+    && chmod +x /usr/local/bin/aws-iam-authenticator
+
+# Install Kubectl
+RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.18.0/bin/linux/amd64/kubectl \
+    && chmod +x ./kubectl \
+    && mv ./kubectl /usr/local/bin/kubectl
+
+# Install Argo CLI
+RUN curl -sSL -o /usr/local/bin/argo https://github.com/argoproj/argo/releases/download/v2.8.0/argo-linux-amd64 \
+    && chmod +x /usr/local/bin/argo
+
+# Copy conda environment early to avoid cache busting
+COPY ./sagemaker/tests/integration_tests/environment.yml environment.yml
+
+# Create conda environment for running tests and set as start-up environment
+RUN conda env create -f environment.yml
+RUN echo "source activate kfp_test_env" > ~/.bashrc
+ENV PATH "/opt/conda/envs/kfp_test_env/bin":$PATH
+
+# Environment variables to be used by tests
+ENV REGION="us-west-2"
+ENV SAGEMAKER_EXECUTION_ROLE_ARN="arn:aws:iam::1234567890:role/sagemaker-role"
+ENV S3_DATA_BUCKET="kfp-test-data"
+ENV MINIO_LOCAL_PORT=9000
+ENV KFP_NAMESPACE="kubeflow"
+
+COPY ./sagemaker/ .
+
+ENTRYPOINT [ "/bin/bash", "./tests/integration_tests/scripts/run_integration_tests" ]
\ No newline at end of file
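Note: the buildspec above runs the image as amazon/integration-test-image, and the COPY paths in the Dockerfile imply that the build context is components/aws. A minimal sketch of building and running the tests locally, assuming a .env file created from the .env.example template above:

$ cd components/aws
$ docker build . -f ./sagemaker/tests/integration_tests/Dockerfile -t amazon/integration-test-image
$ docker run --env-file ./sagemaker/tests/integration_tests/.env amazon/integration-test-image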
diff --git a/components/aws/sagemaker/tests/integration_tests/scripts/generate_iam_role b/components/aws/sagemaker/tests/integration_tests/scripts/generate_iam_role
new file mode 100755
index 000000000000..c2f84b399ef6
--- /dev/null
+++ b/components/aws/sagemaker/tests/integration_tests/scripts/generate_iam_role
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+
+# Helper script to generate an IAM Role needed to install the operator using role-based authentication.
+#
+# Run as:
+# $ ./generate_iam_role ${cluster_arn/cluster_name} ${role_name} ${cluster_region} [optional: ${service_namespace} ${service_account}]
+#
+
+CLUSTER_ARN="${1}"
+ROLE_NAME="${2}"
+CLUSTER_REGION="${3:-us-east-1}"
+SERVICE_NAMESPACE="${4:-kubeflow}"
+SERVICE_ACCOUNT="${5:-pipeline-runner}"
+aws_account=$(aws sts get-caller-identity --query Account --output text)
+trustfile="trust.json"
+
+cwd=$(dirname $(realpath $0))
+
+# If using an existing cluster, extract the cluster name from the cluster ARN,
+# e.g. cluster_arn=arn:aws:eks:us-east-1:12345678910:cluster/test
+cluster_name=$(echo ${CLUSTER_ARN} | cut -d'/' -f2)
+
+# A function to get the OIDC_ID associated with an EKS cluster
+function get_oidc_id {
+    # TODO: Ideally this should be based on version compatibility instead of command failure
+    eksctl utils associate-iam-oidc-provider --cluster ${cluster_name} --region ${CLUSTER_REGION} --approve
+    if [[ $? -ge 1 ]]; then
+        eksctl utils associate-iam-oidc-provider --name ${cluster_name} --region ${CLUSTER_REGION} --approve
+    fi
+
+    local oidc=$(aws eks describe-cluster --name ${cluster_name} --region ${CLUSTER_REGION} --query cluster.identity.oidc.issuer --output text)
+    oidc_id=$(echo ${oidc} | rev | cut -d'/' -f1 | rev)
+}
+
+# A function that generates an IAM role for the given account, cluster, namespace, region
+# Parameter:
+#    $1: Name of the trust file to generate.
+function create_namespaced_iam_role {
+    local trustfile="${1}"
+    # Check if the role already exists
+    aws iam get-role --role-name ${ROLE_NAME}
+    if [[ $? -eq 0 ]]; then
+        echo "A role for this cluster and namespace already exists in this account, assuming SageMaker access and proceeding."
+    else
+        echo "IAM Role does not exist, creating a new Role for the cluster"
+        aws iam create-role --role-name ${ROLE_NAME} --assume-role-policy-document file://${trustfile} --output=text --query "Role.Arn"
+        aws iam attach-role-policy --role-name ${ROLE_NAME} --policy-arn arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
+    fi
+}
+
+# Remove the generated trust file
+# Parameter:
+#    $1: Name of the trust file to delete.
+function delete_generated_file {
+    rm "${1}"
+}
+
+echo "Get the OIDC ID for the cluster"
+get_oidc_id
+echo "Delete the trust json file if it already exists"
+delete_generated_file "${trustfile}"
+echo "Generate a trust json"
+"$cwd"/generate_trust_policy ${CLUSTER_REGION} ${aws_account} ${oidc_id} ${SERVICE_NAMESPACE} ${SERVICE_ACCOUNT} > "${trustfile}"
+echo "Create the IAM Role using these values"
+create_namespaced_iam_role "${trustfile}"
+echo "Cleanup for the next run"
+delete_generated_file "${trustfile}"
+
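For illustration, a hypothetical standalone invocation of generate_iam_role against an existing cluster, followed by a check that the role was created (the cluster ARN and role name are placeholder values):

$ ./generate_iam_role arn:aws:eks:us-east-1:123456789012:cluster/test sagemaker-kfp-example-kubeflow-role us-east-1 kubeflow pipeline-runner
$ aws iam get-role --role-name sagemaker-kfp-example-kubeflow-role --query Role.Arn --output text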
diff --git a/components/aws/sagemaker/tests/integration_tests/scripts/generate_trust_policy b/components/aws/sagemaker/tests/integration_tests/scripts/generate_trust_policy
new file mode 100755
index 000000000000..712d4bfad91a
--- /dev/null
+++ b/components/aws/sagemaker/tests/integration_tests/scripts/generate_trust_policy
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+# Helper script to generate the trust policy needed to install the operator using role-based authentication.
+#
+# Run as:
+# $ ./generate_trust_policy ${EKS_CLUSTER_REGION} ${AWS_ACCOUNT_ID} ${OIDC_ID} ${SERVICE_NAMESPACE} ${SERVICE_ACCOUNT} > trust.json
+#
+# For example:
+# $ ./generate_trust_policy us-west-2 123456789012 D48675832CA65BD10A532F597OIDCID > trust.json
+# This will create a file `trust.json` containing a role policy that enables the operator in an EKS cluster to assume AWS roles.
+#
+# The SERVICE_NAMESPACE parameter is for when you want to run Kubeflow in a custom namespace other than "kubeflow".
+# The SERVICE_ACCOUNT parameter is for when you want to give permissions to a service account other than the default "pipeline-runner".
+
+cluster_region="$1"
+account_number="$2"
+oidc_id="$3"
+service_namespace="${4}"
+service_account="${5}"
+
+printf '{
+    "Version": "2012-10-17",
+    "Statement": [
+        {
+            "Effect": "Allow",
+            "Principal": {
+                "Federated": "arn:aws:iam::'"${account_number}"':oidc-provider/oidc.eks.'"${cluster_region}"'.amazonaws.com/id/'"${oidc_id}"'"
+            },
+            "Action": "sts:AssumeRoleWithWebIdentity",
+            "Condition": {
+                "StringEquals": {
+                    "oidc.eks.'"${cluster_region}"'.amazonaws.com/id/'"${oidc_id}"':aud": "sts.amazonaws.com",
+                    "oidc.eks.'"${cluster_region}"'.amazonaws.com/id/'"${oidc_id}"':sub": "system:serviceaccount:'"${service_namespace}"':'"${service_account}"'"
+                }
+            }
+        }
+    ]
+}
+'
\ No newline at end of file
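Because generate_trust_policy writes the policy to stdout, its output can be sanity-checked by piping it through jq (installed in the Dockerfile above); the account number, OIDC ID, namespace, and service account here are placeholder values:

$ ./generate_trust_policy us-west-2 123456789012 D48675832CA65BD10A532F597OIDCID kubeflow pipeline-runner | jq .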
diff --git a/components/aws/sagemaker/tests/integration_tests/scripts/run_integration_tests b/components/aws/sagemaker/tests/integration_tests/scripts/run_integration_tests
new file mode 100755
index 000000000000..cc6725152ff5
--- /dev/null
+++ b/components/aws/sagemaker/tests/integration_tests/scripts/run_integration_tests
@@ -0,0 +1,150 @@
+#!/usr/bin/env bash
+
+set -u
+set -o pipefail
+
+usage(){
+    echo "Usage: $0 -n <deploy_name> [-r <region>]"
+    exit 1
+}
+
+cwd=$(dirname $(realpath $0))
+
+### Input parameters
+DEPLOY_NAME="sagemaker-kfp-$(date '+%Y-%m-%d-%H-%M-%S')" # The name given to the entire deployment (tagging all resources)
+REGION=${REGION:-"$(aws configure get region)"} # Deployment region
+
+### Configuration parameters
+EKS_EXISTING_CLUSTER=${EKS_EXISTING_CLUSTER:-""} # Use an existing EKS cluster
+EKS_CLUSTER_VERSION=${EKS_CLUSTER_VERSION:-"1.15"} # EKS cluster K8s version
+EKS_NODE_COUNT=${EKS_NODE_COUNT:-"1"} # The initial node count of the EKS cluster
+EKS_PUBLIC_SUBNETS=${EKS_PUBLIC_SUBNETS:-""}
+EKS_PRIVATE_SUBNETS=${EKS_PRIVATE_SUBNETS:-""}
+
+### Testing parameters
+MINIO_LOCAL_PORT=${MINIO_LOCAL_PORT:-9000}
+KFP_NAMESPACE=${KFP_NAMESPACE:-"kubeflow"}
+KFP_SERVICE_ACCOUNT=${KFP_SERVICE_ACCOUNT:-"pipeline-runner"}
+
+PYTEST_MARKER=${PYTEST_MARKER:-""}
+S3_DATA_BUCKET=${S3_DATA_BUCKET:-"kfp-test-data"}
+SAGEMAKER_EXECUTION_ROLE_ARN=${SAGEMAKER_EXECUTION_ROLE_ARN:-""}
+
+while getopts ":n:r:" opt; do
+    case $opt in
+        n)
+            DEPLOY_NAME="$OPTARG"
+            ;;
+        r)
+            REGION="$OPTARG"
+            ;;
+        \?)
+            echo "Invalid option: -$OPTARG" >&2
+            exit 1
+            ;;
+        :)
+            echo "Option -$OPTARG requires an argument." >&2
+            exit 1
+            ;;
+    esac
+done
+
+# Ensure a deployment name was specified
+if [ "$DEPLOY_NAME" == "" ]; then
+    echo "Missing deployment name"
+    usage
+    exit 1
+fi
+
+function cleanup() {
+    set +e
+
+    cleanup_kfp
+    delete_generated_role
+
+    if [[ -z "${EKS_EXISTING_CLUSTER}" ]]; then
+        delete_eks
+    fi
+}
+
+# Set the trap to clean up resources in the case of an error
+trap cleanup EXIT
+set -e
+
+function launch_eks() {
+    EKS_CLUSTER_NAME="${DEPLOY_NAME}-eks-cluster"
+
+    echo "[Creating EKS] Launching EKS cluster $EKS_CLUSTER_NAME"
+
+    eksctl_args=( --managed --nodes "${EKS_NODE_COUNT}" --node-type=c5.xlarge --timeout=30m --region "${REGION}" --auto-kubeconfig --version "${EKS_CLUSTER_VERSION}" )
+    [ ! -z "${EKS_PUBLIC_SUBNETS}" ] && eksctl_args+=( --vpc-public-subnets="${EKS_PUBLIC_SUBNETS}" )
+    [ ! -z "${EKS_PRIVATE_SUBNETS}" ] && eksctl_args+=( --vpc-private-subnets="${EKS_PRIVATE_SUBNETS}" )
+
+    eksctl create cluster "${EKS_CLUSTER_NAME}" "${eksctl_args[@]}"
+
+    aws eks update-kubeconfig --name "$EKS_CLUSTER_NAME" --region "$REGION"
+
+    echo "[Creating EKS] $EKS_CLUSTER_NAME launched"
+}
+
+function delete_eks() {
+    eksctl delete cluster --name "${EKS_CLUSTER_NAME}"
+}
+
+function install_kfp() {
+    echo "[Installing KFP] Applying KFP manifests"
+
+    PIPELINE_VERSION=0.5.1
+    kubectl apply -k github.com/kubeflow/pipelines/manifests/kustomize/cluster-scoped-resources?ref=$PIPELINE_VERSION
+    kubectl wait --for condition=established --timeout=60s crd/applications.app.k8s.io
+    kubectl apply -k github.com/kubeflow/pipelines/manifests/kustomize/env/dev?ref=$PIPELINE_VERSION
+
+    echo "[Installing KFP] Port-forwarding Minio"
+
+    kubectl port-forward -n kubeflow svc/minio-service $MINIO_LOCAL_PORT:9000 &
+    MINIO_PID=$!
+
+    echo "[Installing KFP] Minio port-forwarded to ${MINIO_LOCAL_PORT}"
+}
+
+function generate_iam_role_name() {
+    OIDC_ROLE_NAME="$(echo "${DEPLOY_NAME}-kubeflow-role" | cut -c1-64)"
+    OIDC_ROLE_ARN="arn:aws:iam::$(aws sts get-caller-identity --query=Account --output=text):role/${OIDC_ROLE_NAME}"
+}
+
+function install_generated_role() {
+    kubectl patch serviceaccount -n ${KFP_NAMESPACE} ${KFP_SERVICE_ACCOUNT} --patch '{"metadata": {"annotations": {"eks.amazonaws.com/role-arn": "'"${OIDC_ROLE_ARN}"'"}}}'
+}
+
+function delete_generated_role() {
+    # Delete the role associated with the cluster that's being deleted
+    aws iam detach-role-policy --role-name "${OIDC_ROLE_NAME}" --policy-arn arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
+    aws iam delete-role --role-name "${OIDC_ROLE_NAME}"
+}
+
-z "${MINIO_PID}" ]]; then + kill -9 $MINIO_PID + fi +} + +if [[ -z "${EKS_EXISTING_CLUSTER}" ]]; then + launch_eks +else + aws eks update-kubeconfig --name "${EKS_EXISTING_CLUSTER}" --region "$REGION" + EKS_CLUSTER_NAME="${EKS_EXISTING_CLUSTER}" + DEPLOY_NAME="${EKS_EXISTING_CLUSTER}" +fi + +generate_iam_role_name +"$cwd"/generate_iam_role ${EKS_CLUSTER_NAME} ${OIDC_ROLE_NAME} ${REGION} ${KFP_NAMESPACE} ${KFP_SERVICE_ACCOUNT} +install_kfp +install_generated_role + +set -x + +pytest_args=( --region "${REGION}" --role-arn "${SAGEMAKER_EXECUTION_ROLE_ARN}" --s3-data-bucket "${S3_DATA_BUCKET}" --minio-service-port "${MINIO_LOCAL_PORT}" --kfp-namespace "${KFP_NAMESPACE}" ) +[ ! -z "${PYTEST_MARKER}" ] && pytest_args+=( -m "${PYTEST_MARKER}" ) + +cd tests/integration_tests && pytest "${pytest_args[@]}" --junitxml ./integration_tests.log -n $(nproc) \ No newline at end of file