Skip to content

Commit

Permalink
# This is a combination of 5 commits.
Browse files Browse the repository at this point in the history
# This is the 1st commit message:

Add initial scripts

# This is the commit message kubeflow#2:

Add working pytest script

# This is the commit message kubeflow#3:

Add initial scripts

# This is the commit message kubeflow#4:

Add environment variable files

# This is the commit message kubeflow#5:

Remove old cluster script
  • Loading branch information
Nicholas Thomson committed May 15, 2020
1 parent 3d2c6ea commit d5979a3
Show file tree
Hide file tree
Showing 7 changed files with 322 additions and 1 deletion.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,5 @@ _artifacts
# Generated Python SDK documentation
docs/_build

# Any environment variable files
**/*/.env
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ phases:
# Run the container and copy the results to /tmp
# Passes all host environment variables through to the container
- docker run --name integration-test-container $(env | cut -f1 -d= | sed 's/^/-e /') amazon/integration-test-image
- docker cp integration-test-container:/app/tests/integration_tests/integration_tests.log /tmp/results.xml
- docker cp integration-test-container:/tests/integration_tests/integration_tests.log /tmp/results.xml
- docker rm -f integration-test-container

reports:
Expand Down
12 changes: 12 additions & 0 deletions components/aws/sagemaker/tests/integration_tests/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# If you would like to override the credentials for the container
# AWS_ACCESS_KEY_ID=
# AWS_SECRET_ACCESS_KEY=
# AWS_SESSION_TOKEN=

REGION=us-east-1

SAGEMAKER_EXECUTION_ROLE_ARN=arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole-Example
S3_DATA_BUCKET=my-data-bucket

# If you want to use an existing EKS cluster rather than creating a new one, set:
# EKS_EXISTING_CLUSTER=sagemaker-kfp-2020-05-13-21-12-56-eks-cluster
50 changes: 50 additions & 0 deletions components/aws/sagemaker/tests/integration_tests/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Image for running the SageMaker component integration tests.
# Installs AWS/EKS/Kubernetes CLI tooling plus a conda test environment, then
# launches run_integration_tests as the container entrypoint.
FROM continuumio/miniconda:4.7.12

# Base OS tooling required by the test scripts (jq, git, curl, ...).
# NOTE(review): apt lists are not cleaned up afterwards (rm -rf /var/lib/apt/lists/*),
# so the layer is larger than necessary.
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
wget \
git \
python \
python-pip \
vim \
sudo \
jq

# Install AWSCLI
RUN pip install awscli

# Install eksctl (pinned to 0.19.0)
RUN curl --location "https://github.com/weaveworks/eksctl/releases/download/0.19.0/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp \
&& mv /tmp/eksctl /usr/local/bin

# Install aws-iam-authenticator (needed by kubectl to authenticate to EKS)
RUN curl -S -o /usr/local/bin/aws-iam-authenticator https://amazon-eks.s3.us-west-2.amazonaws.com/1.16.8/2020-04-16/bin/linux/amd64/aws-iam-authenticator \
&& chmod +x /usr/local/bin/aws-iam-authenticator

# Install Kubectl (pinned to v1.18.0)
RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.18.0/bin/linux/amd64/kubectl \
&& chmod +x ./kubectl \
&& mv ./kubectl /usr/local/bin/kubectl

# Install Argo CLI (pinned to v2.8.0)
RUN curl -sSL -o /usr/local/bin/argo https://github.com/argoproj/argo/releases/download/v2.8.0/argo-linux-amd64 \
&& chmod +x /usr/local/bin/argo

# Copy conda environment early to avoid cache busting
# NOTE(review): the ./sagemaker/... path assumes the docker build context is
# the parent directory of sagemaker/ — confirm against the build scripts.
COPY ./sagemaker/tests/integration_tests/environment.yml environment.yml

# Create conda environment for running tests and set as start-up environment
RUN conda env create -f environment.yml
RUN echo "source activate kfp_test_env" > ~/.bashrc
ENV PATH "/opt/conda/envs/kfp_test_env/bin":$PATH

# Environment variables to be used by tests.
# These are defaults; they can be overridden at `docker run` time with -e
# (see .env.example for the expected variables).
ENV REGION="us-west-2"
ENV SAGEMAKER_EXECUTION_ROLE_ARN="arn:aws:iam::1234567890:role/sagemaker-role"
ENV S3_DATA_BUCKET="kfp-test-data"
ENV MINIO_LOCAL_PORT=9000
ENV KFP_NAMESPACE="kubeflow"

COPY ./sagemaker/ .

ENTRYPOINT [ "/bin/bash", "./tests/integration_tests/scripts/run_integration_tests" ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/usr/bin/env bash

# Helper script to generate an IAM Role needed to install operator using role-based authentication.
#
# Run as:
# $ ./generate_iam_role ${cluster_arn/cluster_name} ${role_name} ${cluster_region} [optional: ${service_namespace} ${service_account}]
#

CLUSTER_ARN="${1}"
ROLE_NAME="${2}"
CLUSTER_REGION="${3:-us-east-1}"
SERVICE_NAMESPACE="${4:-kubeflow}"
SERVICE_ACCOUNT="${5:-pipeline-runner}"
aws_account=$(aws sts get-caller-identity --query Account --output text)
trustfile="trust.json"

# Directory containing this script; used to locate the sibling
# generate_trust_policy script. Quoted so paths with spaces work.
cwd=$(dirname "$(realpath "$0")")

# if using an existing cluster, use the cluster arn to get the region and cluster name
# example, cluster_arn=arn:aws:eks:us-east-1:12345678910:cluster/test
# (a bare cluster name has no '/', so cut returns it unchanged)
cluster_name=$(echo "${CLUSTER_ARN}" | cut -d'/' -f2)

# A function to get the OIDC_ID associated with an EKS cluster
# A function to get the OIDC_ID associated with an EKS cluster.
# Reads: cluster_name, CLUSTER_REGION. Writes: oidc_id (global).
function get_oidc_id {
  # Newer eksctl takes --cluster; fall back to the legacy --name flag on failure.
  # TODO: Ideally this should be based on version compatibility instead of command failure
  if ! eksctl utils associate-iam-oidc-provider --cluster "${cluster_name}" --region "${CLUSTER_REGION}" --approve; then
    eksctl utils associate-iam-oidc-provider --name "${cluster_name}" --region "${CLUSTER_REGION}" --approve
  fi

  # The OIDC ID is the final path component of the issuer URL.
  local oidc
  oidc=$(aws eks describe-cluster --name "${cluster_name}" --region "${CLUSTER_REGION}" --query cluster.identity.oidc.issuer --output text)
  oidc_id="${oidc##*/}"
}

# A function that generates an IAM role for the given account, cluster, namespace, region
# Parameter:
# $1: Name of the trust file to generate.
# A function that generates an IAM role for the given account, cluster, namespace, region.
# Reads: ROLE_NAME (global).
# Parameter:
#   $1: Name of the trust file to use as the assume-role policy document.
function create_namespaced_iam_role {
  local trustfile="${1}"
  # Check if role already exists; get-role succeeds iff it does.
  if aws iam get-role --role-name "${ROLE_NAME}"; then
    echo "A role for this cluster and namespace already exists in this account, assuming sagemaker access and proceeding."
  else
    echo "IAM Role does not exist, creating a new Role for the cluster"
    aws iam create-role --role-name "${ROLE_NAME}" --assume-role-policy-document file://"${trustfile}" --output=text --query "Role.Arn"
    aws iam attach-role-policy --role-name "${ROLE_NAME}" --policy-arn arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
  fi
}

# Remove the generated trust file
# Parameter:
# $1: Name of the trust file to delete.
# Remove the generated trust file.
# Parameter:
#   $1: Name of the trust file to delete.
function delete_generated_file {
  # -f: the caller invokes this before generating the file, so it may not
  # exist yet; -- guards against names beginning with a dash.
  rm -f -- "${1}"
}

echo "Get the OIDC ID for the cluster"
get_oidc_id
echo "Delete the trust json file if it already exists"
delete_generated_file "${trustfile}"
echo "Generate a trust json"
"$cwd"/generate_trust_policy ${CLUSTER_REGION} ${aws_account} ${oidc_id} ${SERVICE_NAMESPACE} ${SERVICE_ACCOUNT} > "${trustfile}"
echo "Create the IAM Role using these values"
create_namespaced_iam_role "${trustfile}"
echo "Cleanup for the next run"
delete_generated_file "${trustfile}"

Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env bash

# Helper script to generate trust policy needed to install operator using role-based authentication.
#
# Run as:
# $ ./generate_trust_policy ${EKS_CLUSTER_REGION} ${AWS_ACCOUNT_ID} ${OIDC_ID} [${SERVICE_NAMESPACE}] [${SERVICE_ACCOUNT}] > trust.json
#
# For example:
# $ ./generate_trust_policy us-west-2 123456789012 D48675832CA65BD10A532F597OIDCID > trust.json
# This will create a file `trust.json` containing a role policy that enables the operator in an EKS cluster to assume AWS roles.
#
# SERVICE_NAMESPACE defaults to "kubeflow"; pass it to run Kubeflow in a custom namespace.
# SERVICE_ACCOUNT defaults to "pipeline-runner"; pass it to give permissions to a different service account.

cluster_region="$1"
account_number="$2"
oidc_id="$3"
# Optional parameters fall back to the standard Kubeflow deployment values.
service_namespace="${4:-kubeflow}"
service_account="${5:-pipeline-runner}"

# NOTE(review): parameters are spliced into the printf format string, so values
# containing '%' or '\' would be interpreted; fine for region/account/OIDC inputs.
printf '{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Federated": "arn:aws:iam::'"${account_number}"':oidc-provider/oidc.eks.'"${cluster_region}"'.amazonaws.com/id/'"${oidc_id}"'"
},
"Action": "sts:AssumeRoleWithWebIdentity",
"Condition": {
"StringEquals": {
"oidc.eks.'"${cluster_region}"'.amazonaws.com/id/'"${oidc_id}"':aud": "sts.amazonaws.com",
"oidc.eks.'"${cluster_region}"'.amazonaws.com/id/'"${oidc_id}"':sub": "system:serviceaccount:'"${service_namespace}"':'"${service_account}"'"
}
}
}
]
}
'
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
#!/usr/bin/env bash

# Provisions (or reuses) an EKS cluster, installs Kubeflow Pipelines on it,
# and runs the SageMaker component integration tests against it.

set -u
set -o pipefail

# Print usage and exit non-zero.
usage(){
  echo "Usage: $0 -n <deployment name> [-r <region>]"
  exit 1
}

# Directory containing this script (quoted so paths with spaces work).
cwd=$(dirname "$(realpath "$0")")

### Input parameters
DEPLOY_NAME="sagemaker-kfp-$(date '+%Y-%m-%d-%H-%M-%S')" # The name given to the entire deployment (tagging all resources)
REGION=${REGION:-"$(aws configure get region)"} # Deployment region

### Configuration parameters
EKS_EXISTING_CLUSTER=${EKS_EXISTING_CLUSTER:-""} # Use an existing EKS cluster
EKS_CLUSTER_VERSION=${EKS_CLUSTER_VERSION:-"1.15"} # EKS cluster K8s version
EKS_NODE_COUNT=${EKS_NODE_COUNT:-"1"} # The initial node count of the EKS cluster
EKS_PUBLIC_SUBNETS=${EKS_PUBLIC_SUBNETS:-""}
EKS_PRIVATE_SUBNETS=${EKS_PRIVATE_SUBNETS:-""}

### Testing parameters
MINIO_LOCAL_PORT=${MINIO_LOCAL_PORT:-9000}
KFP_NAMESPACE=${KFP_NAMESPACE:-"kubeflow"}
KFP_SERVICE_ACCOUNT=${KFP_SERVICE_ACCOUNT:-"pipeline-runner"}

PYTEST_MARKER=${PYTEST_MARKER:-""}
S3_DATA_BUCKET=${S3_DATA_BUCKET:-"kfp-test-data"}
SAGEMAKER_EXECUTION_ROLE_ARN=${SAGEMAKER_EXECUTION_ROLE_ARN:-""}

while getopts ":n:r:" opt; do
  case "$opt" in
    n)
      DEPLOY_NAME="$OPTARG"
      ;;
    r)
      REGION="$OPTARG"
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      exit 1
      ;;
    :)
      echo "Option -$OPTARG requires an argument." >&2
      exit 1
      ;;
  esac
done

# Ensure a deployment name was specified (usage exits for us).
if [[ -z "$DEPLOY_NAME" ]]; then
  echo "Missing deployment name"
  usage
fi

# Tear down everything created during this run. Registered as an EXIT trap,
# so it also runs as teardown after a successful test pass.
function cleanup() {
  set +e

  cleanup_kfp
  # The role only exists once generate_iam_role_name has run; skip otherwise
  # to avoid an unbound-variable error under `set -u` inside the trap.
  if [[ -n "${OIDC_ROLE_NAME:-}" ]]; then
    delete_generated_role
  fi

  # Only tear down the cluster if this run created it.
  if [[ -z "${EKS_EXISTING_CLUSTER}" ]]; then
    delete_eks
  fi
}

# Set the trap to clean up resources in the case of an error
trap cleanup EXIT
set -e

# Create a new managed EKS cluster named after the deployment and point
# kubeconfig at it. Writes: EKS_CLUSTER_NAME (global).
function launch_eks() {
  EKS_CLUSTER_NAME="${DEPLOY_NAME}-eks-cluster"

  echo "[Creating EKS] Launching EKS cluster $EKS_CLUSTER_NAME"

  eksctl_args=( --managed --nodes "${EKS_NODE_COUNT}" --node-type=c5.xlarge --timeout=30m --region "${REGION}" --auto-kubeconfig --version "${EKS_CLUSTER_VERSION}" )
  # Only pass subnet options when the caller provided them.
  [[ -n "${EKS_PUBLIC_SUBNETS}" ]] && eksctl_args+=( --vpc-public-subnets="${EKS_PUBLIC_SUBNETS}" )
  [[ -n "${EKS_PRIVATE_SUBNETS}" ]] && eksctl_args+=( --vpc-private-subnets="${EKS_PRIVATE_SUBNETS}" )

  eksctl create cluster "${EKS_CLUSTER_NAME}" "${eksctl_args[@]}"

  aws eks update-kubeconfig --name "$EKS_CLUSTER_NAME" --region "$REGION"

  echo "[Creating EKS] $EKS_CLUSTER_NAME launched"
}

# Delete the EKS cluster created by launch_eks.
function delete_eks() {
  # Pass the region explicitly: the cluster was created in ${REGION}, which
  # may differ from the AWS CLI's default region.
  eksctl delete cluster --name "${EKS_CLUSTER_NAME}" --region "${REGION}"
}

# Install Kubeflow Pipelines manifests onto the current cluster and
# port-forward Minio locally. Writes: PIPELINE_VERSION, MINIO_PID (globals).
function install_kfp() {
  echo "[Installing KFP] Applying KFP manifests"

  PIPELINE_VERSION=0.5.1
  kubectl apply -k "github.com/kubeflow/pipelines/manifests/kustomize/cluster-scoped-resources?ref=$PIPELINE_VERSION"
  kubectl wait --for condition=established --timeout=60s crd/applications.app.k8s.io
  kubectl apply -k "github.com/kubeflow/pipelines/manifests/kustomize/env/dev?ref=$PIPELINE_VERSION"

  echo "[Installing KFP] Port-forwarding Minio"

  # Use the configured namespace rather than hard-coding "kubeflow" so the
  # port-forward matches wherever KFP was installed.
  kubectl port-forward -n "${KFP_NAMESPACE}" svc/minio-service "${MINIO_LOCAL_PORT}":9000 &
  MINIO_PID=$!

  echo "[Installing KFP] Minio port-forwarded to ${MINIO_LOCAL_PORT}"
}

# Derive the IAM role name and ARN used for the KFP service account.
# IAM role names are limited to 64 characters, hence the truncation.
# Writes: OIDC_ROLE_NAME, OIDC_ROLE_ARN (globals).
function generate_iam_role_name() {
  local account_id
  account_id=$(aws sts get-caller-identity --query=Account --output=text)
  OIDC_ROLE_NAME=$(printf '%s' "${DEPLOY_NAME}-kubeflow-role" | cut -c1-64)
  OIDC_ROLE_ARN="arn:aws:iam::${account_id}:role/${OIDC_ROLE_NAME}"
}

# Annotate the KFP service account with the generated IAM role ARN so its
# pods pick up the role (eks.amazonaws.com/role-arn annotation).
function install_generated_role() {
  local annotation_patch
  annotation_patch='{"metadata": {"annotations": {"eks.amazonaws.com/role-arn": "'"${OIDC_ROLE_ARN}"'"}}}'
  kubectl patch serviceaccount -n ${KFP_NAMESPACE} ${KFP_SERVICE_ACCOUNT} --patch "${annotation_patch}"
}

# Remove the IAM role that was generated for the cluster being torn down
# (policy must be detached before the role can be deleted).
function delete_generated_role() {
  local role="${OIDC_ROLE_NAME}"
  local sagemaker_policy_arn="arn:aws:iam::aws:policy/AmazonSageMakerFullAccess"
  aws iam detach-role-policy --role-name "${role}" --policy-arn "${sagemaker_policy_arn}"
  aws iam delete-role --role-name "${role}"
}

# Kill the Minio port-forward if one was started.
function cleanup_kfp() {
  # Guard with :- so the EXIT trap can fire before install_kfp ran without
  # tripping `set -u` on an unbound MINIO_PID.
  if [[ -n "${MINIO_PID:-}" ]]; then
    kill -9 $MINIO_PID
  fi
}

# Main flow: create or reuse the cluster, set up the IAM role and KFP, then
# run pytest. The EXIT trap (cleanup) tears everything down afterwards.
if [[ -z "${EKS_EXISTING_CLUSTER}" ]]; then
  launch_eks
else
  # Reuse the caller-provided cluster and name generated resources after it.
  aws eks update-kubeconfig --name "${EKS_EXISTING_CLUSTER}" --region "$REGION"
  EKS_CLUSTER_NAME="${EKS_EXISTING_CLUSTER}"
  DEPLOY_NAME="${EKS_EXISTING_CLUSTER}"
fi

generate_iam_role_name
"$cwd"/generate_iam_role "${EKS_CLUSTER_NAME}" "${OIDC_ROLE_NAME}" "${REGION}" "${KFP_NAMESPACE}" "${KFP_SERVICE_ACCOUNT}"
install_kfp
install_generated_role

# Trace the test invocation for easier CI debugging.
set -x

pytest_args=( --region "${REGION}" --role-arn "${SAGEMAKER_EXECUTION_ROLE_ARN}" --s3-data-bucket "${S3_DATA_BUCKET}" --minio-service-port "${MINIO_LOCAL_PORT}" --kfp-namespace "${KFP_NAMESPACE}" )
[[ -n "${PYTEST_MARKER}" ]] && pytest_args+=( -m "${PYTEST_MARKER}" )

# Run the tests in parallel across all available cores (pytest-xdist).
cd tests/integration_tests && pytest "${pytest_args[@]}" --junitxml ./integration_tests.log -n "$(nproc)"

0 comments on commit d5979a3

Please sign in to comment.