Skip to content

Commit

Permalink
update eks e2e script and jenkins file
Browse files Browse the repository at this point in the history
  • Loading branch information
cofyc committed Mar 12, 2020
1 parent c3ff1be commit cc19c38
Show file tree
Hide file tree
Showing 9 changed files with 353 additions and 38 deletions.
179 changes: 179 additions & 0 deletions ci/aws-clean-eks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
#!/bin/bash

# Copyright 2020 PingCAP, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.

#
# aws-k8s-tester cannot clean up all the resources it created when an error occurs.
# This script is used to clean resources created by aws-k8s-tester in our CI.
#
# DO NOT USE THIS SCRIPT FOR OTHER USES!
#

# Reports whether an IAM role with the given name exists.
# Arguments: $1 - role name to look up
# Returns:   0 if `aws iam list-roles` prints a role with exactly that name,
#            1 otherwise
function iam_role_exists() {
    local roleName="$1"
    # Keep the loop variable local so a caller's own "n" is not clobbered.
    local n
    # Automatic pagination is left enabled on purpose: with --no-paginate the
    # CLI makes a single call and only returns the first page of roles, so a
    # role that lives on a later page would be reported as missing.
    for n in $(aws iam list-roles --query 'Roles[*].RoleName' --output text); do
        if [[ "$roleName" == "$n" ]]; then
            return 0
        fi
    done
    return 1
}

# Deletes an IAM role after detaching its attached (managed) policies.
# Arguments: $1 - role name
# Outputs:   an info line on stdout when the role does not exist
# Returns:   0 when the role does not exist, otherwise the status of
#            `aws iam delete-role`
function delete_role() {
    local roleName="$1"
    # Keep the loop variable local so it does not leak into the caller's scope.
    local policyArn
    if ! iam_role_exists "$roleName"; then
        echo "info: IAM role '$roleName' does not exist, skipped"
        return 0
    fi
    # detach role policies first
    for policyArn in $(aws iam list-attached-role-policies --role-name "$roleName" --query 'AttachedPolicies[*].PolicyArn' --output text); do
        aws iam detach-role-policy --role-name "$roleName" --policy-arn "$policyArn"
    done
    aws iam delete-role --role-name "$roleName"
}

# delete-vpc cannot delete its dependencies, so we need to remove them one by one
# https://github.com/aws/aws-cli/issues/1721
# https://aws.amazon.com/premiumsupport/knowledge-center/troubleshoot-dependency-error-delete-vpc/
#
# Removes the resources that are associated with a VPC: instances, non-default
# security groups, internet gateways, subnets, route tables, network ACLs,
# VPC endpoints, NAT gateways and network interfaces.
# Arguments: $1 - VPC id
# Outputs:   progress lines on stdout
# NOTE(review): this function only removes the dependencies; it never calls
# `aws ec2 delete-vpc` itself - confirm whether the caller is expected to.
function delete_vpc() {
    local vpc="$1"
    # Loop variables are local so they do not leak into the caller's scope.
    local id name
    local -a instanceIds=()

    echo "info: checking associated instances"
    for id in $(aws ec2 describe-instances --filters "Name=vpc-id,Values=$vpc" --query 'Reservations[*].Instances[*].InstanceId' --output text); do
        instanceIds+=("$id")
    done
    if [[ ${#instanceIds[@]} -gt 0 ]]; then
        # Terminate the instances and wait until they are gone; resources that
        # are still in use by a running instance cannot be removed.
        aws ec2 terminate-instances --instance-ids "${instanceIds[@]}"
        aws ec2 wait instance-terminated --instance-ids "${instanceIds[@]}"
    fi

    echo "info: checking associated security groups"
    # Each output line is read as "<id> <name>".
    # assumes the text output prints the Id column before Name - TODO confirm
    # Process substitution keeps one record per line and yields zero
    # iterations when the VPC has no security groups.
    while read -r id name; do
        # The group named "default" is skipped.
        if [[ -z "$id" || "$name" == "default" ]]; then
            continue
        fi
        aws ec2 delete-security-group --group-id "$id"
    done < <(aws ec2 describe-security-groups --filters "Name=vpc-id,Values=$vpc" --query 'SecurityGroups[*].{Name:GroupName,Id:GroupId}' --output text)

    echo "info: checking associated internet gateways"
    for id in $(aws ec2 describe-internet-gateways --filters "Name=attachment.vpc-id,Values=$vpc" --query 'InternetGateways[*].InternetGatewayId' --output text); do
        # An attached internet gateway has to be detached before it is deleted.
        aws ec2 detach-internet-gateway --internet-gateway-id "$id" --vpc-id "$vpc"
        aws ec2 delete-internet-gateway --internet-gateway-id "$id"
    done

    echo "info: checking associated subnets"
    for id in $(aws ec2 describe-subnets --filters "Name=vpc-id,Values=$vpc" --query 'Subnets[*].SubnetId' --output text); do
        aws ec2 delete-subnet --subnet-id "$id"
    done

    # NOTE(review): the VPC's main route table and default network ACL
    # presumably cannot be deleted this way; failures are not checked here.
    echo "info: checking associated route tables"
    for id in $(aws ec2 describe-route-tables --filters "Name=vpc-id,Values=$vpc" --query 'RouteTables[*].RouteTableId' --output text); do
        aws ec2 delete-route-table --route-table-id "$id"
    done

    echo "info: checking associated network acls"
    for id in $(aws ec2 describe-network-acls --filters "Name=vpc-id,Values=$vpc" --query 'NetworkAcls[*].NetworkAclId' --output text); do
        aws ec2 delete-network-acl --network-acl-id "$id"
    done

    echo "info: checking associated vpc endpoints"
    for id in $(aws ec2 describe-vpc-endpoints --filters "Name=vpc-id,Values=$vpc" --query 'VpcEndpoints[*].VpcEndpointId' --output text); do
        # The CLI command is the plural "delete-vpc-endpoints".
        aws ec2 delete-vpc-endpoints --vpc-endpoint-ids "$id"
    done

    echo "info: checking associated nat gateways"
    for id in $(aws ec2 describe-nat-gateways --filter "Name=vpc-id,Values=$vpc" --query 'NatGateways[*].NatGatewayId' --output text); do
        aws ec2 delete-nat-gateway --nat-gateway-id "$id"
    done

    # echo "info: checking associated vpn connections"
    # for id in $(aws ec2 describe-vpn-connections --filters 'Name=vpc-id,Values='$vpc --query 'VpcConnections[*].VpcConnectionId' --output text); do
    # aws ec2 delete-vpn-connection --vpn-connection-id $id
    # done
    # echo "info: checking associated vpn gateways"
    # for id in $(aws ec2 describe-vpn-gateways --filters 'Name=vpc-id,Values='$vpc --query 'VpcConnections[*].VpcConnectionId' --output text); do
    # aws ec2 delete-vpn-gateway --vpn-gateway-id $id
    # done

    echo "info: checking associated network interfaces"
    for id in $(aws ec2 describe-network-interfaces --filters "Name=vpc-id,Values=$vpc" --query 'NetworkInterfaces[*].NetworkInterfaceId' --output text); do
        aws ec2 delete-network-interface --network-interface-id "$id"
    done
}

# Prints the names of the CloudFormation stacks whose status is
# CREATE_COMPLETE or DELETE_FAILED, as text output of `aws cloudformation list-stacks`.
function get_stacks() {
    local -a statusFilter=(CREATE_COMPLETE DELETE_FAILED)
    aws cloudformation list-stacks \
        --stack-status-filter "${statusFilter[@]}" \
        --query 'StackSummaries[*].StackName' \
        --output text
}

# Deletes an EC2 key pair if `aws ec2 describe-key-pairs` reports it.
# Arguments: $1 - key pair name
# Returns:   0 when nothing had to be deleted, otherwise the status of
#            `aws ec2 delete-key-pair`
function delete_ec2_key_pair() {
    local name="$1"
    # Declared separately from the assignment so `local` does not mask the
    # status of the command substitution.
    local described
    # stderr is discarded on purpose: an error from describe-key-pairs is
    # treated the same as "nothing to delete" (no output).
    described=$(aws ec2 describe-key-pairs --key-names "$name" --output text 2>/dev/null)
    if [[ -n "$described" ]]; then
        # The name is quoted so that key names containing spaces stay one argument.
        aws ec2 delete-key-pair --key-name "$name"
    fi
}

# Deletes the CloudFormation stacks that belong to one EKS test cluster:
# first the "<cluster>-mng-<digits>" stack found via get_stacks, then the
# cluster, cluster-role, mng-role and vpc stacks.
# Globals:   CLUSTER (read; used as the cluster name when $1 is empty)
# Arguments: $1 - cluster name (optional, defaults to $CLUSTER)
# Outputs:   progress lines on stdout, error lines on stderr
# Returns:   0 when every delete-stack/wait call succeeded, 1 otherwise
function clean_eks() {
    local cluster="${1:-${CLUSTER:-}}"
    # Remembers whether any stack failed to delete; without it the function's
    # status would be that of its last echo and failures could never be seen
    # by the caller.
    local failed=0
    local stackName stack
    if [[ -z "$cluster" ]]; then
        echo "error: cluster name is required" >&2
        return 1
    fi

    echo "info: deleting mng stack"
    local regex='^'$cluster'-mng-[0-9]+$'
    local mngStack=
    for stackName in $(get_stacks); do
        if [[ "$stackName" =~ $regex ]]; then
            mngStack=$stackName
            break
        fi
    done
    if [ -n "$mngStack" ]; then
        echo "info: mng stack found '$mngStack', deleting it"
        if ! aws cloudformation delete-stack --stack-name "$mngStack" ||
            ! aws cloudformation wait stack-delete-complete --stack-name "$mngStack"; then
            echo "error: failed to delete stack '$mngStack'" >&2
            failed=1
        fi
    else
        echo "info: mng stack not found, skipped"
    fi

    echo "info: deleting cluster/cluster-role/mng-role/vpc stacks"
    local stacks=(
        "${cluster}-cluster"
        "${cluster}-role-cluster"
        "${cluster}-role-mng"
        "${cluster}-vpc"
    )
    for stack in "${stacks[@]}"; do
        echo "info: deleting stack $stack"
        # Keep going after a failure so the remaining stacks are still attempted.
        if ! aws cloudformation delete-stack --stack-name "$stack" ||
            ! aws cloudformation wait stack-delete-complete --stack-name "$stack"; then
            echo "error: failed to delete stack '$stack'" >&2
            failed=1
        fi
    done

    echo "info: clean missing resources"

    # local vpcId=$(aws ec2 describe-vpcs --filter "Name=tag:Name,Values=$CLUSTER-vpc-VPC" --query 'Vpcs[0].VpcId' --output text 2>/dev/null)
    # if [[ -n "$vpcId" && "$vpcId" != "None" ]]; then
    # echo "info: vpc $vpcId found for cluster $CLUSTER, deleting it"
    # delete_vpc $vpcId
    # fi

    # delete_role $CLUSTER-cluster-role
    # delete_role $CLUSTER-mng-role

    # echo "info: deleting keypair for node group: $CLUSTER-ssh"
    # delete_ec2_key_pair $CLUSTER-ssh

    return "$failed"
}

# Script entry point: cleans every EKS test cluster named on the command line.
# Usage: aws-clean-eks.sh <cluster>...

# https://github.com/aws/aws-cli#other-configurable-variables
if [ -n "${AWS_REGION:-}" ]; then
    export AWS_DEFAULT_REGION="${AWS_REGION}"
fi

# Stop early when the caller identity cannot be resolved.
if ! aws sts get-caller-identity; then
    echo "error: failed to get caller identity"
    exit 1
fi

# "$@" is quoted so that every argument stays exactly one cluster name.
# CLUSTER is intentionally a global loop variable: clean_eks reads it.
for CLUSTER in "$@"; do
    echo "info: start to clean eks test cluster '$CLUSTER'"
    if clean_eks "$CLUSTER"; then
        echo "info: succesfully cleaned the eks test cluster '$CLUSTER'"
    else
        echo "fatal: failed to clean the eks test cluster '$CLUSTER'"
        exit 1
    fi
done
133 changes: 133 additions & 0 deletions ci/e2e_eks.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
//
// Jenkins pipeline for EKS e2e job.
//
// This script is written in declarative syntax. Refer to
// https://jenkins.io/doc/book/pipeline/syntax/ for more details.
//
// Note that the parameters of the job are configured in this script.
//

import groovy.transform.Field

// Pod manifest passed to the kubernetes agent of the pipeline below.
// The nesting matters: containers and volumes belong to spec, and the
// security context, env, resources and volume mounts belong to the "main"
// container.
@Field
def podYAML = '''
apiVersion: v1
kind: Pod
spec:
  containers:
  - name: main
    image: gcr.io/k8s-testimages/kubekins-e2e:v20200311-1e25827-master
    command:
    - runner.sh
    - sleep
    - 1d
    # we need privileged mode in order to do docker in docker
    securityContext:
      privileged: true
    env:
    - name: DOCKER_IN_DOCKER_ENABLED
      value: "true"
    resources:
      requests:
        memory: "4000Mi"
        cpu: 2000m
    volumeMounts:
    # dind expects /var/lib/docker to be volume
    - name: docker-root
      mountPath: /var/lib/docker
  volumes:
  - name: docker-root
    emptyDir: {}
'''

// Declarative pipeline for the EKS e2e job. It runs in the "main" container
// of a kubernetes agent built from podYAML, resolves the git ref to test,
// checks the repository out, cleans any previously created cluster and then
// runs the e2e script. Whatever is found under ARTIFACTS is archived afterwards.
pipeline {
agent {
kubernetes {
yaml podYAML
defaultContainer "main"
customWorkspace "/home/jenkins/agent/workspace/go/src/github.com/pingcap/tidb-operator"
}
}

// Job parameters; their defaults are defined here rather than in the job configuration.
parameters {
string(name: 'GIT_URL', defaultValue: 'git@github.com:pingcap/tidb-operator.git', description: 'git repo url')
string(name: 'GIT_REF', defaultValue: 'master', description: 'git ref spec to checkout, e.g. master, release-1.1')
string(name: 'PR_ID', defaultValue: '', description: 'pull request ID, this will override GIT_REF if set, e.g. 1889')
string(name: 'CLUSTER', defaultValue: 'jenkins-tidb-operator-e2e', description: 'the name of the cluster')
string(name: 'AWS_REGION', defaultValue: 'us-west-2', description: 'the AWS region')
string(name: 'GINKGO_NODES', defaultValue: '8', description: 'the number of ginkgo nodes')
}

// GIT_REF starts out empty and is assigned in the "Prepare" stage.
// ARTIFACTS is the directory archived in the post section.
environment {
GIT_REF = ''
ARTIFACTS = "${env.WORKSPACE}/artifacts"
}

stages {
stage("Prepare") {
steps {
// The declarative model for Jenkins Pipelines has a restricted
// subset of syntax that it allows in the stage blocks. We use
// script step to bypass the restriction.
// https://jenkins.io/doc/book/pipeline/syntax/#script
script {
GIT_REF = params.GIT_REF
// A non-empty PR_ID takes precedence over GIT_REF and selects the
// head of that pull request.
if (params.PR_ID != "") {
GIT_REF = "refs/remotes/origin/pr/${params.PR_ID}/head"
}
}
echo "env.NODE_NAME: ${env.NODE_NAME}"
echo "env.WORKSPACE: ${env.WORKSPACE}"
echo "GIT_REF: ${GIT_REF}"
echo "ARTIFACTS: ${ARTIFACTS}"
}
}

stage("Checkout") {
steps {
// The refspec maps branch heads and pull request refs under
// refs/remotes/origin/, which is where the PR ref built in the
// "Prepare" stage points to.
checkout scm: [
$class: 'GitSCM',
branches: [[name: GIT_REF]],
userRemoteConfigs: [[
credentialsId: 'github-sre-bot-ssh',
refspec: '+refs/heads/*:refs/remotes/origin/* +refs/pull/*:refs/remotes/origin/pr/*',
url: "${params.GIT_URL}",
]]
]
}
}

stage("Run") {
steps {
// The AWS credentials are bound to environment variables only for
// the duration of this block.
withCredentials([
string(credentialsId: 'TIDB_OPERATOR_AWS_ACCESS_KEY_ID', variable: 'AWS_ACCESS_KEY_ID'),
string(credentialsId: 'TIDB_OPERATOR_AWS_SECRET_ACCESS_KEY', variable: 'AWS_SECRET_ACCESS_KEY'),
]) {
// ${params.*} and ${ARTIFACTS} are interpolated by Groovy before the
// script runs; \$CLUSTER is escaped so that the shell expands it.
// NOTE(review): "#!/bin/bash" is not the first line of this script
// (a newline precedes it) - confirm which shell Jenkins actually uses.
sh """
#!/bin/bash
export PROVIDER=eks
export CLUSTER=${params.CLUSTER}
export AWS_REGION=${params.AWS_REGION}
export GINKGO_NODES=${params.GINKGO_NODES}
export REPORT_DIR=${ARTIFACTS}
echo "info: try to clean the cluster created previously"
./ci/aws-clean-eks.sh \$CLUSTER
echo "info: begin to run e2e"
./hack/e2e.sh -- --ginkgo.skip='\\[Serial\\]' --ginkgo.focus='\\[tidb-operator\\]'
"""
}
}
}
}

post {
// Runs regardless of the build result; empty artifact sets and missing
// junit results are tolerated.
always {
dir(ARTIFACTS) {
archiveArtifacts artifacts: "**", allowEmptyArchive: true
junit testResults: "*.xml", allowEmptyResults: true
}
}
}
}

// vim: et sw=4 ts=4
2 changes: 1 addition & 1 deletion ci/e2e_gke.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ kind: Pod
spec:
containers:
- name: main
image: gcr.io/k8s-testimages/kubekins-e2e:v20191108-9467d02-master
image: gcr.io/k8s-testimages/kubekins-e2e:v20200311-1e25827-master
command:
- runner.sh
- sleep
Expand Down
2 changes: 1 addition & 1 deletion ci/pingcap_tidb_operator_build_kind.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ metadata:
spec:
containers:
- name: main
image: gcr.io/k8s-testimages/kubekins-e2e:v20191108-9467d02-master
image: gcr.io/k8s-testimages/kubekins-e2e:v20200311-1e25827-master
command:
- runner.sh
# Clean containers on TERM signal in root process to avoid cgroup leaking.
Expand Down
15 changes: 6 additions & 9 deletions hack/e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,9 @@ echo "GCP_PROJECT: $GCP_PROJECT"
echo "GCP_CREDENTIALS: $GCP_CREDENTIALS"
echo "GCP_REGION: $GCP_REGION"
echo "GCP_ZONE: $GCP_ZONE"
echo "AWS_ACCESS_KEY_ID: $AWS_ACCESS_KEY_ID"
echo "AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY"
# We shouldn't print aws credential environments.
# echo "AWS_ACCESS_KEY_ID: $AWS_ACCESS_KEY_ID"
# echo "AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY"
echo "AWS_REGION: $AWS_REGION"
echo "KUBE_VERSION: $KUBE_VERSION"
echo "KUBE_WORKERS: $KUBE_WORKERS"
Expand Down Expand Up @@ -477,16 +478,12 @@ elif [ "$PROVIDER" == "eks" ]; then
mngName=$CLUSTER-mng-$RANDOM
export AWS_K8S_TESTER_EKS_NAME=$CLUSTER
export AWS_K8S_TESTER_EKS_CONFIG_PATH=/tmp/kubetest2.eks.$CLUSTER
export AWS_K8S_TESTER_EKS_ADD_ON_NLB_HELLO_WORLD_ENABLE="false"
export AWS_K8S_TESTER_EKS_PARAMETERS_VERSION="1.15"
export AWS_K8S_TESTER_EKS_PARAMETERS_ENCRYPTION_CMK_CREATE="false"
export AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_ENABLE="true"
export AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_MNGS=$(printf '{"%s":{"name":"%s","ami-type":"AL2_x86_64","asg-min-size":%d,"asg-max-size":%d,"asg-desired-capacity":%d,"instance-types":["c5.xlarge"],"volume-size":40}}' "$mngName" "$mngName" "$KUBE_WORKERS" "$KUBE_WORKERS" "$KUBE_WORKERS")
# override KUBECONFIG
KUBECONFIG=$AWS_K8S_TESTER_EKS_CONFIG_PATH.kubeconfig.yaml
if [ -z "$SKIP_UP" ]; then
# clear previous created private key to work around permission issue on this file
if test -f $HOME/.ssh/kube_aws_rsa; then
rm -f $HOME/.ssh/kube_aws_rsa
fi
fi
else
echo "error: unsupported provider '$PROVIDER'"
exit 1
Expand Down
4 changes: 2 additions & 2 deletions hack/lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ KIND_VERSION=${KIND_VERSION:-0.7.0}
KIND_BIN=$OUTPUT_BIN/kind
KUBETEST2_VERSION=v0.0.8
KUBETSTS2_BIN=$OUTPUT_BIN/kubetest2
AWS_K8S_TESTER_VERSION=v0.6.2
AWS_K8S_TESTER_VERSION=v0.7.4
AWS_K8S_TESTER_BIN=$OUTPUT_BIN/aws-k8s-tester

test -d "$OUTPUT_BIN" || mkdir -p "$OUTPUT_BIN"
Expand Down Expand Up @@ -184,7 +184,7 @@ function hack::ensure_kubetest2() {

function hack::verify_aws_k8s_tester() {
if test -x $AWS_K8S_TESTER_BIN; then
[[ "$($AWS_K8S_TESTER_BIN version | awk '/ReleaseVersion/ {print $2}')" == "$AWS_K8S_TESTER_VERSION" ]]
[[ "$($AWS_K8S_TESTER_BIN version | jq '."release-version"' -r)" == "$AWS_K8S_TESTER_VERSION" ]]
return
fi
return 1
Expand Down
Loading

0 comments on commit cc19c38

Please sign in to comment.