Skip to content

Commit

Permalink
Add periodical e2e job for EKS (#1915) (#1926)
Browse files Browse the repository at this point in the history
  • Loading branch information
sre-bot authored Mar 13, 2020
1 parent 165c5b6 commit f5abbf9
Show file tree
Hide file tree
Showing 9 changed files with 287 additions and 38 deletions.
105 changes: 105 additions & 0 deletions ci/aws-clean-eks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#!/bin/bash

# Copyright 2020 PingCAP, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.

#
# aws-k8s-tester cannot clean up all the resources it created when an error occurs.
# This script cleans up the resources left behind by aws-k8s-tester in our CI.
#
# DO NOT USE THIS SCRIPT FOR OTHER USES!
#

# Print the names of all CloudFormation stacks that are either fully created
# or stuck in a failed deletion, as a single whitespace-separated text list.
function get_stacks() {
  local -a wanted_statuses=(CREATE_COMPLETE DELETE_FAILED)
  aws cloudformation list-stacks \
    --stack-status-filter "${wanted_statuses[@]}" \
    --query 'StackSummaries[*].StackName' \
    --output text
}

#######################################
# Work around managed node group (mng) deletion failures that are caused by
# security groups which cannot be deleted because network interfaces still
# reference them.
#
# The health issues of the node group are listed; for every issue with the
# code "Ec2SecurityGroupDeletionFailure" each affected security group is
# cleared of its network interfaces and then deleted.
#
# Arguments:
#   $1 - name of the EKS cluster
#   $2 - name of the managed node group
# Outputs:
#   progress messages on stdout
#######################################
function fix_eks_mng_deletion_issues() {
  local cluster="$1"
  local mng="$2"
  local code resourceIds sg eni
  local -a securityGroups

  # jq emits one "<code><TAB><id1,id2,...>" line per health issue.
  while IFS=$'\t' read -r code resourceIds; do
    if [ "$code" != "Ec2SecurityGroupDeletionFailure" ]; then
      continue
    fi
    # An issue may list several security groups; the delete call below is
    # issued once per group, so handle them one at a time.
    IFS=',' read -r -a securityGroups <<< "$resourceIds"
    for sg in "${securityGroups[@]}"; do
      if [ -z "$sg" ]; then
        continue
      fi
      echo "info: clear security group '$sg'"
      # Word splitting is intended here: the text output is a
      # whitespace-separated list of interface IDs.
      for eni in $(aws ec2 describe-network-interfaces --filters "Name=group-id,Values=$sg" --query 'NetworkInterfaces[*].NetworkInterfaceId' --output text); do
        echo "info: clear leaked network interfaces '$eni'"
        aws ec2 delete-network-interface --network-interface-id "$eni"
      done
      aws ec2 delete-security-group --group-id "$sg"
    done
  # Process substitution keeps the line structure intact on every bash
  # version; an unquoted here-string is word-split on bash < 4.4.
  done < <(aws eks describe-nodegroup --cluster-name "$cluster" --nodegroup-name "$mng" --query 'nodegroup.health.issues' --output json | jq -r '.[].resourceIds |= join(",") | .[] | "\(.code)\t\(.resourceIds)"')
}

#######################################
# Delete the CloudFormation stacks that belong to one EKS test cluster.
#
# The managed node group (mng) stack "<cluster>-mng-<number>" is deleted
# first. If waiting for that deletion fails, the node groups of the cluster
# are inspected with fix_eks_mng_deletion_issues and the deletion is retried
# once. Afterwards the cluster, cluster-role, mng-role and vpc stacks are
# deleted in that order.
#
# Arguments:
#   $1 - name of the EKS cluster
# Outputs:
#   progress messages on stdout
# Returns:
#   the exit status of the last stack deletion wait
#######################################
function clean_eks() {
  local CLUSTER="$1"
  local stackName mngName stack

  echo "info: deleting mng stack"
  local regex="^${CLUSTER}-mng-[0-9]+\$"
  local mngStack=
  # Word splitting is intended: get_stacks prints a whitespace-separated list.
  for stackName in $(get_stacks); do
    if [[ "$stackName" =~ $regex ]]; then
      mngStack=$stackName
      break
    fi
  done

  if [ -n "$mngStack" ]; then
    echo "info: mng stack found '$mngStack', deleting it"
    aws cloudformation delete-stack --stack-name "$mngStack"
    if ! aws cloudformation wait stack-delete-complete --stack-name "$mngStack"; then
      echo "error: failed to delete mng stack '$mngStack', delete related resource first"
      # Look up the node groups of the cluster being cleaned, not a fixed one.
      for mngName in $(aws eks list-nodegroups --cluster-name "$CLUSTER" --query 'nodegroups[*]' --output text); do
        fix_eks_mng_deletion_issues "$CLUSTER" "$mngName"
      done
      aws cloudformation delete-stack --stack-name "$mngStack"
      aws cloudformation wait stack-delete-complete --stack-name "$mngStack"
    fi
  else
    echo "info: mng stack not found, skipped"
  fi

  echo "info: deleting cluster/cluster-role/mng-role/vpc stacks"
  local stacks=(
    "$CLUSTER-cluster"
    "$CLUSTER-role-cluster"
    "$CLUSTER-role-mng"
    "$CLUSTER-vpc"
  )
  for stack in "${stacks[@]}"; do
    echo "info: deleting stack $stack"
    aws cloudformation delete-stack --stack-name "$stack"
    aws cloudformation wait stack-delete-complete --stack-name "$stack"
  done
}

# https://github.com/aws/aws-cli#other-configurable-variables
# Mirror AWS_REGION into AWS_DEFAULT_REGION when the caller provided one.
if [ -n "${AWS_REGION:-}" ]; then
  export AWS_DEFAULT_REGION="${AWS_REGION}"
fi

# Stop early when the AWS CLI cannot authenticate.
if ! aws sts get-caller-identity; then
  echo "error: failed to get caller identity"
  exit 1
fi

# Every positional argument is the name of a cluster to clean; "$@" is quoted
# so that each argument is passed through as exactly one cluster name.
for CLUSTER in "$@"; do
  echo "info: start to clean eks test cluster '$CLUSTER'"
  if clean_eks "$CLUSTER"; then
    echo "info: succesfully cleaned the eks test cluster '$CLUSTER'"
  else
    echo "fatal: failed to clean the eks test cluster '$CLUSTER'"
    exit 1
  fi
done
137 changes: 137 additions & 0 deletions ci/e2e_eks.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
//
// Jenkins pipeline for EKS e2e job.
//
// This script is written in declarative syntax. Refer to
// https://jenkins.io/doc/book/pipeline/syntax/ for more details.
//
// Note that parameters of the job is configured in this script.
//

import groovy.transform.Field

// Pod specification (YAML text) handed to the `kubernetes` agent of the
// pipeline via `yaml podYAML`. It declares a single container named "main"
// that runs in privileged mode with DOCKER_IN_DOCKER_ENABLED set to "true"
// and mounts an emptyDir volume at /var/lib/docker.
// @Field makes the variable a field of the script class instead of a
// script-local variable.
@Field
def podYAML = '''
apiVersion: v1
kind: Pod
spec:
containers:
- name: main
image: gcr.io/k8s-testimages/kubekins-e2e:v20200311-1e25827-master
command:
- runner.sh
- sleep
- 1d
# we need privileged mode in order to do docker in docker
securityContext:
privileged: true
env:
- name: DOCKER_IN_DOCKER_ENABLED
value: "true"
resources:
requests:
memory: "4000Mi"
cpu: 2000m
volumeMounts:
# dind expects /var/lib/docker to be volume
- name: docker-root
mountPath: /var/lib/docker
volumes:
- name: docker-root
emptyDir: {}
'''

// Declarative pipeline of the EKS e2e job. It resolves the git ref to test
// ("Prepare"), checks out the repository ("Checkout"), cleans up the cluster
// left by a previous run and executes the e2e suite ("Run"), and finally
// archives whatever was written to the ARTIFACTS directory.
pipeline {
agent {
kubernetes {
yaml podYAML
defaultContainer "main"
customWorkspace "/home/jenkins/agent/workspace/go/src/github.com/pingcap/tidb-operator"
}
}

options {
// Limit the whole run to three hours.
timeout(time: 3, unit: 'HOURS')
}

parameters {
string(name: 'GIT_URL', defaultValue: 'git@github.com:pingcap/tidb-operator.git', description: 'git repo url')
string(name: 'GIT_REF', defaultValue: 'master', description: 'git ref spec to checkout, e.g. master, release-1.1')
string(name: 'PR_ID', defaultValue: '', description: 'pull request ID, this will override GIT_REF if set, e.g. 1889')
string(name: 'CLUSTER', defaultValue: 'jenkins-tidb-operator-e2e', description: 'the name of the cluster')
string(name: 'AWS_REGION', defaultValue: 'us-west-2', description: 'the AWS region')
string(name: 'GINKGO_NODES', defaultValue: '8', description: 'the number of ginkgo nodes')
}

environment {
// GIT_REF starts out empty; it is assigned in the "Prepare" stage.
GIT_REF = ''
// Directory that is archived and scanned for JUnit reports in `post`.
ARTIFACTS = "${env.WORKSPACE}/artifacts"
}

stages {
stage("Prepare") {
steps {
// The declarative model for Jenkins Pipelines has a restricted
// subset of syntax that it allows in the stage blocks. We use
// script step to bypass the restriction.
// https://jenkins.io/doc/book/pipeline/syntax/#script
script {
// Use the GIT_REF parameter unless PR_ID is set, in which case the
// head of that pull request is used instead.
GIT_REF = params.GIT_REF
if (params.PR_ID != "") {
GIT_REF = "refs/remotes/origin/pr/${params.PR_ID}/head"
}
}
echo "env.NODE_NAME: ${env.NODE_NAME}"
echo "env.WORKSPACE: ${env.WORKSPACE}"
echo "GIT_REF: ${GIT_REF}"
echo "ARTIFACTS: ${ARTIFACTS}"
}
}

stage("Checkout") {
steps {
// The refspec fetches branches to refs/remotes/origin/* and pull
// requests to refs/remotes/origin/pr/*, which is the namespace the
// "Prepare" stage uses when PR_ID is set.
checkout scm: [
$class: 'GitSCM',
branches: [[name: GIT_REF]],
userRemoteConfigs: [[
credentialsId: 'github-sre-bot-ssh',
refspec: '+refs/heads/*:refs/remotes/origin/* +refs/pull/*:refs/remotes/origin/pr/*',
url: "${params.GIT_URL}",
]]
]
}
}

stage("Run") {
steps {
// Bind the stored AWS credentials to the AWS_ACCESS_KEY_ID and
// AWS_SECRET_ACCESS_KEY variables for the shell step below.
withCredentials([
string(credentialsId: 'TIDB_OPERATOR_AWS_ACCESS_KEY_ID', variable: 'AWS_ACCESS_KEY_ID'),
string(credentialsId: 'TIDB_OPERATOR_AWS_SECRET_ACCESS_KEY', variable: 'AWS_SECRET_ACCESS_KEY'),
]) {
// The job parameters are interpolated by Groovy into the script text;
// \$CLUSTER is escaped so that the shell expands it instead.
// NOTE(review): the script text starts with a newline, so the shebang
// below is not on its first line and presumably has no effect - confirm.
sh """
#!/bin/bash
export PROVIDER=eks
export CLUSTER=${params.CLUSTER}
export AWS_REGION=${params.AWS_REGION}
export GINKGO_NODES=${params.GINKGO_NODES}
export REPORT_DIR=${ARTIFACTS}
echo "info: try to clean the cluster created previously"
./ci/aws-clean-eks.sh \$CLUSTER
echo "info: begin to run e2e"
./hack/e2e.sh -- --ginkgo.skip='\\[Serial\\]' --ginkgo.focus='\\[tidb-operator\\]'
"""
}
}
}
}

post {
// Runs regardless of the outcome of the stages: archive everything under
// ARTIFACTS and publish the JUnit XML reports found at its top level.
// Empty archives and empty result sets are explicitly allowed.
always {
dir(ARTIFACTS) {
archiveArtifacts artifacts: "**", allowEmptyArchive: true
junit testResults: "*.xml", allowEmptyResults: true
}
}
}
}

// vim: et sw=4 ts=4
6 changes: 5 additions & 1 deletion ci/e2e_gke.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ kind: Pod
spec:
containers:
- name: main
image: gcr.io/k8s-testimages/kubekins-e2e:v20191108-9467d02-master
image: gcr.io/k8s-testimages/kubekins-e2e:v20200311-1e25827-master
command:
- runner.sh
- sleep
Expand Down Expand Up @@ -49,6 +49,10 @@ pipeline {
}
}

options {
timeout(time: 3, unit: 'HOURS')
}

parameters {
string(name: 'GIT_URL', defaultValue: 'git@github.com:pingcap/tidb-operator.git', description: 'git repo url')
string(name: 'GIT_REF', defaultValue: 'master', description: 'git ref spec to checkout, e.g. master, release-1.1')
Expand Down
2 changes: 1 addition & 1 deletion ci/pingcap_tidb_operator_build_kind.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ metadata:
spec:
containers:
- name: main
image: gcr.io/k8s-testimages/kubekins-e2e:v20191108-9467d02-master
image: gcr.io/k8s-testimages/kubekins-e2e:v20200311-1e25827-master
command:
- runner.sh
# Clean containers on TERM signal in root process to avoid cgroup leaking.
Expand Down
15 changes: 6 additions & 9 deletions hack/e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,9 @@ echo "GCP_PROJECT: $GCP_PROJECT"
echo "GCP_CREDENTIALS: $GCP_CREDENTIALS"
echo "GCP_REGION: $GCP_REGION"
echo "GCP_ZONE: $GCP_ZONE"
echo "AWS_ACCESS_KEY_ID: $AWS_ACCESS_KEY_ID"
echo "AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY"
# We shouldn't print aws credential environments.
# echo "AWS_ACCESS_KEY_ID: $AWS_ACCESS_KEY_ID"
# echo "AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY"
echo "AWS_REGION: $AWS_REGION"
echo "KUBE_VERSION: $KUBE_VERSION"
echo "KUBE_WORKERS: $KUBE_WORKERS"
Expand Down Expand Up @@ -473,16 +474,12 @@ elif [ "$PROVIDER" == "eks" ]; then
mngName=$CLUSTER-mng-$RANDOM
export AWS_K8S_TESTER_EKS_NAME=$CLUSTER
export AWS_K8S_TESTER_EKS_CONFIG_PATH=/tmp/kubetest2.eks.$CLUSTER
export AWS_K8S_TESTER_EKS_ADD_ON_NLB_HELLO_WORLD_ENABLE="false"
export AWS_K8S_TESTER_EKS_PARAMETERS_VERSION="1.15"
export AWS_K8S_TESTER_EKS_PARAMETERS_ENCRYPTION_CMK_CREATE="false"
export AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_ENABLE="true"
export AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_MNGS=$(printf '{"%s":{"name":"%s","ami-type":"AL2_x86_64","asg-min-size":%d,"asg-max-size":%d,"asg-desired-capacity":%d,"instance-types":["c5.xlarge"],"volume-size":40}}' "$mngName" "$mngName" "$KUBE_WORKERS" "$KUBE_WORKERS" "$KUBE_WORKERS")
# override KUBECONFIG
KUBECONFIG=$AWS_K8S_TESTER_EKS_CONFIG_PATH.kubeconfig.yaml
if [ -z "$SKIP_UP" ]; then
# clear previous created private key to work around permission issue on this file
if test -f $HOME/.ssh/kube_aws_rsa; then
rm -f $HOME/.ssh/kube_aws_rsa
fi
fi
else
echo "error: unsupported provider '$PROVIDER'"
exit 1
Expand Down
4 changes: 2 additions & 2 deletions hack/lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ KIND_VERSION=${KIND_VERSION:-0.7.0}
KIND_BIN=$OUTPUT_BIN/kind
KUBETEST2_VERSION=v0.0.8
KUBETSTS2_BIN=$OUTPUT_BIN/kubetest2
AWS_K8S_TESTER_VERSION=v0.6.2
AWS_K8S_TESTER_VERSION=v0.7.4
AWS_K8S_TESTER_BIN=$OUTPUT_BIN/aws-k8s-tester

test -d "$OUTPUT_BIN" || mkdir -p "$OUTPUT_BIN"
Expand Down Expand Up @@ -184,7 +184,7 @@ function hack::ensure_kubetest2() {

function hack::verify_aws_k8s_tester() {
if test -x $AWS_K8S_TESTER_BIN; then
[[ "$($AWS_K8S_TESTER_BIN version | awk '/ReleaseVersion/ {print $2}')" == "$AWS_K8S_TESTER_VERSION" ]]
[[ "$($AWS_K8S_TESTER_BIN version | jq '."release-version"' -r)" == "$AWS_K8S_TESTER_VERSION" ]]
return
fi
return 1
Expand Down
Loading

0 comments on commit f5abbf9

Please sign in to comment.