From 1ff4d5d1a2401657bbf1229c38266b624874850c Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Thu, 23 Jul 2020 14:11:48 -0400 Subject: [PATCH] Introduce automated performance testing. (#1068) * Add version info to file, display start of performance tests. * Scale up node group before running 5000 pod test. * Create unique mng names. * Update data files for performance tests. * Add failure checking for performance tests. * Upload files to corresponding folders in s3 bucket. * Check for slow performance update. * Weekly performance test (midnight Wednesday) --- .circleci/config.yml | 79 ++++++++ scripts/lib/cluster.sh | 11 +- scripts/lib/common.sh | 8 +- scripts/lib/performance_tests.sh | 298 +++++++++++++++++++++++++++++++ scripts/run-integration-tests.sh | 41 +++-- test/integration/README.md | 70 ++++++++ testdata/deploy-130-pods.yaml | 26 +++ testdata/deploy-5000-pods.yaml | 26 +++ testdata/deploy-730-pods.yaml | 26 +++ 9 files changed, 570 insertions(+), 15 deletions(-) create mode 100644 scripts/lib/performance_tests.sh create mode 100644 test/integration/README.md create mode 100644 testdata/deploy-130-pods.yaml create mode 100644 testdata/deploy-5000-pods.yaml create mode 100644 testdata/deploy-730-pods.yaml diff --git a/.circleci/config.yml b/.circleci/config.yml index da8bb249ea..ebf7bac4d9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -82,6 +82,70 @@ jobs: - store_artifacts: path: /tmp/cni-test + performance_test: + docker: + - image: circleci/golang:1.13-stretch + working_directory: /go/src/github.com/{{ORG_NAME}}/{{REPO_NAME}} + environment: + <<: *env + RUN_CONFORMANCE: "false" + RUN_PERFORMANCE_TESTS: "true" + steps: + - checkout + - setup_remote_docker + - aws-cli/setup: + profile-name: awstester + - restore_cache: + keys: + - dependency-packages-store-{{ checksum "test/integration/go.mod" }} + - dependency-packages-store- + - k8s/install-kubectl: + # requires 1.14.9 for k8s testing, since it uses log api. 
+ kubectl-version: v1.14.9 + - run: + name: Run the integration tests + command: ./scripts/run-integration-tests.sh + no_output_timeout: 15m + - save_cache: + key: dependency-packages-store-{{ checksum "test/integration/go.mod" }} + paths: + - /go/pkg + when: always + - store_artifacts: + path: /tmp/cni-test + + kops_test: + docker: + - image: circleci/golang:1.13-stretch + working_directory: /go/src/github.com/{{ORG_NAME}}/{{REPO_NAME}} + environment: + <<: *env + RUN_CONFORMANCE: "false" + RUN_KOPS_TEST: "true" + steps: + - checkout + - setup_remote_docker + - aws-cli/setup: + profile-name: awstester + - restore_cache: + keys: + - dependency-packages-store-{{ checksum "test/integration/go.mod" }} + - dependency-packages-store- + - k8s/install-kubectl: + # requires 1.14.9 for k8s testing, since it uses log api. + kubectl-version: v1.14.9 + - run: + name: Run the integration tests + command: ./scripts/run-integration-tests.sh + no_output_timeout: 15m + - save_cache: + key: dependency-packages-store-{{ checksum "test/integration/go.mod" }} + paths: + - /go/pkg + when: always + - store_artifacts: + path: /tmp/cni-test + workflows: version: 2 check: @@ -118,3 +182,18 @@ workflows: - master jobs: - integration_test + + # triggers weekly tests on master (Friday at 11 PM PST) + weekly-test-run: + triggers: + - schedule: + cron: "0 6 * * 6" + filters: + branches: + only: + - master + jobs: + - performance_test + - kops_test: + requires: + - performance_test diff --git a/scripts/lib/cluster.sh b/scripts/lib/cluster.sh index cbc426d9bb..5f49fd71bd 100644 --- a/scripts/lib/cluster.sh +++ b/scripts/lib/cluster.sh @@ -12,6 +12,15 @@ function down-test-cluster() { } function up-test-cluster() { + MNGS="" + if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then + 
MNGS='{"cni-test-single-node-mng":{"name":"cni-test-single-node-mng","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":1,"asg-desired-capacity":1,"instance-types":["m5.16xlarge"],"volume-size":40}, "cni-test-multi-node-mng":{"name":"cni-test-multi-node-mng","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":100,"asg-desired-capacity":3,"instance-types":["m5.xlarge"],"volume-size":40}}' + RUN_CONFORMANCE=false + : "${PERFORMANCE_TEST_S3_BUCKET_NAME:=""}" + else + MNGS='{"GetRef.Name-mng-for-cni":{"name":"GetRef.Name-mng-for-cni","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["c5.xlarge"],"volume-size":40}}' + fi + echo -n "Configuring cluster $CLUSTER_NAME" AWS_K8S_TESTER_EKS_NAME=$CLUSTER_NAME \ AWS_K8S_TESTER_EKS_LOG_COLOR=true \ @@ -26,7 +35,7 @@ function up-test-cluster() { AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_ENABLE=true \ AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_ROLE_CREATE=$ROLE_CREATE \ AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_ROLE_ARN=$ROLE_ARN \ - AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_MNGS='{"GetRef.Name-mng-for-cni":{"name":"GetRef.Name-mng-for-cni","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["c5.xlarge"],"volume-size":40}}' \ + AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_MNGS=$MNGS \ AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_FETCH_LOGS=true \ AWS_K8S_TESTER_EKS_ADD_ON_NLB_HELLO_WORLD_ENABLE=true \ AWS_K8S_TESTER_EKS_ADD_ON_ALB_2048_ENABLE=true \ diff --git a/scripts/lib/common.sh b/scripts/lib/common.sh index 
c01637245e..38788454da 100644 --- a/scripts/lib/common.sh +++ b/scripts/lib/common.sh @@ -25,6 +25,12 @@ function display_timelines() { echo "TIMELINE: Default CNI integration tests took $DEFAULT_INTEGRATION_DURATION seconds." echo "TIMELINE: Updating CNI image took $CNI_IMAGE_UPDATE_DURATION seconds." echo "TIMELINE: Current image integration tests took $CURRENT_IMAGE_INTEGRATION_DURATION seconds." - echo "TIMELINE: Conformance tests took $CONFORMANCE_DURATION seconds." + if [[ "$RUN_CONFORMANCE" == true ]]; then + echo "TIMELINE: Conformance tests took $CONFORMANCE_DURATION seconds." + fi + if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then + echo "TIMELINE: Performance tests took $PERFORMANCE_DURATION seconds." + fi echo "TIMELINE: Down processes took $DOWN_DURATION seconds." } + diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh new file mode 100644 index 0000000000..300b60a3a7 --- /dev/null +++ b/scripts/lib/performance_tests.sh @@ -0,0 +1,298 @@ +function check_for_timeout() { + if [[ $((SECONDS - $1)) -gt 1500 ]]; then + FAILURE_COUNT=$((FAILURE_COUNT + 1)) + HAS_FAILED=true + if [[ $FAILURE_COUNT -gt 1 ]]; then + RUNNING_PERFORMANCE=false + echo "Failed twice, deprovisioning cluster" + on_error + fi + echo "Failed once, retrying" + fi +} + +function save_results_to_file() { + echo $filename + echo "Date", "\"slot1\"", "\"slot2\"" >> $filename + echo $(date +"%m-%d-%Y-%T"), $((SCALE_UP_DURATION_ARRAY[0])), $((SCALE_DOWN_DURATION_ARRAY[0])) >> $filename + echo $(date +"%m-%d-%Y-%T"), $((SCALE_UP_DURATION_ARRAY[1])), $((SCALE_DOWN_DURATION_ARRAY[1])) >> $filename + echo $(date +"%m-%d-%Y-%T"), $((SCALE_UP_DURATION_ARRAY[2])), $((SCALE_DOWN_DURATION_ARRAY[2])) >> $filename + + cat $filename + if [[ ${#PERFORMANCE_TEST_S3_BUCKET_NAME} -gt 0 ]]; then + aws s3 cp $filename ${PERFORMANCE_TEST_S3_BUCKET_NAME}${1} + else + echo "No S3 bucket name given, skipping test result upload." 
+ fi +} + +function check_for_slow_performance() { + BUCKET=s3://cni-scale-test-data${1} + FILE1=`aws s3 ls ${BUCKET} | sort | tail -n 2 | sed -n '1 p' | awk '{print $4}'` + FILE2=`aws s3 ls ${BUCKET} | sort | tail -n 3 | sed -n '1 p' | awk '{print $4}'` + FILE3=`aws s3 ls ${BUCKET} | sort | tail -n 4 | sed -n '1 p' | awk '{print $4}'` + + PAST_PERFORMANCE_UP_AVERAGE_SUM=0 + PAST_PERFORMANCE_DOWN_AVERAGE_SUM=0 + find_performance_duration_average $FILE1 1 + find_performance_duration_average $FILE2 2 + find_performance_duration_average $FILE3 3 + PAST_PERFORMANCE_UP_AVERAGE=$((PAST_PERFORMANCE_UP_AVERAGE_SUM / 3)) + PAST_PERFORMANCE_DOWN_AVERAGE=$((PAST_PERFORMANCE_DOWN_AVERAGE_SUM / 3)) + + # Divided by 3 to get current average, multiply past averages by 5/4 to get 25% window + if [[ $((CURRENT_PERFORMANCE_UP_SUM / 3)) -gt $((PAST_PERFORMANCE_UP_AVERAGE * 5 / 4)) ]]; then + echo "FAILURE! Performance test pod UPPING took >25% longer than the past three tests" + echo "This tests time: $((CURRENT_PERFORMANCE_UP_SUM / 3))" + echo "Previous tests' time: ${PAST_PERFORMANCE_UP_AVERAGE}" + echo "********************************" + echo "Look into how current changes could cause cni inefficiency." 
+ echo "********************************" + on_error + fi +} + +function find_performance_duration_average() { + aws s3 cp ${BUCKET}${1} performance_test${2}.csv + SCALE_UP_TEMP_DURATION_SUM=0 + SCALE_DOWN_TEMP_DURATION_SUM=0 + for i in {2..4} + do + TEMP=$(sed -n "${i} p" performance_test${2}.csv) + PAIR=${TEMP#*,} + SCALE_UP_TEMP_DURATION_SUM=$((SCALE_UP_TEMP_DURATION_SUM + ${PAIR%%,*})) + SCALE_DOWN_TEMP_DURATION_SUM=$((SCALE_DOWN_TEMP_DURATION_SUM + ${PAIR##*,})) + done + PAST_PERFORMANCE_UP_AVERAGE_SUM=$(($PAST_PERFORMANCE_UP_AVERAGE_SUM + $((SCALE_UP_TEMP_DURATION_SUM / 3)))) + PAST_PERFORMANCE_DOWN_AVERAGE_SUM=$(($PAST_PERFORMANCE_DOWN_AVERAGE_SUM + $((SCALE_DOWN_TEMP_DURATION_SUM / 3)))) +} + +function run_performance_test_130_pods() { + echo "Running performance tests against cluster" + RUNNING_PERFORMANCE=true + $KUBECTL_PATH apply -f ./testdata/deploy-130-pods.yaml + + DEPLOY_START=$SECONDS + FAILURE_COUNT=0 + + SCALE_UP_DURATION_ARRAY=() + SCALE_DOWN_DURATION_ARRAY=() + CURRENT_PERFORMANCE_UP_SUM=0 + CURRENT_PERFORMANCE_DOWN_SUM=0 + while [ ${#SCALE_DOWN_DURATION_ARRAY[@]} -lt 3 ] + do + ITERATION_START=$SECONDS + HAS_FAILED=false + $KUBECTL_PATH scale -f ./testdata/deploy-130-pods.yaml --replicas=130 + while [[ ! 
$($KUBECTL_PATH get deploy | grep 130/130) && "$HAS_FAILED" == false ]] + do + sleep 1 + echo "Scaling UP" + echo $($KUBECTL_PATH get deploy) + check_for_timeout $ITERATION_START + done + + if [[ "$HAS_FAILED" == false ]]; then + DURATION=$((SECONDS - ITERATION_START)) + SCALE_UP_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_UP_SUM=$((CURRENT_PERFORMANCE_UP_SUM + DURATION)) + fi + $KUBECTL_PATH scale -f ./testdata/deploy-130-pods.yaml --replicas=0 + while [[ $($KUBECTL_PATH get pods) ]] + do + sleep 1 + echo "Scaling DOWN" + echo $($KUBECTL_PATH get deploy) + done + if [[ "$HAS_FAILED" == false ]]; then + DURATION=$((SECONDS - ITERATION_START)) + SCALE_DOWN_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_DOWN_SUM=$((CURRENT_PERFORMANCE_DOWN_SUM + DURATION)) + fi + done + + echo "Times to scale up:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_UP_DURATION_ARRAY[@]} ] + do + echo ${SCALE_UP_DURATION_ARRAY[$INDEX]} + INDEX=$((INDEX + 1)) + done + echo "" + echo "Times to scale down:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_DOWN_DURATION_ARRAY[@]} ] + do + echo "${SCALE_DOWN_DURATION_ARRAY[$INDEX]} seconds" + INDEX=$((INDEX + 1)) + done + echo "" + DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) + + filename="pod-130-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" + save_results_to_file "/130-pods/" + + echo "TIMELINE: 130 Pod performance test took $DEPLOY_DURATION seconds." 
+ RUNNING_PERFORMANCE=false + check_for_slow_performance "/130-pods/" + $KUBECTL_PATH delete -f ./testdata/deploy-130-pods.yaml +} + +function run_performance_test_730_pods() { + echo "Running performance tests against cluster" + RUNNING_PERFORMANCE=true + $KUBECTL_PATH apply -f ./testdata/deploy-730-pods.yaml + + DEPLOY_START=$SECONDS + FAILURE_COUNT=0 + + SCALE_UP_DURATION_ARRAY=() + SCALE_DOWN_DURATION_ARRAY=() + CURRENT_PERFORMANCE_UP_SUM=0 + CURRENT_PERFORMANCE_DOWN_SUM=0 + while [ ${#SCALE_DOWN_DURATION_ARRAY[@]} -lt 3 ] + do + ITERATION_START=$SECONDS + HAS_FAILED=false + $KUBECTL_PATH scale -f ./testdata/deploy-730-pods.yaml --replicas=730 + while [[ ! $($KUBECTL_PATH get deploy | grep 730/730) && "$HAS_FAILED" == false ]] + do + sleep 2 + echo "Scaling UP" + echo $($KUBECTL_PATH get deploy) + check_for_timeout $ITERATION_START + done + + if [[ "$HAS_FAILED" == false ]]; then + DURATION=$((SECONDS - ITERATION_START)) + SCALE_UP_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_UP_SUM=$((CURRENT_PERFORMANCE_UP_SUM + DURATION)) + fi + $KUBECTL_PATH scale -f ./testdata/deploy-730-pods.yaml --replicas=0 + while [[ $($KUBECTL_PATH get pods) ]] + do + sleep 2 + echo "Scaling DOWN" + echo $($KUBECTL_PATH get deploy) + done + if [[ "$HAS_FAILED" == false ]]; then + DURATION=$((SECONDS - ITERATION_START)) + SCALE_DOWN_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_DOWN_SUM=$((CURRENT_PERFORMANCE_DOWN_SUM + DURATION)) + fi + done + + echo "Times to scale up:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_UP_DURATION_ARRAY[@]} ] + do + echo ${SCALE_UP_DURATION_ARRAY[$INDEX]} + INDEX=$((INDEX + 1)) + done + echo "" + echo "Times to scale down:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_DOWN_DURATION_ARRAY[@]} ] + do + echo "${SCALE_DOWN_DURATION_ARRAY[$INDEX]} seconds" + INDEX=$((INDEX + 1)) + done + echo "" + DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) + + filename="pod-730-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" + save_results_to_file 
"/730-pods/" + + echo "TIMELINE: 730 Pod performance test took $DEPLOY_DURATION seconds." + RUNNING_PERFORMANCE=false + check_for_slow_performance "/730-pods/" + $KUBECTL_PATH delete -f ./testdata/deploy-730-pods.yaml +} + +function scale_nodes_for_5000_pod_test() { + AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B44 100,) + echo "Group info ${AUTO_SCALE_GROUP_INFO}" + AUTO_SCALE_GROUP_NAME_WITH_QUOTES=${AUTO_SCALE_GROUP_INFO%%,*} + echo "Group name with quotes ${AUTO_SCALE_GROUP_NAME_WITH_QUOTES}" + AUTO_SCALE_GROUP_NAME_WITH_QUOTES=${AUTO_SCALE_GROUP_NAME_WITH_QUOTES##* } + echo "Group name with quotes ${AUTO_SCALE_GROUP_NAME_WITH_QUOTES}" + AUTO_SCALE_GROUP_NAME="${AUTO_SCALE_GROUP_NAME_WITH_QUOTES%\"}" + echo "Group name ${AUTO_SCALE_GROUP_NAME}" + AUTO_SCALE_GROUP_NAME=$(echo $AUTO_SCALE_GROUP_NAME | cut -c2-) + echo $AUTO_SCALE_GROUP_NAME + + aws autoscaling update-auto-scaling-group \ + --auto-scaling-group-name $AUTO_SCALE_GROUP_NAME \ + --desired-capacity 99 +} + +function run_performance_test_5000_pods() { + echo "Running performance tests against cluster" + RUNNING_PERFORMANCE=true + $KUBECTL_PATH apply -f ./testdata/deploy-5000-pods.yaml + + DEPLOY_START=$SECONDS + FAILURE_COUNT=0 + + SCALE_UP_DURATION_ARRAY=() + SCALE_DOWN_DURATION_ARRAY=() + CURRENT_PERFORMANCE_UP_SUM=0 + CURRENT_PERFORMANCE_DOWN_SUM=0 + while [ ${#SCALE_DOWN_DURATION_ARRAY[@]} -lt 3 ] + do + ITERATION_START=$SECONDS + HAS_FAILED=false + $KUBECTL_PATH scale -f ./testdata/deploy-5000-pods.yaml --replicas=5000 + while [[ ! 
$($KUBECTL_PATH get deploy | grep 5000/5000) && "$HAS_FAILED" == false ]] + do + sleep 2 + echo "Scaling UP" + echo $($KUBECTL_PATH get deploy) + check_for_timeout $ITERATION_START + done + + if [[ "$HAS_FAILED" == false ]]; then + DURATION=$((SECONDS - ITERATION_START)) + SCALE_UP_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_UP_SUM=$((CURRENT_PERFORMANCE_UP_SUM + DURATION)) + fi + $KUBECTL_PATH scale -f ./testdata/deploy-5000-pods.yaml --replicas=0 + while [[ $($KUBECTL_PATH get pods) ]] + do + sleep 2 + echo "Scaling DOWN" + echo $($KUBECTL_PATH get deploy) + done + if [[ "$HAS_FAILED" == false ]]; then + DURATION=$((SECONDS - ITERATION_START)) + SCALE_DOWN_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_DOWN_SUM=$((CURRENT_PERFORMANCE_DOWN_SUM + DURATION)) + fi + done + + echo "Times to scale up:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_UP_DURATION_ARRAY[@]} ] + do + echo ${SCALE_UP_DURATION_ARRAY[$INDEX]} + INDEX=$((INDEX + 1)) + done + echo "" + echo "Times to scale down:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_DOWN_DURATION_ARRAY[@]} ] + do + echo "${SCALE_DOWN_DURATION_ARRAY[$INDEX]} seconds" + INDEX=$((INDEX + 1)) + done + echo "" + DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) + + filename="pod-5000-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" + save_results_to_file "/5000-pods/" + + echo "TIMELINE: 5000 Pod performance test took $DEPLOY_DURATION seconds." 
+ RUNNING_PERFORMANCE=false + check_for_slow_performance "/5000-pods/" + $KUBECTL_PATH delete -f ./testdata/deploy-5000-pods.yaml +} diff --git a/scripts/run-integration-tests.sh b/scripts/run-integration-tests.sh index 6ffc15cdba..46d582b2e1 100755 --- a/scripts/run-integration-tests.sh +++ b/scripts/run-integration-tests.sh @@ -9,6 +9,7 @@ source "$DIR"/lib/common.sh source "$DIR"/lib/aws.sh source "$DIR"/lib/cluster.sh source "$DIR"/lib/integration.sh +source "$DIR"/lib/performance_tests.sh # Variables used in /lib/aws.sh OS=$(go env GOOS) @@ -22,29 +23,30 @@ ARCH=$(go env GOARCH) : "${RUN_CONFORMANCE:=false}" : "${RUN_KOPS_TEST:=false}" : "${RUN_BOTTLEROCKET_TEST:=false}" +: "${RUN_PERFORMANCE_TESTS:=false}" +: "${RUNNING_PERFORMANCE:=false}" __cluster_created=0 __cluster_deprovisioned=0 on_error() { # Make sure we destroy any cluster that was created if we hit run into an - # error when attempting to run tests against the cluster - if [[ $__cluster_created -eq 1 && $__cluster_deprovisioned -eq 0 && "$DEPROVISION" == true ]]; then - echo "Cluster was provisioned already. Deprovisioning it..." - __cluster_deprovisioned=1 - if [[ $RUN_KOPS_TEST == true ]]; then - echo "Cluster was provisioned already. Deprovisioning it..." - down-kops-cluster - elif [[ $RUN_BOTTLEROCKET_TEST == true ]]; then - eksctl delete cluster bottlerocket - else + # error when attempting to run tests against the + if [[ $RUNNING_PERFORMANCE == false ]]; then + if [[ $__cluster_created -eq 1 && $__cluster_deprovisioned -eq 0 && "$DEPROVISION" == true ]]; then # prevent double-deprovisioning with ctrl-c during deprovisioning... + __cluster_deprovisioned=1 echo "Cluster was provisioned already. Deprovisioning it..." 
- down-test-cluster + if [[ $RUN_KOPS_TEST == true ]]; then + down-kops-cluster + elif [[ $RUN_BOTTLEROCKET_TEST == true ]]; then + eksctl delete cluster bottlerocket + else + down-test-cluster + fi fi + exit 1 fi - - exit 1 } # test specific config, results location @@ -176,6 +178,7 @@ echo "Using $BASE_CONFIG_PATH as a template" cp "$BASE_CONFIG_PATH" "$TEST_CONFIG_PATH" # Daemonset template +echo "IMAGE NAME ${IMAGE_NAME} " sed -i'.bak' "s,602401143452.dkr.ecr.us-west-2.amazonaws.com/amazon-k8s-cni,$IMAGE_NAME," "$TEST_CONFIG_PATH" sed -i'.bak' "s,:$MANIFEST_IMAGE_VERSION,:$TEST_IMAGE_VERSION," "$TEST_CONFIG_PATH" sed -i'.bak' "s,602401143452.dkr.ecr.us-west-2.amazonaws.com/amazon-k8s-cni-init,$INIT_IMAGE_NAME," "$TEST_CONFIG_PATH" @@ -249,6 +252,18 @@ if [[ $TEST_PASS -eq 0 && "$RUN_CONFORMANCE" == true ]]; then echo "TIMELINE: Conformance tests took $CONFORMANCE_DURATION seconds." fi +if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then + echo "*******************************************************************************" + echo "Running performance tests on current image:" + echo "" + START=$SECONDS + run_performance_test_130_pods + scale_nodes_for_5000_pod_test + run_performance_test_730_pods + run_performance_test_5000_pods + PERFORMANCE_DURATION=$((SECONDS - START)) +fi + if [[ "$DEPROVISION" == true ]]; then START=$SECONDS diff --git a/test/integration/README.md b/test/integration/README.md new file mode 100644 index 0000000000..d798f31211 --- /dev/null +++ b/test/integration/README.md @@ -0,0 +1,70 @@ +## How to run tests +# All tests + * set AWS_ACCESS_KEY_ID + * set AWS_SECRET_ACCESS_KEY + * set AWS_DEFAULT_REGION (optional, defaults to us-west-2 if not set) + * approve test after build completes + * Can only run one of the following tests at a time, as most need a unique cluster to work on + +# Performance + * run from cni test account to upload test results + * set PERFORMANCE_TEST_S3_BUCKET_NAME to the name of the bucket (likely 
s3://cni-performance-test-data) + * set RUN_PERFORMANCE_TESTS=true + * NOTE: if running on previous versions, change the date inside of the file to the date of release so as to not confuse graphing order + +# KOPS + * set RUN_KOPS_TEST=true + * WARNING: will occasionally fail/flake tests, try re-running test a couple times to ensure there is a real issue + +# Warm IP + * set RUN_WARM_IP_TEST=true + +# Warm ENI + * set RUN_WARM_ENI_TEST=true + + + +## Conformance test duration log + +* May 20, 2020: Initial integration step took roughly 3h 41min +* May 27: 3h 1min + * Skip tests labeled as “Slow” for Ginkgo framework + * Timelines: + * Default CNI: 73s + * Updating CNI image: 110s + * Current image integration: 47s + * Conformance tests: 119.167 min (2 hrs) + * Down cluster: 30 min +* May 29: 2h 59min 30s + * Cache dependencies when testing default CNI + * Timelines: + * Docker build: 4 min + * Up test cluster: 31 min + * Default CNI: 50s + * Updating CNI image: 92s + * Current image integration: 17s + * Conformance tests: 114 min (1.9 hrs) + * Down cluster: 30 min +* June 5: 1h 24min 9s + * Parallel execution of conformance tests + * Timelines: + * Docker build: 3 min + * Up test cluster: 31 min + * Default CNI: 52s + * Updating CNI image: 92s + * Current image integration: 18s + * Conformance tests: 16 min + * Down cluster: 30 min + + + +## How to Manually delete k8s tester Resources (order of deletion) + +Cloudformation - (all except cluster, vpc) +EC2 - load balancers, key pair +VPC - Nat gateways, Elastic IPs(after a minute), internet gateway +Cloudformation - cluster +EC2 - network interfaces, security groups +VPC - subnet, route tables +Cloudformation - cluster, vpc(after cluster deletes) +S3 - delete bucket \ No newline at end of file diff --git a/testdata/deploy-130-pods.yaml b/testdata/deploy-130-pods.yaml new file mode 100644 index 0000000000..74f1f938a6 --- /dev/null +++ b/testdata/deploy-130-pods.yaml @@ -0,0 +1,26 @@ +apiVersion: apps/v1 +kind: Deployment
+metadata: + name: deploy-130-pods +spec: + replicas: 1 + selector: + matchLabels: + app: deploy-130-pods + template: + metadata: + name: test-pod-130 + labels: + app: deploy-130-pods + tier: backend + track: stable + spec: + containers: + - name: hello + image: "kubernetes/pause:latest" + ports: + - name: http + containerPort: 80 + imagePullPolicy: IfNotPresent + nodeSelector: + eks.amazonaws.com/nodegroup: cni-test-multi-node-mng diff --git a/testdata/deploy-5000-pods.yaml b/testdata/deploy-5000-pods.yaml new file mode 100644 index 0000000000..6ef572dffa --- /dev/null +++ b/testdata/deploy-5000-pods.yaml @@ -0,0 +1,26 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: deploy-5000-pods +spec: + replicas: 1 + selector: + matchLabels: + app: deploy-5000-pods + template: + metadata: + name: test-pod-5000 + labels: + app: deploy-5000-pods + tier: backend + track: stable + spec: + containers: + - name: hello + image: "kubernetes/pause:latest" + ports: + - name: http + containerPort: 80 + imagePullPolicy: IfNotPresent + nodeSelector: + eks.amazonaws.com/nodegroup: cni-test-multi-node-mng diff --git a/testdata/deploy-730-pods.yaml b/testdata/deploy-730-pods.yaml new file mode 100644 index 0000000000..97a4b913f4 --- /dev/null +++ b/testdata/deploy-730-pods.yaml @@ -0,0 +1,26 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: deploy-730-pods +spec: + replicas: 1 + selector: + matchLabels: + app: deploy-730-pods + template: + metadata: + name: test-pod-730 + labels: + app: deploy-730-pods + tier: backend + track: stable + spec: + containers: + - name: hello + image: "kubernetes/pause:latest" + ports: + - name: http + containerPort: 80 + imagePullPolicy: IfNotPresent + nodeSelector: + eks.amazonaws.com/nodegroup: cni-test-single-node-mng