From 1ff4d5d1a2401657bbf1229c38266b624874850c Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Thu, 23 Jul 2020 14:11:48 -0400 Subject: [PATCH] Introduce automated performance testing. (#1068) * Add version info to file, display start of performance tests. * Scale up node group before running 5000 pod test. * Create unique mng names. * Update data files for performance tests. * Add failure checking for performance tests. * Upload files to corresponding folders in s3 bucket. * Check for slow performance update. * Weekly performance test (midnight Wednesday) --- .circleci/config.yml | 79 ++++++++ scripts/lib/cluster.sh | 11 +- scripts/lib/common.sh | 8 +- scripts/lib/performance_tests.sh | 298 +++++++++++++++++++++++++++++++ scripts/run-integration-tests.sh | 41 +++-- test/integration/README.md | 70 ++++++++ testdata/deploy-130-pods.yaml | 26 +++ testdata/deploy-5000-pods.yaml | 26 +++ testdata/deploy-730-pods.yaml | 26 +++ 9 files changed, 570 insertions(+), 15 deletions(-) create mode 100644 scripts/lib/performance_tests.sh create mode 100644 test/integration/README.md create mode 100644 testdata/deploy-130-pods.yaml create mode 100644 testdata/deploy-5000-pods.yaml create mode 100644 testdata/deploy-730-pods.yaml diff --git a/.circleci/config.yml b/.circleci/config.yml index da8bb249ea..ebf7bac4d9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -82,6 +82,70 @@ jobs: - store_artifacts: path: /tmp/cni-test + performance_test: + docker: + - image: circleci/golang:1.13-stretch + working_directory: /go/src/github.com/{{ORG_NAME}}/{{REPO_NAME}} + environment: + <<: *env + RUN_CONFORMANCE: "false" + RUN_PERFORMANCE_TESTS: "true" + steps: + - checkout + - setup_remote_docker + - aws-cli/setup: + profile-name: awstester + - restore_cache: + keys: + - dependency-packages-store-{{ checksum "test/integration/go.mod" }} + - dependency-packages-store- + - k8s/install-kubectl: + # requires 1.14.9 for k8s testing, since it uses log api. 
+ kubectl-version: v1.14.9 + - run: + name: Run the integration tests + command: ./scripts/run-integration-tests.sh + no_output_timeout: 15m + - save_cache: + key: dependency-packages-store-{{ checksum "test/integration/go.mod" }} + paths: + - /go/pkg + when: always + - store_artifacts: + path: /tmp/cni-test + + kops_test: + docker: + - image: circleci/golang:1.13-stretch + working_directory: /go/src/github.com/{{ORG_NAME}}/{{REPO_NAME}} + environment: + <<: *env + RUN_CONFORMANCE: "false" + RUN_KOPS_TEST: "true" + steps: + - checkout + - setup_remote_docker + - aws-cli/setup: + profile-name: awstester + - restore_cache: + keys: + - dependency-packages-store-{{ checksum "test/integration/go.mod" }} + - dependency-packages-store- + - k8s/install-kubectl: + # requires 1.14.9 for k8s testing, since it uses log api. + kubectl-version: v1.14.9 + - run: + name: Run the integration tests + command: ./scripts/run-integration-tests.sh + no_output_timeout: 15m + - save_cache: + key: dependency-packages-store-{{ checksum "test/integration/go.mod" }} + paths: + - /go/pkg + when: always + - store_artifacts: + path: /tmp/cni-test + workflows: version: 2 check: @@ -118,3 +182,18 @@ workflows: - master jobs: - integration_test + + # triggers weekly tests on master (Friday at 11 PM PST) + weekly-test-run: + triggers: + - schedule: + cron: "0 6 * * 6" + filters: + branches: + only: + - master + jobs: + - performance_test + - kops_test: + requires: + - performance_test diff --git a/scripts/lib/cluster.sh b/scripts/lib/cluster.sh index cbc426d9bb..5f49fd71bd 100644 --- a/scripts/lib/cluster.sh +++ b/scripts/lib/cluster.sh @@ -12,6 +12,15 @@ function down-test-cluster() { } function up-test-cluster() { + MNGS="" + if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then + 
MNGS='{"cni-test-single-node-mng":{"name":"cni-test-single-node-mng","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":1,"asg-desired-capacity":1,"instance-types":["m5.16xlarge"],"volume-size":40}, "cni-test-multi-node-mng":{"name":"cni-test-multi-node-mng","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":100,"asg-desired-capacity":3,"instance-types":["m5.xlarge"],"volume-size":40}}' + RUN_CONFORMANCE=false + : "${PERFORMANCE_TEST_S3_BUCKET_NAME:=""}" + else + MNGS='{"GetRef.Name-mng-for-cni":{"name":"GetRef.Name-mng-for-cni","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["c5.xlarge"],"volume-size":40}}' + fi + echo -n "Configuring cluster $CLUSTER_NAME" AWS_K8S_TESTER_EKS_NAME=$CLUSTER_NAME \ AWS_K8S_TESTER_EKS_LOG_COLOR=true \ @@ -26,7 +35,7 @@ function up-test-cluster() { AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_ENABLE=true \ AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_ROLE_CREATE=$ROLE_CREATE \ AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_ROLE_ARN=$ROLE_ARN \ - AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_MNGS='{"GetRef.Name-mng-for-cni":{"name":"GetRef.Name-mng-for-cni","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["c5.xlarge"],"volume-size":40}}' \ + AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_MNGS=$MNGS \ AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_FETCH_LOGS=true \ AWS_K8S_TESTER_EKS_ADD_ON_NLB_HELLO_WORLD_ENABLE=true \ AWS_K8S_TESTER_EKS_ADD_ON_ALB_2048_ENABLE=true \ diff --git a/scripts/lib/common.sh b/scripts/lib/common.sh index 
c01637245e..38788454da 100644 --- a/scripts/lib/common.sh +++ b/scripts/lib/common.sh @@ -25,6 +25,12 @@ function display_timelines() { echo "TIMELINE: Default CNI integration tests took $DEFAULT_INTEGRATION_DURATION seconds." echo "TIMELINE: Updating CNI image took $CNI_IMAGE_UPDATE_DURATION seconds." echo "TIMELINE: Current image integration tests took $CURRENT_IMAGE_INTEGRATION_DURATION seconds." - echo "TIMELINE: Conformance tests took $CONFORMANCE_DURATION seconds." + if [[ "$RUN_CONFORMANCE" == true ]]; then + echo "TIMELINE: Conformance tests took $CONFORMANCE_DURATION seconds." + fi + if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then + echo "TIMELINE: Performance tests took $PERFORMANCE_DURATION seconds." + fi echo "TIMELINE: Down processes took $DOWN_DURATION seconds." } + diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh new file mode 100644 index 0000000000..300b60a3a7 --- /dev/null +++ b/scripts/lib/performance_tests.sh @@ -0,0 +1,298 @@ +function check_for_timeout() { + if [[ $((SECONDS - $1)) -gt 1500 ]]; then + FAILURE_COUNT=$((FAILURE_COUNT + 1)) + HAS_FAILED=true + if [[ $FAILURE_COUNT -gt 1 ]]; then + RUNNING_PERFORMANCE=false + echo "Failed twice, deprovisioning cluster" + on_error + fi + echo "Failed once, retrying" + fi +} + +function save_results_to_file() { + echo $filename + echo "Date", "\"slot1\"", "\"slot2\"" >> $filename + echo $(date +"%m-%d-%Y-%T"), $((SCALE_UP_DURATION_ARRAY[0])), $((SCALE_DOWN_DURATION_ARRAY[0])) >> $filename + echo $(date +"%m-%d-%Y-%T"), $((SCALE_UP_DURATION_ARRAY[1])), $((SCALE_DOWN_DURATION_ARRAY[1])) >> $filename + echo $(date +"%m-%d-%Y-%T"), $((SCALE_UP_DURATION_ARRAY[2])), $((SCALE_DOWN_DURATION_ARRAY[2])) >> $filename + + cat $filename + if [[ ${#PERFORMANCE_TEST_S3_BUCKET_NAME} -gt 0 ]]; then + aws s3 cp $filename ${PERFORMANCE_TEST_S3_BUCKET_NAME}${1} + else + echo "No S3 bucket name given, skipping test result upload." 
+ fi +} + +function check_for_slow_performance() { + BUCKET=s3://cni-scale-test-data${1} + FILE1=`aws s3 ls ${BUCKET} | sort | tail -n 2 | sed -n '1 p' | awk '{print $4}'` + FILE2=`aws s3 ls ${BUCKET} | sort | tail -n 3 | sed -n '1 p' | awk '{print $4}'` + FILE3=`aws s3 ls ${BUCKET} | sort | tail -n 4 | sed -n '1 p' | awk '{print $4}'` + + PAST_PERFORMANCE_UP_AVERAGE_SUM=0 + PAST_PERFORMANCE_DOWN_AVERAGE_SUM=0 + find_performance_duration_average $FILE1 1 + find_performance_duration_average $FILE2 2 + find_performance_duration_average $FILE3 3 + PAST_PERFORMANCE_UP_AVERAGE=$((PAST_PERFORMANCE_UP_AVERAGE_SUM / 3)) + PAST_PERFORMANCE_DOWN_AVERAGE=$((PAST_PERFORMANCE_DOWN_AVERAGE_SUM / 3)) + + # Divided by 3 to get current average, multiply past averages by 5/4 to get 25% window + if [[ $((CURRENT_PERFORMANCE_UP_SUM / 3)) -gt $((PAST_PERFORMANCE_UP_AVERAGE * 5 / 4)) ]]; then + echo "FAILURE! Performance test pod UPPING took >25% longer than the past three tests" + echo "This tests time: $((CURRENT_PERFORMANCE_UP_SUM / 3))" + echo "Previous tests' time: ${PAST_PERFORMANCE_UP_AVERAGE}" + echo "********************************" + echo "Look into how current changes could cause cni inefficiency." 
+ echo "********************************" + on_error + fi +} + +function find_performance_duration_average() { + aws s3 cp ${BUCKET}${1} performance_test${2}.csv + SCALE_UP_TEMP_DURATION_SUM=0 + SCALE_DOWN_TEMP_DURATION_SUM=0 + for i in {2..4} + do + TEMP=$(sed -n "${i} p" performance_test${2}.csv) + PAIR=${TEMP#*,} + SCALE_UP_TEMP_DURATION_SUM=$((SCALE_UP_TEMP_DURATION_SUM + ${PAIR%%,*})) + SCALE_DOWN_TEMP_DURATION_SUM=$((SCALE_DOWN_TEMP_DURATION_SUM + ${PAIR##*,})) + done + PAST_PERFORMANCE_UP_AVERAGE_SUM=$(($PAST_PERFORMANCE_UP_AVERAGE_SUM + $((SCALE_UP_TEMP_DURATION_SUM / 3)))) + PAST_PERFORMANCE_DOWN_AVERAGE_SUM=$(($PAST_PERFORMANCE_DOWN_AVERAGE_SUM + $((SCALE_DOWN_TEMP_DURATION_SUM / 3)))) +} + +function run_performance_test_130_pods() { + echo "Running performance tests against cluster" + RUNNING_PERFORMANCE=true + $KUBECTL_PATH apply -f ./testdata/deploy-130-pods.yaml + + DEPLOY_START=$SECONDS + FAILURE_COUNT=0 + + SCALE_UP_DURATION_ARRAY=() + SCALE_DOWN_DURATION_ARRAY=() + CURRENT_PERFORMANCE_UP_SUM=0 + CURRENT_PERFORMANCE_DOWN_SUM=0 + while [ ${#SCALE_DOWN_DURATION_ARRAY[@]} -lt 3 ] + do + ITERATION_START=$SECONDS + HAS_FAILED=false + $KUBECTL_PATH scale -f ./testdata/deploy-130-pods.yaml --replicas=130 + while [[ ! 
$($KUBECTL_PATH get deploy | grep 130/130) && "$HAS_FAILED" == false ]] + do + sleep 1 + echo "Scaling UP" + echo $($KUBECTL_PATH get deploy) + check_for_timeout $ITERATION_START + done + + if [[ "$HAS_FAILED" == false ]]; then + DURATION=$((SECONDS - ITERATION_START)) + SCALE_UP_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_UP_SUM=$((CURRENT_PERFORMANCE_UP_SUM + DURATION)) + fi + $KUBECTL_PATH scale -f ./testdata/deploy-130-pods.yaml --replicas=0 + while [[ $($KUBECTL_PATH get pods) ]] + do + sleep 1 + echo "Scaling DOWN" + echo $($KUBECTL_PATH get deploy) + done + if [[ "$HAS_FAILED" == false ]]; then + DURATION=$((SECONDS - ITERATION_START)) + SCALE_DOWN_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_DOWN_SUM=$((CURRENT_PERFORMANCE_DOWN_SUM + DURATION)) + fi + done + + echo "Times to scale up:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_UP_DURATION_ARRAY[@]} ] + do + echo ${SCALE_UP_DURATION_ARRAY[$INDEX]} + INDEX=$((INDEX + 1)) + done + echo "" + echo "Times to scale down:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_DOWN_DURATION_ARRAY[@]} ] + do + echo "${SCALE_DOWN_DURATION_ARRAY[$INDEX]} seconds" + INDEX=$((INDEX + 1)) + done + echo "" + DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) + + filename="pod-130-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" + save_results_to_file "/130-pods/" + + echo "TIMELINE: 130 Pod performance test took $DEPLOY_DURATION seconds." 
+ RUNNING_PERFORMANCE=false + check_for_slow_performance "/130-pods/" + $KUBECTL_PATH delete -f ./testdata/deploy-130-pods.yaml +} + +function run_performance_test_730_pods() { + echo "Running performance tests against cluster" + RUNNING_PERFORMANCE=true + $KUBECTL_PATH apply -f ./testdata/deploy-730-pods.yaml + + DEPLOY_START=$SECONDS + FAILURE_COUNT=0 + + SCALE_UP_DURATION_ARRAY=() + SCALE_DOWN_DURATION_ARRAY=() + CURRENT_PERFORMANCE_UP_SUM=0 + CURRENT_PERFORMANCE_DOWN_SUM=0 + while [ ${#SCALE_DOWN_DURATION_ARRAY[@]} -lt 3 ] + do + ITERATION_START=$SECONDS + HAS_FAILED=false + $KUBECTL_PATH scale -f ./testdata/deploy-730-pods.yaml --replicas=730 + while [[ ! $($KUBECTL_PATH get deploy | grep 730/730) && "$HAS_FAILED" == false ]] + do + sleep 2 + echo "Scaling UP" + echo $($KUBECTL_PATH get deploy) + check_for_timeout $ITERATION_START + done + + if [[ "$HAS_FAILED" == false ]]; then + DURATION=$((SECONDS - ITERATION_START)) + SCALE_UP_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_UP_SUM=$((CURRENT_PERFORMANCE_UP_SUM + DURATION)) + fi + $KUBECTL_PATH scale -f ./testdata/deploy-730-pods.yaml --replicas=0 + while [[ $($KUBECTL_PATH get pods) ]] + do + sleep 2 + echo "Scaling DOWN" + echo $($KUBECTL_PATH get deploy) + done + if [[ "$HAS_FAILED" == false ]]; then + DURATION=$((SECONDS - ITERATION_START)) + SCALE_DOWN_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_DOWN_SUM=$((CURRENT_PERFORMANCE_DOWN_SUM + DURATION)) + fi + done + + echo "Times to scale up:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_UP_DURATION_ARRAY[@]} ] + do + echo ${SCALE_UP_DURATION_ARRAY[$INDEX]} + INDEX=$((INDEX + 1)) + done + echo "" + echo "Times to scale down:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_DOWN_DURATION_ARRAY[@]} ] + do + echo "${SCALE_DOWN_DURATION_ARRAY[$INDEX]} seconds" + INDEX=$((INDEX + 1)) + done + echo "" + DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) + + filename="pod-730-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" + save_results_to_file 
"/730-pods/" + + echo "TIMELINE: 730 Pod performance test took $DEPLOY_DURATION seconds." + RUNNING_PERFORMANCE=false + check_for_slow_performance "/730-pods/" + $KUBECTL_PATH delete -f ./testdata/deploy-730-pods.yaml +} + +function scale_nodes_for_5000_pod_test() { + AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B44 100,) + echo "Group info ${AUTO_SCALE_GROUP_INFO}" + AUTO_SCALE_GROUP_NAME_WITH_QUOTES=${AUTO_SCALE_GROUP_INFO%%,*} + echo "Group name with quotes ${AUTO_SCALE_GROUP_NAME_WITH_QUOTES}" + AUTO_SCALE_GROUP_NAME_WITH_QUOTES=${AUTO_SCALE_GROUP_NAME_WITH_QUOTES##* } + echo "Group name with quotes ${AUTO_SCALE_GROUP_NAME_WITH_QUOTES}" + AUTO_SCALE_GROUP_NAME="${AUTO_SCALE_GROUP_NAME_WITH_QUOTES%\"}" + echo "Group name ${AUTO_SCALE_GROUP_NAME}" + AUTO_SCALE_GROUP_NAME=$(echo $AUTO_SCALE_GROUP_NAME | cut -c2-) + echo $AUTO_SCALE_GROUP_NAME + + aws autoscaling update-auto-scaling-group \ + --auto-scaling-group-name $AUTO_SCALE_GROUP_NAME \ + --desired-capacity 99 +} + +function run_performance_test_5000_pods() { + echo "Running performance tests against cluster" + RUNNING_PERFORMANCE=true + $KUBECTL_PATH apply -f ./testdata/deploy-5000-pods.yaml + + DEPLOY_START=$SECONDS + FAILURE_COUNT=0 + + SCALE_UP_DURATION_ARRAY=() + SCALE_DOWN_DURATION_ARRAY=() + CURRENT_PERFORMANCE_UP_SUM=0 + CURRENT_PERFORMANCE_DOWN_SUM=0 + while [ ${#SCALE_DOWN_DURATION_ARRAY[@]} -lt 3 ] + do + ITERATION_START=$SECONDS + HAS_FAILED=false + $KUBECTL_PATH scale -f ./testdata/deploy-5000-pods.yaml --replicas=5000 + while [[ ! 
$($KUBECTL_PATH get deploy | grep 5000/5000) && "$HAS_FAILED" == false ]] + do + sleep 2 + echo "Scaling UP" + echo $($KUBECTL_PATH get deploy) + check_for_timeout $ITERATION_START + done + + if [[ "$HAS_FAILED" == false ]]; then + DURATION=$((SECONDS - ITERATION_START)) + SCALE_UP_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_UP_SUM=$((CURRENT_PERFORMANCE_UP_SUM + DURATION)) + fi + $KUBECTL_PATH scale -f ./testdata/deploy-5000-pods.yaml --replicas=0 + while [[ $($KUBECTL_PATH get pods) ]] + do + sleep 2 + echo "Scaling DOWN" + echo $($KUBECTL_PATH get deploy) + done + if [[ "$HAS_FAILED" == false ]]; then + DURATION=$((SECONDS - ITERATION_START)) + SCALE_DOWN_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_DOWN_SUM=$((CURRENT_PERFORMANCE_DOWN_SUM + DURATION)) + fi + done + + echo "Times to scale up:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_UP_DURATION_ARRAY[@]} ] + do + echo ${SCALE_UP_DURATION_ARRAY[$INDEX]} + INDEX=$((INDEX + 1)) + done + echo "" + echo "Times to scale down:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_DOWN_DURATION_ARRAY[@]} ] + do + echo "${SCALE_DOWN_DURATION_ARRAY[$INDEX]} seconds" + INDEX=$((INDEX + 1)) + done + echo "" + DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) + + filename="pod-5000-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" + save_results_to_file "/5000-pods/" + + echo "TIMELINE: 5000 Pod performance test took $DEPLOY_DURATION seconds." 
+ RUNNING_PERFORMANCE=false + check_for_slow_performance "/5000-pods/" + $KUBECTL_PATH delete -f ./testdata/deploy-5000-pods.yaml +} diff --git a/scripts/run-integration-tests.sh b/scripts/run-integration-tests.sh index 6ffc15cdba..46d582b2e1 100755 --- a/scripts/run-integration-tests.sh +++ b/scripts/run-integration-tests.sh @@ -9,6 +9,7 @@ source "$DIR"/lib/common.sh source "$DIR"/lib/aws.sh source "$DIR"/lib/cluster.sh source "$DIR"/lib/integration.sh +source "$DIR"/lib/performance_tests.sh # Variables used in /lib/aws.sh OS=$(go env GOOS) @@ -22,29 +23,30 @@ ARCH=$(go env GOARCH) : "${RUN_CONFORMANCE:=false}" : "${RUN_KOPS_TEST:=false}" : "${RUN_BOTTLEROCKET_TEST:=false}" +: "${RUN_PERFORMANCE_TESTS:=false}" +: "${RUNNING_PERFORMANCE:=false}" __cluster_created=0 __cluster_deprovisioned=0 on_error() { # Make sure we destroy any cluster that was created if we hit run into an - # error when attempting to run tests against the cluster - if [[ $__cluster_created -eq 1 && $__cluster_deprovisioned -eq 0 && "$DEPROVISION" == true ]]; then - echo "Cluster was provisioned already. Deprovisioning it..." - __cluster_deprovisioned=1 - if [[ $RUN_KOPS_TEST == true ]]; then - echo "Cluster was provisioned already. Deprovisioning it..." - down-kops-cluster - elif [[ $RUN_BOTTLEROCKET_TEST == true ]]; then - eksctl delete cluster bottlerocket - else + # error when attempting to run tests against the + if [[ $RUNNING_PERFORMANCE == false ]]; then + if [[ $__cluster_created -eq 1 && $__cluster_deprovisioned -eq 0 && "$DEPROVISION" == true ]]; then # prevent double-deprovisioning with ctrl-c during deprovisioning... + __cluster_deprovisioned=1 echo "Cluster was provisioned already. Deprovisioning it..." 
- down-test-cluster + if [[ $RUN_KOPS_TEST == true ]]; then + down-kops-cluster + elif [[ $RUN_BOTTLEROCKET_TEST == true ]]; then + eksctl delete cluster bottlerocket + else + down-test-cluster + fi fi + exit 1 fi - - exit 1 } # test specific config, results location @@ -176,6 +178,7 @@ echo "Using $BASE_CONFIG_PATH as a template" cp "$BASE_CONFIG_PATH" "$TEST_CONFIG_PATH" # Daemonset template +echo "IMAGE NAME ${IMAGE_NAME} " sed -i'.bak' "s,602401143452.dkr.ecr.us-west-2.amazonaws.com/amazon-k8s-cni,$IMAGE_NAME," "$TEST_CONFIG_PATH" sed -i'.bak' "s,:$MANIFEST_IMAGE_VERSION,:$TEST_IMAGE_VERSION," "$TEST_CONFIG_PATH" sed -i'.bak' "s,602401143452.dkr.ecr.us-west-2.amazonaws.com/amazon-k8s-cni-init,$INIT_IMAGE_NAME," "$TEST_CONFIG_PATH" @@ -249,6 +252,18 @@ if [[ $TEST_PASS -eq 0 && "$RUN_CONFORMANCE" == true ]]; then echo "TIMELINE: Conformance tests took $CONFORMANCE_DURATION seconds." fi +if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then + echo "*******************************************************************************" + echo "Running performance tests on current image:" + echo "" + START=$SECONDS + run_performance_test_130_pods + scale_nodes_for_5000_pod_test + run_performance_test_730_pods + run_performance_test_5000_pods + PERFORMANCE_DURATION=$((SECONDS - START)) +fi + if [[ "$DEPROVISION" == true ]]; then START=$SECONDS diff --git a/test/integration/README.md b/test/integration/README.md new file mode 100644 index 0000000000..d798f31211 --- /dev/null +++ b/test/integration/README.md @@ -0,0 +1,70 @@ +## How to run tests +# All tests + * set AWS_ACCESS_KEY_ID + * set AWS_SECRET_ACCESS_KEY + * set AWS_DEFAULT_REGION (optional, defaults to us-west-2 if not set) + * approve test after build completes + * Can only run one of the following tests at a time, as most need a unique cluster to work on + +# Performance + * run from cni test account to upload test results + * set PERFORMANCE_TEST_S3_BUCKET_NAME to the name of the bucket (likely 
s3://cni-performance-test-data) + * set RUN_PERFORMANCE_TESTS=true + * NOTE: if running on previous versions, change the date inside of the file to the date of release so as to not confuse graphing order + +# KOPS + * set RUN_KOPS_TEST=true + * WARNING: will occasionally fail/flake tests, try re-running test a couple times to ensure there is a real issue + +# Warm IP + * set RUN_WARM_IP_TEST=true + +# Warm ENI + * set RUN_WARM_ENI_TEST=true + + + +## Conformance test duration log + +* May 20, 2020: Initial integration step took roughly 3h 41min +* May 27: 3h 1min + * Skip tests labeled as “Slow” for Ginkgo framework + * Timelines: + * Default CNI: 73s + * Updating CNI image: 110s + * Current image integration: 47s + * Conformance tests: 119.167 min (2 hrs) + * Down cluster: 30 min +* May 29: 2h 59min 30s + * Cache dependencies when testing default CNI + * Timelines: + * Docker build: 4 min + * Up test cluster: 31 min + * Default CNI: 50s + * Updating CNI image: 92s + * Current image integration: 17s + * Conformance tests: 114 min (1.9 hrs) + * Down cluster: 30 min +* June 5: 1h 24min 9s + * Parallel execution of conformance tests + * Timelines: + * Docker build: 3 min + * Up test cluster: 31 min + * Default CNI: 52s + * Updating CNI image: 92s + * Current image integration: 18s + * Conformance tests: 16 min + * Down cluster: 30 min + + + +## How to Manually delete k8s tester Resources (order of deletion) + +Cloudformation - (all except cluster, vpc) +EC2 - load balancers, key pair +VPC - Nat gateways, Elastic IPs(after a minute), internet gateway +Cloudformation - cluster +EC2 - network interfaces, security groups +VPC - subnet, route tables +Cloudformation - cluster, vpc(after cluster deletes) +S3 - delete bucket \ No newline at end of file diff --git a/testdata/deploy-130-pods.yaml b/testdata/deploy-130-pods.yaml new file mode 100644 index 0000000000..74f1f938a6 --- /dev/null +++ b/testdata/deploy-130-pods.yaml @@ -0,0 +1,26 @@ +apiVersion: apps/v1 +kind: Deployment
+metadata: + name: deploy-130-pods +spec: + replicas: 1 + selector: + matchLabels: + app: deploy-130-pods + template: + metadata: + name: test-pod-130 + labels: + app: deploy-130-pods + tier: backend + track: stable + spec: + containers: + - name: hello + image: "kubernetes/pause:latest" + ports: + - name: http + containerPort: 80 + imagePullPolicy: IfNotPresent + nodeSelector: + eks.amazonaws.com/nodegroup: cni-test-multi-node-mng diff --git a/testdata/deploy-5000-pods.yaml b/testdata/deploy-5000-pods.yaml new file mode 100644 index 0000000000..6ef572dffa --- /dev/null +++ b/testdata/deploy-5000-pods.yaml @@ -0,0 +1,26 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: deploy-5000-pods +spec: + replicas: 1 + selector: + matchLabels: + app: deploy-5000-pods + template: + metadata: + name: test-pod-5000 + labels: + app: deploy-5000-pods + tier: backend + track: stable + spec: + containers: + - name: hello + image: "kubernetes/pause:latest" + ports: + - name: http + containerPort: 80 + imagePullPolicy: IfNotPresent + nodeSelector: + eks.amazonaws.com/nodegroup: cni-test-multi-node-mng diff --git a/testdata/deploy-730-pods.yaml b/testdata/deploy-730-pods.yaml new file mode 100644 index 0000000000..97a4b913f4 --- /dev/null +++ b/testdata/deploy-730-pods.yaml @@ -0,0 +1,26 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: deploy-730-pods +spec: + replicas: 1 + selector: + matchLabels: + app: deploy-730-pods + template: + metadata: + name: test-pod-730 + labels: + app: deploy-730-pods + tier: backend + track: stable + spec: + containers: + - name: hello + image: "kubernetes/pause:latest" + ports: + - name: http + containerPort: 80 + imagePullPolicy: IfNotPresent + nodeSelector: + eks.amazonaws.com/nodegroup: cni-test-single-node-mng