From 5e6aab49a046c19e85f2177df440c38c7277dc08 Mon Sep 17 00:00:00 2001
From: Yikun Jiang
Date: Fri, 22 Jul 2022 09:21:08 -0700
Subject: [PATCH] [SPARK-38597][K8S][INFRA] Enable Spark on K8S integration
 tests

### What changes were proposed in this pull request?
Enable Spark on K8S integration tests in GitHub Actions, based on minikube:
- The K8S IT is always triggered in user fork repos and on `apache/spark` commits merged to the master branch.
- This PR does NOT contain the Volcano-related tests, due to the limited resources of GitHub Actions.
- minikube installation is allowed by Apache Infra: [INFRA-23000](https://issues.apache.org/jira/projects/INFRA/issues/INFRA-23000)
- Why set the driver to 0.5 CPU and each executor to 0.2 CPU?
  * GitHub-hosted runner hardware is limited to [2 CPUs / 7 GB](https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources), so CPU is very scarce.
  * CPU available to the IT job = 2 CPUs - 0.85 CPU (K8S deployment) = 1.15 CPUs.
  * The 1.15 CPUs left after the K8S installation meet the requirement of the K8S tests (one driver plus up to 3 executors); see the sanity-check sketch at the end of this description.
  * For memory: 6947 MB is the maximum (anything higher fails with `Exiting due to RSRC_OVER_ALLOC_MEM: Requested memory allocation 7168MB is more than your system limit 6947MB.`), but since 6947 is not an integer multiple of 1024, this is set to 6144 MB for cleaner resource accounting.
- Time cost:
  * 14 mins to compile the related code.
  * 3 mins to build the Docker images.
  * 20-30 mins to run the tests.
  * Total: about 30-40 mins.

### Why are the changes needed?
This also improves the efficiency of K8S development and, to some degree, guarantees the quality of Spark on K8S and of the Spark Docker image.

### Does this PR introduce _any_ user-facing change?
No, dev only.

### How was this patch tested?
CI passed.

Closes #35830

Closes #37244 from Yikun/SPARK-38597-k8s-it.
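As a sanity check on the CPU budget above (a minimal sketch; the 0.85 CPU K8S-deployment overhead and the 3-executor maximum are the figures quoted in this description, not measured here):

```sh
# CPU budget on a 2-CPU GitHub-hosted runner, using the numbers above.
AVAILABLE=$(echo "2.0 - 0.85" | bc)     # 1.15 CPUs left after the K8S deployment
REQUESTED=$(echo "0.5 + 3 * 0.2" | bc)  # one driver + up to 3 executors = 1.1 CPUs
echo "requested=${REQUESTED} available=${AVAILABLE}"  # 1.1 <= 1.15, so the suite fits
```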
Authored-by: Yikun Jiang
Signed-off-by: Dongjoon Hyun
---
 .github/workflows/build_and_test.yml | 73 +++++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 02b799891fd7f..1902468e90cad 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -99,7 +99,8 @@ jobs:
           \"docker-integration-tests\": \"$docker\",
           \"scala-213\": \"true\",
           \"java-11-17\": \"true\",
-          \"lint\" : \"true\"
+          \"lint\" : \"true\",
+          \"k8s-integration-tests\" : \"true\",
         }"
         echo $precondition # For debugging
         # GitHub Actions set-output doesn't take newlines
@@ -869,3 +870,73 @@ jobs:
       with:
         name: unit-tests-log-docker-integration--8-${{ inputs.hadoop }}-hive2.3
         path: "**/target/unit-tests.log"
+
+  k8s-integration-tests:
+    needs: precondition
+    if: fromJson(needs.precondition.outputs.required).k8s-integration-tests == 'true'
+    name: Run Spark on Kubernetes Integration test
+    runs-on: ubuntu-20.04
+    steps:
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+      with:
+        fetch-depth: 0
+        repository: apache/spark
+        ref: ${{ inputs.branch }}
+    - name: Sync the current branch with the latest in Apache Spark
+      if: github.repository != 'apache/spark'
+      run: |
+        echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
+        git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
+        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
+        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
+    - name: Cache Scala, SBT and Maven
+      uses: actions/cache@v2
+      with:
+        path: |
+          build/apache-maven-*
+          build/scala-*
+          build/*.jar
+          ~/.sbt
+        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
+        restore-keys: |
+          build-
+    - name: Cache Coursier local repository
+      uses: actions/cache@v2
+      with:
+        path: ~/.cache/coursier
+        key: k8s-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
+        restore-keys: |
+          k8s-integration-coursier-
+    - name: Install Java ${{ inputs.java }}
+      uses: actions/setup-java@v1
+      with:
+        java-version: ${{ inputs.java }}
+    - name: start minikube
+      run: |
+        # See more in "Installation" https://minikube.sigs.k8s.io/docs/start/
+        curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
+        sudo install minikube-linux-amd64 /usr/local/bin/minikube
+        # Github Action limit cpu:2, memory: 6947MB, limit to 2U6G for better resource statistic
+        minikube start --cpus 2 --memory 6144
+    - name: Print K8S pods and nodes info
+      run: |
+        kubectl get pods -A
+        kubectl describe node
+    - name: Run Spark on K8S integration test (With driver cpu 0.5, executor cpu 0.2 limited)
+      run: |
+        # Prepare PV test
+        PVC_TMP_DIR=$(mktemp -d)
+        export PVC_TESTS_HOST_PATH=$PVC_TMP_DIR
+        export PVC_TESTS_VM_PATH=$PVC_TMP_DIR
+        minikube mount ${PVC_TESTS_HOST_PATH}:${PVC_TESTS_VM_PATH} --gid=0 --uid=185 &
+        kubectl create clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true
+        eval $(minikube docker-env)
+        # - Exclude Volcano test (-Pvolcano), batch jobs need more CPU resource
+        build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests -Dspark.kubernetes.test.driverRequestCores=0.5 -Dspark.kubernetes.test.executorRequestCores=0.2 "kubernetes-integration-tests/test"
+    - name: Upload Spark on K8S integration tests log files
+      if: failure()
+      uses: actions/upload-artifact@v2
+      with:
+        name: spark-on-kubernetes-it-log
+        path: "**/target/integration-tests.log"
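For local verification, the key commands from the "Run Spark on K8S integration test" step above can be replayed outside CI. A minimal sketch, assuming minikube, kubectl, and Docker are installed and the working directory is a Spark checkout (the PV mount and clusterrolebinding setup from the workflow are omitted here, so the PV-related tests may need that extra setup):

```sh
# Mirror the CI cluster sizing (2 CPUs, 6144MB of memory).
minikube start --cpus 2 --memory 6144
# Build the test images against minikube's Docker daemon rather than the host's.
eval $(minikube docker-env)
# Run the suite with the same reduced CPU requests as the workflow.
build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests \
  -Dspark.kubernetes.test.driverRequestCores=0.5 \
  -Dspark.kubernetes.test.executorRequestCores=0.2 \
  "kubernetes-integration-tests/test"
```

On a machine with more than 2 CPUs, the two `RequestCores` overrides can simply be dropped.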