From 0a3066cb00bd084747f4c7d876134f48cbff51b0 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh
Date: Wed, 20 Mar 2024 13:59:38 -0700
Subject: [PATCH] build: Add CI for TPC-H queries

---
 .github/workflows/benchmark-tpch.yml | 123 +++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)
 create mode 100644 .github/workflows/benchmark-tpch.yml

diff --git a/.github/workflows/benchmark-tpch.yml b/.github/workflows/benchmark-tpch.yml
new file mode 100644
index 0000000000..f67e8398b1
--- /dev/null
+++ b/.github/workflows/benchmark-tpch.yml
@@ -0,0 +1,123 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: TPC-H Correctness
+
+concurrency:
+  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
+  cancel-in-progress: true  # a newer run in the same group (PR branch, else commit SHA) cancels the older one
+
+on:
+  push:
+    paths-ignore:
+      - "doc/**"
+      - "**.md"
+  pull_request:
+    paths-ignore:
+      - "doc/**"
+      - "**.md"
+  # manual trigger
+  # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
+  workflow_dispatch:
+
+env:
+  RUST_VERSION: nightly  # toolchain channel handed to setup-builder by both jobs
+
+jobs:
+  prepare:  # builds the native lib and, on a cache miss, generates the SF=1 data the benchmark job consumes
+    name: Build native and prepare data
+    runs-on: ubuntu-latest
+    container:
+      image: amd64/rust
+      env:
+        JAVA_VERSION: 11
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Rust & Java toolchain
+        uses: ./.github/actions/setup-builder
+        with:
+          rust-version: ${{ env.RUST_VERSION }}
+          jdk-version: 11
+      - name: Cache Maven dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.m2/repository
+            /root/.m2/repository
+          key: ${{ runner.os }}-java-maven-${{ hashFiles('**/pom.xml') }}
+          restore-keys: |
+            ${{ runner.os }}-java-maven-
+      - name: Cache TPC-H generated data
+        id: cache-tpch-sf-1  # its cache-hit output gates the generate step at the end of this job
+        uses: actions/cache@v4
+        with:
+          path: ./tpch
+          key: tpch-${{ hashFiles('.github/workflows/benchmark-tpch.yml') }}  # editing this workflow file invalidates the data cache
+      - name: Build Comet
+        run: make release
+      - name: Upload Comet native lib
+        uses: actions/upload-artifact@v4
+        with:
+          name: libcomet-${{ github.run_id }}  # run-scoped name; the benchmark job downloads it by the same name
+          path: |
+            core/target/release/libcomet.so
+            core/target/release/libcomet.dylib
+          retention-days: 1  # the artifact is only needed within this workflow run, so expire it after 1 day
+          overwrite: true
+      - name: Generate TPC-H (SF=1) table data
+        if: steps.cache-tpch-sf-1.outputs.cache-hit != 'true'
+        working-directory: spark  # fails the step if spark/ is missing (the old `cd spark && ...` + `cd ..` exited 0 in that case)
+        run: |
+          MAVEN_OPTS='-Xmx20g' ../mvnw exec:java -Dexec.mainClass="org.apache.spark.sql.GenTPCHData" -Dexec.classpathScope="test" -Dexec.cleanupDaemonThreads="false" -Dexec.args="--location `pwd`/.. --scaleFactor 1 --numPartitions 1 --overwrite"
+
+  benchmark:
+    name: Run TPCHQuerySuite
+    needs: prepare  # consumes the libcomet artifact and the TPC-H data cache produced by the prepare job
+    runs-on: ubuntu-latest
+    container:
+      image: amd64/rust
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Rust & Java toolchain
+        uses: ./.github/actions/setup-builder
+        with:
+          jdk-version: 11
+          rust-version: ${{ env.RUST_VERSION }}
+      - name: Cache Maven dependencies
+        uses: actions/cache@v4
+        with:
+          key: ${{ runner.os }}-java-maven-${{ hashFiles('**/pom.xml') }}
+          restore-keys: |
+            ${{ runner.os }}-java-maven-
+          path: |
+            ~/.m2/repository
+            /root/.m2/repository
+      - name: Restore TPC-H generated data
+        id: cache-tpch-sf-1
+        uses: actions/cache/restore@v4
+        with:
+          key: tpch-${{ hashFiles('.github/workflows/benchmark-tpch.yml') }}
+          path: ./tpch
+          fail-on-cache-miss: true  # the prepare job generates and caches this data on a miss, so the entry is expected to exist here
+      - name: Download Comet native lib
+        uses: actions/download-artifact@v4
+        with:
+          path: core/target/release
+          name: libcomet-${{ github.run_id }}
+      - name: Run TPC-H queries
+        run: |
+          SPARK_HOME=`pwd` SPARK_TPCH_DATA=`pwd`/tpch/sf1_parquet ./mvnw -Dsuites=org.apache.spark.sql.CometTPCHQuerySuite test