Skip to content

feat: Add dictionary binary to shuffle writer #48

feat: Add dictionary binary to shuffle writer

feat: Add dictionary binary to shuffle writer #48

Workflow file for this run

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
name: PR Build
concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true
on:
push:
paths-ignore:
- "doc/**"
- "**.md"
pull_request:
paths-ignore:
- "doc/**"
- "**.md"
# manual trigger
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
workflow_dispatch:
env:
RUST_VERSION: nightly
jobs:
linux-test:
strategy:
matrix:
os: [ubuntu-latest]
java_version: [8, 11, 17]
test-target: [rust, java]
fail-fast: false
name: ${{ matrix.test-target }} test on ${{ matrix.os }} with java ${{ matrix.java_version }}
runs-on: ${{ matrix.os }}
container:
image: amd64/rust
env:
JAVA_VERSION: ${{ matrix.java_version == 8 && '1.8' || format('{0}', matrix.java_version) }}
steps:
- uses: actions/checkout@v4
- name: Setup Rust & Java toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: ${{env.RUST_VERSION}}
jdk-version: ${{ matrix.java_version }}
- uses: actions/checkout@v4
- if: matrix.test-target == 'rust'
name: Rust test steps
uses: ./.github/actions/rust-test
- if: matrix.test-target == 'java'
name: Java test steps
uses: ./.github/actions/java-test
macos-test:
strategy:
matrix:
os: [macos-latest]
java_version: [8, 11, 17]
test-target: [rust, java]
is_push_event:
- ${{ github.event_name == 'push' }}
exclude: # exclude java 11 for pull_request event
- java_version: 11
is_push_event: false
fail-fast: false
name: ${{ matrix.test-target }} test on ${{ matrix.os }} with java ${{ matrix.java_version }}
runs-on: ${{ matrix.os }}
env:
JAVA_VERSION: ${{ matrix.java_version == 8 && '1.8' || format('{0}', matrix.java_version) }}
steps:
- uses: actions/checkout@v4
- name: Setup Rust & Java toolchain
uses: ./.github/actions/setup-macos-builder
with:
rust-version: ${{env.RUST_VERSION}}
jdk-version: ${{ matrix.java_version }}
- uses: actions/checkout@v4
- if: matrix.test-target == 'rust'
name: Rust test steps
uses: ./.github/actions/rust-test
- if: matrix.test-target == 'java'
name: Java test steps
uses: ./.github/actions/java-test
macos-aarch64-test:
strategy:
matrix:
java_version: [8, 11, 17]
test-target: [rust, java]
is_push_event:
- ${{ github.event_name == 'push' }}
exclude: # exclude java 11 for pull_request event
- java_version: 11
is_push_event: false
fail-fast: false
name: ${{ matrix.test-target }} test on macos-aarch64 with java ${{ matrix.java_version }}
runs-on: macos-14
env:
JAVA_VERSION: ${{ matrix.java_version == 8 && '1.8' || format('{0}', matrix.java_version) }}
steps:
- uses: actions/checkout@v4
- name: Setup Rust & Java toolchain
uses: ./.github/actions/setup-macos-builder
with:
rust-version: ${{env.RUST_VERSION}}
jdk-version: ${{ matrix.java_version }}
jdk-architecture: aarch64
protoc-architecture: aarch_64
- uses: actions/checkout@v4
- if: matrix.test-target == 'rust'
name: Rust test steps
uses: ./.github/actions/rust-test
- if: matrix.test-target == 'java'
name: Java test steps
uses: ./.github/actions/java-test
tpcds-1g:
name: Run TPC-DS queries with SF=1
runs-on: ubuntu-latest
container:
image: amd64/rust
env:
JAVA_VERSION: 11
steps:
- uses: actions/checkout@v4
- name: Setup Rust & Java toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: ${{env.RUST_VERSION}}
jdk-version: 11
- name: Cache TPC-DS generated data
id: cache-tpcds-sf-1
uses: actions/cache@v4
with:
path: ./tpcds-sf-1
key: tpcds-${{ hashFiles('.github/workflows/pr_build.yml') }}
- name: Checkout tpcds-kit repository
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
uses: actions/checkout@v4
with:
repository: databricks/tpcds-kit
path: ./tpcds-kit
- name: Build Comet
run: make release
- name: Build tpcds-kit
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
run: |
apt-get install -y yacc bison flex
cd tpcds-kit/tools && make OS=LINUX
- name: Generate TPC-DS (SF=1) table data
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
run: |
cd spark && MAVEN_OPTS='-Xmx20g' ../mvnw exec:java -Dexec.mainClass="org.apache.spark.sql.GenTPCDSData" -Dexec.classpathScope="test" -Dexec.cleanupDaemonThreads="false" -Dexec.args="--dsdgenDir `pwd`/../tpcds-kit/tools --location `pwd`/../tpcds-sf-1 --scaleFactor 1 --numPartitions 1"
cd ..
- name: Run TPC-DS queries (Sort merge join)
run: |
SPARK_HOME=`pwd` SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 ./mvnw -Dsuites=org.apache.spark.sql.CometTPCDSQuerySuite test
env:
SPARK_TPCDS_JOIN_CONF: |
spark.sql.autoBroadcastJoinThreshold=-1
spark.sql.join.preferSortMergeJoin=true
- name: Run TPC-DS queries (Broadcast hash join)
run: |
SPARK_HOME=`pwd` SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 ./mvnw -Dsuites=org.apache.spark.sql.CometTPCDSQuerySuite test
env:
SPARK_TPCDS_JOIN_CONF: |
spark.sql.autoBroadcastJoinThreshold=10485760
- name: Run TPC-DS queries (Shuffled hash join)
run: |
SPARK_HOME=`pwd` SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 ./mvnw -Dsuites=org.apache.spark.sql.CometTPCDSQuerySuite test
env:
SPARK_TPCDS_JOIN_CONF: |
spark.sql.autoBroadcastJoinThreshold=-1
spark.sql.join.forceApplyShuffledHashJoin=true