From 94156fdc7d9e96a312162d68ddf3a7f8938adc8d Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Thu, 10 Oct 2024 10:04:42 +0800 Subject: [PATCH 1/4] Initial --- dev/builddeps-veloxbe.sh | 107 ++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 52 deletions(-) diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh index 0a07a568e769..cdec49dcdfd9 100755 --- a/dev/builddeps-veloxbe.sh +++ b/dev/builddeps-veloxbe.sh @@ -187,6 +187,60 @@ fi concat_velox_param +function prepare_build { + ( + cd $GLUTEN_DIR/ep/build-velox/src + ./get_velox.sh $VELOX_PARAMETER + ) + + if [ "$VELOX_HOME" == "" ]; then + VELOX_HOME="$GLUTEN_DIR/ep/build-velox/build/velox_ep" + fi + + OS=`uname -s` + ARCH=`uname -m` + DEPENDENCY_DIR=${DEPENDENCY_DIR:-$CURRENT_DIR/../ep/_ep} + mkdir -p ${DEPENDENCY_DIR} + + source $GLUTEN_DIR/dev/build_helper_functions.sh + if [ -z "${GLUTEN_VCPKG_ENABLED:-}" ] && [ $RUN_SETUP_SCRIPT == "ON" ]; then + echo "Start to install dependencies" + pushd $VELOX_HOME + if [ $OS == 'Linux' ]; then + setup_linux + elif [ $OS == 'Darwin' ]; then + setup_macos + else + echo "Unsupported kernel: $OS" + exit 1 + fi + if [ $ENABLE_S3 == "ON" ]; then + if [ $OS == 'Darwin' ]; then + echo "S3 is not supported on MacOS." + exit 1 + fi + ${VELOX_HOME}/scripts/setup-adapters.sh aws + fi + if [ $ENABLE_HDFS == "ON" ]; then + if [ $OS == 'Darwin' ]; then + echo "HDFS is not supported on MacOS." + exit 1 + fi + pushd $VELOX_HOME + install_libhdfs3 + popd + fi + if [ $ENABLE_GCS == "ON" ]; then + ${VELOX_HOME}/scripts/setup-adapters.sh gcs + fi + if [ $ENABLE_ABFS == "ON" ]; then + export AZURE_SDK_DISABLE_AUTO_VCPKG=ON + ${VELOX_HOME}/scripts/setup-adapters.sh abfs + fi + popd + fi +} + function build_arrow { cd $GLUTEN_DIR/dev ./build_arrow.sh @@ -216,6 +270,7 @@ function build_gluten_cpp { } function build_velox_backend { + prepare_build if [ $BUILD_ARROW == "ON" ]; then build_arrow fi @@ -223,58 +278,6 @@ function build_velox_backend { build_gluten_cpp } -( - cd $GLUTEN_DIR/ep/build-velox/src - ./get_velox.sh $VELOX_PARAMETER -) - -if [ "$VELOX_HOME" == "" ]; then - VELOX_HOME="$GLUTEN_DIR/ep/build-velox/build/velox_ep" -fi - -OS=`uname -s` -ARCH=`uname -m` -DEPENDENCY_DIR=${DEPENDENCY_DIR:-$CURRENT_DIR/../ep/_ep} -mkdir -p ${DEPENDENCY_DIR} - -source $GLUTEN_DIR/dev/build_helper_functions.sh -if [ -z "${GLUTEN_VCPKG_ENABLED:-}" ] && [ $RUN_SETUP_SCRIPT == "ON" ]; then - echo "Start to install dependencies" - pushd $VELOX_HOME - if [ $OS == 'Linux' ]; then - setup_linux - elif [ $OS == 'Darwin' ]; then - setup_macos - else - echo "Unsupported kernel: $OS" - exit 1 - fi - if [ $ENABLE_S3 == "ON" ]; then - if [ $OS == 'Darwin' ]; then - echo "S3 is not supported on MacOS." - exit 1 - fi - ${VELOX_HOME}/scripts/setup-adapters.sh aws - fi - if [ $ENABLE_HDFS == "ON" ]; then - if [ $OS == 'Darwin' ]; then - echo "HDFS is not supported on MacOS." - exit 1 - fi - pushd $VELOX_HOME - install_libhdfs3 - popd - fi - if [ $ENABLE_GCS == "ON" ]; then - ${VELOX_HOME}/scripts/setup-adapters.sh gcs - fi - if [ $ENABLE_ABFS == "ON" ]; then - export AZURE_SDK_DISABLE_AUTO_VCPKG=ON - ${VELOX_HOME}/scripts/setup-adapters.sh abfs - fi - popd -fi - commands_to_run=${OTHER_ARGUMENTS:-} ( if [[ "x$commands_to_run" == "x" ]]; then From 279564dd8b57406f29fa508b6b51feda8ebdc71f Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Thu, 10 Oct 2024 10:32:06 +0800 Subject: [PATCH 2/4] Fix build failure --- dev/builddeps-veloxbe.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh index cdec49dcdfd9..bdd56f9ca54f 100755 --- a/dev/builddeps-veloxbe.sh +++ b/dev/builddeps-veloxbe.sh @@ -187,22 +187,23 @@ fi concat_velox_param +if [ "$VELOX_HOME" == "" ]; then + VELOX_HOME="$GLUTEN_DIR/ep/build-velox/build/velox_ep" +fi + +source $GLUTEN_DIR/dev/build_helper_functions.sh + function prepare_build { ( cd $GLUTEN_DIR/ep/build-velox/src ./get_velox.sh $VELOX_PARAMETER ) - if [ "$VELOX_HOME" == "" ]; then - VELOX_HOME="$GLUTEN_DIR/ep/build-velox/build/velox_ep" - fi - OS=`uname -s` ARCH=`uname -m` DEPENDENCY_DIR=${DEPENDENCY_DIR:-$CURRENT_DIR/../ep/_ep} mkdir -p ${DEPENDENCY_DIR} - source $GLUTEN_DIR/dev/build_helper_functions.sh if [ -z "${GLUTEN_VCPKG_ENABLED:-}" ] && [ $RUN_SETUP_SCRIPT == "ON" ]; then echo "Start to install dependencies" pushd $VELOX_HOME @@ -239,6 +240,7 @@ function prepare_build { fi popd fi + echo "Finished build preparation." } function build_arrow { From ead6e87f1ac6f2bfc71e8acf340b7a8ca481104f Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Thu, 10 Oct 2024 15:51:37 +0800 Subject: [PATCH 3/4] Enable re-using cached lib velox --- .github/workflows/velox_backend.yml | 34 +++++++++++++++++------ .github/workflows/velox_backend_cache.yml | 17 +++++------- cpp/velox/CMakeLists.txt | 2 +- dev/ci-velox-buildshared-centos-8.sh | 2 +- dev/ci-velox-buildstatic-centos-7.sh | 2 +- 5 files changed, 36 insertions(+), 21 deletions(-) diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml index d47a97d15bd4..f2fa2a9b299c 100644 --- a/.github/workflows/velox_backend.yml +++ b/.github/workflows/velox_backend.yml @@ -56,19 +56,29 @@ jobs: build-native-lib-centos-7: runs-on: ubuntu-20.04 container: apache/gluten:vcpkg-centos-7 + env: + VELOX_BUILD_PATH: "./ep/build-velox/build/velox_ep/_build/release/" steps: - uses: actions/checkout@v2 - name: Generate cache key run: | - echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './cpp/*', './.github/workflows/*') }} > cache-key + echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './.github/workflows/velox_backend.yml') }} > cache-key - name: Cache id: cache uses: actions/cache/restore@v3 with: path: | - ./cpp/build/releases/ + ${VELOX_BUILD_PATH}/lib/libvelox.a key: cache-velox-build-centos-7-${{ hashFiles('./cache-key') }} - - name: Build Gluten native libraries + - name: Build Gluten with cached lib Velox + if: ${{ steps.cache.outputs.cache-hit == 'true' }} + run: | + df -a + mkdir -p cache_dir && mv ${VELOX_BUILD_PATH}/* ./cache_dir/ + bash dev/ci-velox-buildstatic-centos-7.sh prepare_build + mkdir -p ${VELOX_BUILD_PATH}/ && mv ./cache_dir/* ${VELOX_BUILD_PATH}/ + bash dev/ci-velox-buildstatic-centos-7.sh build_gluten_cpp + - name: Build Velox and Gluten CPP if: ${{ steps.cache.outputs.cache-hit != 'true' }} run: | df -a @@ -1060,19 +1070,19 @@ jobs: run-cpp-test-udf-test: runs-on: ubuntu-20.04 container: ghcr.io/facebookincubator/velox-dev:centos8 + env: + VELOX_BUILD_PATH: "./ep/build-velox/build/velox_ep/_build/release/" steps: - uses: actions/checkout@v2 - name: Generate cache key run: | - echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './cpp/*', './.github/workflows/*') }} > cache-key + echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './.github/workflows/velox_backend.yml') }} > cache-key - name: Cache id: cache uses: actions/cache/restore@v3 with: path: | - ./cpp/build/releases/ - ./cpp/build/velox/udf/examples/ - ./cpp/build/velox/benchmarks/ + ${VELOX_BUILD_PATH} /root/.m2/repository/org/apache/arrow/ key: cache-velox-build-centos-8-${{ hashFiles('./cache-key') }} - name: Setup java and maven @@ -1081,7 +1091,15 @@ jobs: sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true yum install sudo patch java-1.8.0-openjdk-devel wget -y $SETUP install_maven - - name: Build Gluten native libraries + - name: Build Gluten with cached lib Velox + if: steps.cache.outputs.cache-hit == 'true' + run: | + df -a + mkdir -p cache_dir && mv ${VELOX_BUILD_PATH}/* ./cache_dir/ + bash dev/ci-velox-buildshared-centos-8.sh prepare_build + mkdir -p ${VELOX_BUILD_PATH}/ && mv ./cache_dir/* ${VELOX_BUILD_PATH}/ + bash dev/ci-velox-buildshared-centos-8.sh build_gluten_cpp + - name: Build Velox and Gluten CPP if: steps.cache.outputs.cache-hit != 'true' run: | df -a diff --git a/.github/workflows/velox_backend_cache.yml b/.github/workflows/velox_backend_cache.yml index a25eda9367d6..2ae2620d41c4 100644 --- a/.github/workflows/velox_backend_cache.yml +++ b/.github/workflows/velox_backend_cache.yml @@ -22,6 +22,7 @@ on: env: ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true + VELOX_BUILD_PATH: "./ep/build-velox/build/velox_ep/_build/release/" concurrency: group: ${{ github.repository }}-${{ github.workflow }} @@ -35,14 +36,14 @@ jobs: - uses: actions/checkout@v2 - name: Generate cache key run: | - echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './cpp/*', './.github/workflows/*') }} > cache-key + echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './.github/workflows/velox_backend.yml') }} > cache-key - name: Check existing caches id: check-cache uses: actions/cache/restore@v3 with: lookup-only: true path: | - ./cpp/build/releases/ + ${VELOX_BUILD_PATH}/lib/libvelox.a key: cache-velox-build-centos-7-${{ hashFiles('./cache-key') }} - name: Build Gluten native libraries if: steps.check-cache.outputs.cache-hit != 'true' @@ -55,7 +56,7 @@ jobs: uses: actions/cache/save@v3 with: path: | - ./cpp/build/releases/ + ${VELOX_BUILD_PATH}/lib/libvelox.a key: cache-velox-build-centos-7-${{ hashFiles('./cache-key') }} cache-native-lib-centos-8: @@ -65,16 +66,14 @@ jobs: - uses: actions/checkout@v2 - name: Generate cache key run: | - echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './cpp/*', './.github/workflows/*') }} > cache-key + echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './.github/workflows/velox_backend.yml') }} > cache-key - name: Check existing caches id: check-cache uses: actions/cache/restore@v3 with: lookup-only: true path: | - ./cpp/build/releases/ - ./cpp/build/velox/udf/examples/ - ./cpp/build/velox/benchmarks/ + ${VELOX_BUILD_PATH}/ /root/.m2/repository/org/apache/arrow/ key: cache-velox-build-centos-8-${{ hashFiles('./cache-key') }} - name: Setup java and maven @@ -95,9 +94,7 @@ jobs: uses: actions/cache/save@v3 with: path: | - ./cpp/build/releases/ - ./cpp/build/velox/udf/examples/ - ./cpp/build/velox/benchmarks/ + ${VELOX_BUILD_PATH} /root/.m2/repository/org/apache/arrow/ key: cache-velox-build-centos-8-${{ hashFiles('./cache-key') }} diff --git a/cpp/velox/CMakeLists.txt b/cpp/velox/CMakeLists.txt index f872df47bf01..baec45b61da2 100644 --- a/cpp/velox/CMakeLists.txt +++ b/cpp/velox/CMakeLists.txt @@ -76,7 +76,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SCRIPT_CXX_FLAGS}") message("Velox module final CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}") -# User can specify VELOX_BUILD_PATH, if Velox are built elsewhere. +# User can specify VELOX_BUILD_PATH, if Velox is built elsewhere. if(NOT DEFINED VELOX_BUILD_PATH) if(${CMAKE_BUILD_TYPE} STREQUAL "Debug") set(VELOX_BUILD_PATH diff --git a/dev/ci-velox-buildshared-centos-8.sh b/dev/ci-velox-buildshared-centos-8.sh index b6b0cda02d28..2aaaee0402b1 100755 --- a/dev/ci-velox-buildshared-centos-8.sh +++ b/dev/ci-velox-buildshared-centos-8.sh @@ -4,4 +4,4 @@ set -e source /opt/rh/gcc-toolset-9/enable ./dev/builddeps-veloxbe.sh --run_setup_script=OFF --enable_ep_cache=OFF --build_tests=ON \ - --build_examples=ON --build_benchmarks=ON --build_protobuf=ON + --build_examples=ON --build_benchmarks=ON --build_protobuf=ON $@ diff --git a/dev/ci-velox-buildstatic-centos-7.sh b/dev/ci-velox-buildstatic-centos-7.sh index 3272de95d910..8bd9295924fe 100755 --- a/dev/ci-velox-buildstatic-centos-7.sh +++ b/dev/ci-velox-buildstatic-centos-7.sh @@ -5,4 +5,4 @@ set -e source /opt/rh/devtoolset-9/enable export NUM_THREADS=4 ./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_arrow=OFF --build_tests=OFF --build_benchmarks=OFF \ - --build_examples=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON + --build_examples=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON $@ From b8ebcaddeafd85bb760330c0e79885f86d0b8346 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Fri, 11 Oct 2024 15:47:11 +0800 Subject: [PATCH 4/4] Minor changes --- .github/workflows/velox_backend.yml | 5 +++-- .github/workflows/velox_backend_cache.yml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml index f2fa2a9b299c..ae0aa98f48ed 100644 --- a/.github/workflows/velox_backend.yml +++ b/.github/workflows/velox_backend.yml @@ -82,7 +82,7 @@ jobs: if: ${{ steps.cache.outputs.cache-hit != 'true' }} run: | df -a - cd $GITHUB_WORKSPACE/ + rm -rf ./ep/build-velox/build/velox_ep bash dev/ci-velox-buildstatic-centos-7.sh - uses: actions/upload-artifact@v3 with: @@ -1091,7 +1091,7 @@ jobs: sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true yum install sudo patch java-1.8.0-openjdk-devel wget -y $SETUP install_maven - - name: Build Gluten with cached lib Velox + - name: Build Gluten with cached Velox build binary if: steps.cache.outputs.cache-hit == 'true' run: | df -a @@ -1103,6 +1103,7 @@ jobs: if: steps.cache.outputs.cache-hit != 'true' run: | df -a + rm -rf ./ep/build-velox/build/velox_ep bash dev/ci-velox-buildshared-centos-8.sh - name: Run CPP unit test run: | diff --git a/.github/workflows/velox_backend_cache.yml b/.github/workflows/velox_backend_cache.yml index 2ae2620d41c4..ae28ac270c8f 100644 --- a/.github/workflows/velox_backend_cache.yml +++ b/.github/workflows/velox_backend_cache.yml @@ -73,7 +73,7 @@ jobs: with: lookup-only: true path: | - ${VELOX_BUILD_PATH}/ + ${VELOX_BUILD_PATH} /root/.m2/repository/org/apache/arrow/ key: cache-velox-build-centos-8-${{ hashFiles('./cache-key') }} - name: Setup java and maven