From 41b721b904503035599492ba7374011d48d20445 Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Tue, 25 Feb 2025 17:31:24 -0800 Subject: [PATCH] Adding jar build --- .github/workflows/jarbuild.yml | 552 +++++++++++++++++++++++++++++++++ 1 file changed, 552 insertions(+) create mode 100644 .github/workflows/jarbuild.yml diff --git a/.github/workflows/jarbuild.yml b/.github/workflows/jarbuild.yml new file mode 100644 index 00000000..ae2981cd --- /dev/null +++ b/.github/workflows/jarbuild.yml @@ -0,0 +1,552 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: JarBuild +on: + workflow_dispatch: + inputs: + arrow_branch: + description: 'Arrow branch to build' + required: true + default: main + arrow_repo: + description: 'Arrow repository' + type: string + required: true + default: apache/arrow + release_tag_name: + description: 'release tag name' + type: string + required: true + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true +permissions: + contents: read +jobs: + source: + name: Source + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + submodules: recursive + - name: Set env + run: echo "release_tag_name=$(echo $release_tag_name)" >> $GITHUB_ENV + - name: Prepare for tag + run: | + echo "${{github.event.inputs.release_tag_name}}" + ver=$(echo ${{github.event.inputs.release_tag_name}}) + version=${ver%-rc*} + version=${version#v} + rc=${ver#*-rc} + echo "VERSION=${version}" >> ${GITHUB_ENV} + echo "VERSION=${version}" + echo "RC=${rc}" >> ${GITHUB_ENV} + echo "RC=${rc}" + - name: Archive + run: | + id="apache-arrow-java-${VERSION}" + tar_gz="${id}.tar.gz" + echo "TAR_GZ=${tar_gz}" >> ${GITHUB_ENV} + git archive HEAD --prefix "${id}/" --output "${tar_gz}" + sha256sum "${tar_gz}" > "${tar_gz}.sha256" + sha512sum "${tar_gz}" > "${tar_gz}.sha512" + - name: Audit + run: | + dev/release/run_rat.sh "${TAR_GZ}" + - name: Upload source archive + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + with: + name: release-source + path: | + apache-arrow-java-* + jni-linux: + name: JNI ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }} + runs-on: ${{ matrix.platform.runs_on }} + timeout-minutes: 120 + needs: + - source + strategy: + fail-fast: false + matrix: + platform: + - runs_on: ubuntu-latest + arch: "x86_64" + archery_arch: "amd64" + - runs_on: ubuntu-24.04-arm + arch: "aarch_64" + archery_arch: "arm64v8" + env: + # architecture name used for archery build + ARCH: ${{ matrix.platform.archery_arch }} + DOCKER_VOLUME_PREFIX: .docker/ + permissions: + contents: read + packages: write + steps: + - name: Download source archive + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: release-source + - name: Extract source archive + run: | + tar -xf apache-arrow-java-*.tar.gz --strip-components=1 + # - name: Download the latest Apache Arrow C++ + # if: github.event_name != 'schedule' + # run: | + # ci/scripts/download_cpp.sh + - name: Checkout Apache Arrow C++ + # if: github.event_name == 'schedule' + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: ${{github.event.inputs.arrow_repo}} + ref: ${{github.event.inputs.arrow_branch}} + path: arrow + - name: Checkout apache/arrow-testing + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: apache/arrow-testing + path: arrow/testing + - name: Checkout apache/parquet-testing + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: apache/parquet-testing + path: arrow/cpp/submodules/parquet-testing + - uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Cache + uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + with: + path: .docker + key: jni-linux-${{ matrix.platform.arch }}-${{ hashFiles('arrow/cpp/**') }} + restore-keys: jni-linux-${{ matrix.platform.arch }}- + - name: Build + run: | + docker compose run vcpkg-jni + - name: Push Docker image + if: success() && github.event_name == 'push' && github.repository == 'apache/arrow-java' && github.ref_name == 'main' + run: | + docker compose push vcpkg-jni + - name: Compress into single artifact to keep directory structure + run: tar -cvzf jni-linux-${{ matrix.platform.arch }}.tar.gz jni/ + - name: Upload artifacts + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + with: + name: jni-linux-${{ matrix.platform.arch }} + path: jni-linux-${{ matrix.platform.arch }}.tar.gz + jni-macos: + name: JNI ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }} + runs-on: ${{ matrix.platform.runs_on }} + timeout-minutes: 45 + needs: + - source + strategy: + fail-fast: false + matrix: + platform: + - { runs_on: macos-13, arch: "x86_64"} + - { runs_on: macos-14, arch: "aarch_64" } + env: + MACOSX_DEPLOYMENT_TARGET: "14.0" + steps: + - name: Download source archive + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: release-source + - name: Extract source archive + run: | + tar -xf apache-arrow-java-*.tar.gz --strip-components=1 + # - name: Download the latest Apache Arrow C++ + # if: github.event_name != 'schedule' + # run: | + # ci/scripts/download_cpp.sh + - name: Checkout Apache Arrow C++ + # if: github.event_name == 'schedule' + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: ${{github.event.inputs.arrow_repo}} + ref: ${{github.event.inputs.arrow_branch}} + path: arrow + - name: Checkout apache/arrow-testing + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: apache/arrow-testing + path: arrow/testing + - name: Checkout apache/parquet-testing + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: apache/parquet-testing + path: arrow/cpp/submodules/parquet-testing + - name: Set up Python + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + with: + cache: 'pip' + python-version: 3.12 + - name: Install Archery + run: pip install -e arrow/dev/archery[all] + - name: Install dependencies + run: | + # We want to use llvm@14 to avoid shared z3 + # dependency. llvm@14 doesn't depend on z3 and llvm depends + # on z3. And Homebrew's z3 provides only shared library. It + # doesn't provides static z3 because z3's CMake doesn't accept + # building both shared and static libraries at once. + # See also: Z3_BUILD_LIBZ3_SHARED in + # https://github.com/Z3Prover/z3/blob/master/README-CMake.md + # + # If llvm is installed, Apache Arrow C++ uses llvm rather than + # llvm@14 because llvm is newer than llvm@14. + brew uninstall llvm || : + + # Ensure updating python@XXX with the "--overwrite" option. + # If python@XXX is updated without "--overwrite", it causes + # a conflict error. Because Python 3 installed not by + # Homebrew exists in /usr/local on GitHub Actions. If + # Homebrew's python@XXX is updated without "--overwrite", it + # tries to replace /usr/local/bin/2to3 and so on and causes + # a conflict error. + brew update + for python_package in $(brew list | grep python@); do + brew install --overwrite ${python_package} + done + brew install --overwrite python + + if [ "$(uname -m)" = "arm64" ]; then + # pkg-config formula is deprecated but it's still installed + # in GitHub Actions runner now. We can remove this once + # pkg-config formula is removed from GitHub Actions runner. + brew uninstall pkg-config || : + brew uninstall pkg-config@0.29.2 || : + fi + + brew bundle --file=arrow/cpp/Brewfile + # We want to link aws-sdk-cpp statically but Homebrew's + # aws-sdk-cpp provides only shared library. If we have + # Homebrew's aws-sdk-cpp, our build mix Homebrew's + # aws-sdk-cpp and bundled aws-sdk-cpp. We uninstall Homebrew's + # aws-sdk-cpp to ensure using only bundled aws-sdk-cpp. + brew uninstall aws-sdk-cpp + # We want to use bundled RE2 for static linking. If + # Homebrew's RE2 is installed, its header file may be used. + # We uninstall Homebrew's RE2 to ensure using bundled RE2. + brew uninstall grpc || : # gRPC depends on RE2 + brew uninstall grpc@1.54 || : # gRPC 1.54 may be installed too + brew uninstall re2 + # We want to use bundled Protobuf for static linking. If + # Homebrew's Protobuf is installed, its library file may be + # used on test We uninstall Homebrew's Protobuf to ensure using + # bundled Protobuf. + brew uninstall protobuf + + brew bundle --file=Brewfile + - name: Prepare ccache + run: | + echo "CCACHE_DIR=${PWD}/ccache" >> ${GITHUB_ENV} + - name: Cache ccache + uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + with: + path: ccache + key: jni-macos-${{ matrix.platform.arch }}-${{ hashFiles('arrow/cpp/**') }} + restore-keys: jni-macos-${{ matrix.platform.arch }}- + - name: Build + run: | + set -e + # make brew Java available to CMake + export JAVA_HOME=$(brew --prefix openjdk@11)/libexec/openjdk.jdk/Contents/Home + ci/scripts/jni_macos_build.sh . arrow build jni + - name: Compress into single artifact to keep directory structure + run: tar -cvzf jni-macos-${{ matrix.platform.arch }}.tar.gz jni/ + - name: Upload artifacts + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + with: + name: jni-macos-${{ matrix.platform.arch }} + path: jni-macos-${{ matrix.platform.arch }}.tar.gz + jni-windows: + name: JNI ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }} + runs-on: ${{ matrix.platform.runs_on }} + timeout-minutes: 45 + needs: + - source + strategy: + fail-fast: false + matrix: + platform: + - runs_on: windows-2019 + arch: "x86_64" + steps: + - name: Download source archive + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: release-source + - name: Extract source archive + shell: bash + run: | + tar -xf apache-arrow-java-*.tar.gz --strip-components=1 + # - name: Download the latest Apache Arrow C++ + # if: github.event_name != 'schedule' + # shell: bash + # run: | + # ci/scripts/download_cpp.sh + - name: Checkout Apache Arrow C++ + # if: github.event_name == 'schedule' + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: ${{github.event.inputs.arrow_repo}} + ref: ${{github.event.inputs.arrow_branch}} + path: arrow + - name: Set up Java + uses: actions/setup-java@7a6d8a8234af8eb26422e24e3006232cccaa061b # v4.6.0 + with: + java-version: '11' + distribution: 'temurin' + - name: Download Timezone Database + shell: bash + run: | + arrow/ci/scripts/download_tz_database.sh + - name: Install ccache + shell: bash + run: | + env | sort + version=4.10.2 + base_name="ccache-${version}-windows-x86_64" + url="https://github.com/ccache/ccache/releases/download/v${version}/${base_name}.zip" + curl --fail --location --remote-name "${url}" + unzip "${base_name}.zip" + chmod +x "${base_name}/ccache.exe" + mv "${base_name}/ccache.exe" /usr/bin/ + rm -rf "${base_name}"{,.zip} + - name: Prepare ccache + shell: bash + run: | + echo "CCACHE_DIR=${PWD}/ccache" >> ${GITHUB_ENV} + - name: Cache ccache + uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + with: + path: ccache + key: jni-windows-${{ matrix.platform.arch }}-${{ hashFiles('arrow/cpp/**') }} + restore-keys: jni-windows-${{ matrix.platform.arch }}- + - name: Build + shell: cmd + run: | + call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + REM For ORC + set TZDIR=/c/msys64/usr/share/zoneinfo + bash -c "ci/scripts/jni_windows_build.sh . arrow build jni" + - name: Compress into single artifact to keep directory structure + shell: bash + run: tar -cvzf jni-windows-${{ matrix.platform.arch }}.tar.gz jni/ + - name: Upload artifacts + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + with: + name: jni-windows-${{ matrix.platform.arch }} + path: jni-windows-${{ matrix.platform.arch }}.tar.gz + binaries: + name: Binaries + runs-on: ubuntu-latest + needs: + - jni-linux + - jni-macos + - jni-windows + steps: + - name: Download artifacts + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + path: artifacts + - name: Decompress artifacts + run: | + mv artifacts/*/*.tar.gz . + tar -xf apache-arrow-java-*.tar.gz --strip-components=1 + tar -xvzf jni-linux-x86_64.tar.gz + tar -xvzf jni-linux-aarch_64.tar.gz + tar -xvzf jni-macos-x86_64.tar.gz + tar -xvzf jni-macos-aarch_64.tar.gz + tar -xvzf jni-windows-x86_64.tar.gz + - name: Test that shared libraries exist + run: | + set -x + + test -f jni/arrow_cdata_jni/x86_64/libarrow_cdata_jni.so + test -f jni/arrow_dataset_jni/x86_64/libarrow_dataset_jni.so + test -f jni/arrow_orc_jni/x86_64/libarrow_orc_jni.so + test -f jni/gandiva_jni/x86_64/libgandiva_jni.so + + test -f jni/arrow_cdata_jni/aarch_64/libarrow_cdata_jni.so + test -f jni/arrow_dataset_jni/aarch_64/libarrow_dataset_jni.so + test -f jni/arrow_orc_jni/aarch_64/libarrow_orc_jni.so + test -f jni/gandiva_jni/aarch_64/libgandiva_jni.so + + test -f jni/arrow_cdata_jni/x86_64/libarrow_cdata_jni.dylib + test -f jni/arrow_dataset_jni/x86_64/libarrow_dataset_jni.dylib + test -f jni/arrow_orc_jni/x86_64/libarrow_orc_jni.dylib + test -f jni/gandiva_jni/x86_64/libgandiva_jni.dylib + + test -f jni/arrow_cdata_jni/aarch_64/libarrow_cdata_jni.dylib + test -f jni/arrow_dataset_jni/aarch_64/libarrow_dataset_jni.dylib + test -f jni/arrow_orc_jni/aarch_64/libarrow_orc_jni.dylib + test -f jni/gandiva_jni/aarch_64/libgandiva_jni.dylib + + test -f jni/arrow_cdata_jni/x86_64/arrow_cdata_jni.dll + test -f jni/arrow_dataset_jni/x86_64/arrow_dataset_jni.dll + test -f jni/arrow_orc_jni/x86_64/arrow_orc_jni.dll + - name: Checkout apache/arrow-testing + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: apache/arrow-testing + path: testing + - name: Cache ~/.m2 + uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + with: + path: ~/.m2 + key: binaries-build-${{ hashFiles('**/*.java', '**/pom.xml') }} + restore-keys: binaries-build- + - name: Build bundled JAR and docs + run: | + ci/scripts/jni_full_build.sh . jni binaries + - name: Prepare docs + run: | + mkdir -p docs + cp -a target/site/apidocs reference + tar -cvzf reference.tar.gz reference + - name: Upload binaries + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + with: + name: release-binaries + path: binaries/* + - name: Upload docs + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + with: + name: reference + path: reference.tar.gz + docs: + name: Docs + needs: + - binaries + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + with: + cache: 'pip' + - name: Download source archive + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: release-source + - name: Download Javadocs + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: reference + - name: Extract source archive + run: | + tar -xf apache-arrow-java-*.tar.gz --strip-components=1 + - name: Build + run: | + cd docs + python -m venv venv + source venv/bin/activate + pip install -r requirements.txt + make html + tar -xf ../reference.tar.gz -C build/html + - name: Compress into single artifact to keep directory structure + run: tar -cvzf html.tar.gz -C docs/build html + - name: Upload artifacts + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + with: + name: release-html + path: html.tar.gz + verify: + name: Verify + needs: + - binaries + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - macos-latest + - ubuntu-latest + steps: + - name: Download release artifacts + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + pattern: release-* + - name: Verify + run: | + mv release-source/* ./ + tar_gz=$(echo apache-arrow-java-*.tar.gz) + version=${tar_gz#apache-arrow-java-} + version=${version%.tar.gz} + # rc isn't used with VERIFY_DOWNLOAD=0 + if [ "${GITHUB_REF_TYPE}" = "tag" ]; then + rc="${GITHUB_REF_NAME#*-rc}" + else + rc=$(date +%Y%m%d) + fi + tar -xf ${tar_gz} + export VERIFY_DEFAULT=0 + export VERIFY_BINARY=1 + export VERIFY_SOURCE=1 + cd apache-arrow-java-${version} + mv ../${tar_gz}* ./ + mv ../release-binaries binaries + dev/release/verify_rc.sh "${version}" "${rc}" + upload: + name: Upload + needs: + - docs + - verify + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - name: Download release artifacts + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + pattern: release-* + path: artifacts + - name: Upload + run: | + # GH-499: How to create release notes? + echo "${{github.event.inputs.release_tag_name}}" + ver=$(echo ${{github.event.inputs.release_tag_name}}) + version=${ver%-rc*} + version=${version#v} + rc=${ver#*-rc} + gh release create ${{github.event.inputs.release_tag_name}} \ + --generate-notes \ + --prerelease \ + --repo ${GITHUB_REPOSITORY} \ + --title "Apache Arrow Java ${version} RC${rc}" \ + --verify-tag + # GitHub CLI does not respect their own rate limits + # https://github.com/cli/cli/issues/9586 + for artifact in artifacts/*/*; do + sleep 1 + gh release upload ${{github.event.inputs.release_tag_name}} \ + --repo ${GITHUB_REPOSITORY} \ + $artifact + done + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}