Skip to content

Allow getting a ParquetFileReader and the SchemaManifest from an Arro… #1673

Allow getting a ParquetFileReader and the SchemaManifest from an Arro…

Allow getting a ParquetFileReader and the SchemaManifest from an Arro… #1673

Workflow file for this run

name: CI
on:
push:
pull_request:
schedule:
# Run daily at 00:00 so we get notified if CI is broken before a pull request
# is submitted.
- cron: '0 0 * * *'
env:
DOTNET_CLI_TELEMETRY_OPTOUT: true
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
permissions:
contents: read
jobs:
# Note that vcpkg dependencies takes the majority of the build time.
# We cache them using GitHub Actions cache, making the scripts below a bit more complex.
check-format:
if: github.event_name == 'schedule' || github.event_name == 'push' || github.event.pull_request.head.repo.id != github.event.pull_request.base.repo.id
name: Check format
runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup .NET SDK v7.0.x
uses: actions/setup-dotnet@v3
with:
dotnet-version: 7.0.x
- name: Code formating check
run: |
dotnet tool restore
dotnet jb cleanupcode "csharp" "csharp.test" "csharp.benchmark" --profile="Built-in: Reformat Code" --settings="ParquetSharp.DotSettings" --verbosity=WARN
files=($(git diff --name-only))
if [ ${#files[@]} -gt 0 ]
then
for file in $files; do echo "::error file=$file::Code format check failed"; done
exit 1
fi
# Build everything on all platorms (thus testing the developer workflow).
# Upload the native shared libraries as artifacts.
build-native:
# Do not run this job for pull requests where both branches are from the same repo.
# Other jobs will be skipped too, as they depend on this one.
# This prevents duplicate CI runs for our own pull requests, whilst preserving the ability to
# run the CI for each branch push to a fork, and for each pull request originating from a fork.
if: github.event_name == 'schedule' || github.event_name == 'push' || github.event.pull_request.head.repo.id != github.event.pull_request.base.repo.id
strategy:
matrix:
os: [ubuntu-20.04, macos-11, windows-2022]
arch: [x64, arm64]
exclude:
- os: windows-2022
arch: arm64
fail-fast: false
name: Build native ${{ matrix.arch }} library (${{ matrix.os }})
runs-on: ${{ matrix.os }}
steps:
- name: Checkout
uses: actions/checkout@v3
# Compute vcpkg triplet and root
- name: Compute vcpkg triplet and root
id: vcpkg-info
run: |
triplet="${{ matrix.arch }}-"
case ${{ runner.os }} in
Linux)
triplet+="linux"
;;
macOS)
triplet+="osx"
;;
Windows)
triplet+="windows-static"
;;
esac
echo "triplet=$triplet" >> $GITHUB_OUTPUT
echo "root=$VCPKG_INSTALLATION_ROOT" >> $GITHUB_OUTPUT
shell: bash
# Get cmake version, which is used by vcpkg binary caching
- name: Get cmake version
id: cmake-info
run: echo "version=$(cmake --version | head -n1 | awk '{print $3}')" >> $GITHUB_OUTPUT
shell: bash
- name: Runner-info
id: runner-info
run: |
echo "info=$ImageOS-$ImageVersion" >> $GITHUB_OUTPUT
shell: bash
# Check for cached vcpkg dependencies (use these if we can).
- name: Get cached vcpkg dependencies
id: get-cached-vcpkg
uses: actions/cache@v3
with:
path: cache/vcpkg
# https://vcpkg.readthedocs.io/en/stable/users/binarycaching/
# Binary caching relies on hashing everything that contributes to a particular package build. This includes:
# - The triplet file and name
# - The C and C++ compilers executable
# - The version of CMake used
# - Every file in the port directory
# - & more... (subject to change without notice)
#
# We use Vcpkg and C/C++ compilers which are preinstalled on the runner, so we include the runner's image identity as part of the hash key.
key: vcpkg-${{ steps.vcpkg-info.outputs.triplet }}-cmake:${{ steps.cmake-info.outputs.version }}-vcpkg_json:${{ hashFiles('vcpkg*.json') }}-runner:${{ steps.runner-info.outputs.info }}
restore-keys: |
vcpkg-${{ steps.vcpkg-info.outputs.triplet }}-cmake:${{ steps.cmake-info.outputs.version }}-vcpkg_json:${{ hashFiles('vcpkg*.json') }}
vcpkg-${{ steps.vcpkg-info.outputs.triplet }}-cmake:${{ steps.cmake-info.outputs.version }}
vcpkg-${{ steps.vcpkg-info.outputs.triplet }}
# Ensure vcpkg builtin registry is up-to-date
- name: Update vcpkg builtin registry
working-directory: ${{ steps.vcpkg-info.outputs.root }}
run: |
git reset --hard
git pull
# Setup a CentOS 7 container to build on Linux x64 for backwards compatibility.
- name: Start CentOS container and install toolchain
if: runner.os == 'Linux' && matrix.arch == 'x64'
run: |
docker run -d --name centos --entrypoint tail -v $PWD:$PWD -v $VCPKG_INSTALLATION_ROOT:$VCPKG_INSTALLATION_ROOT centos:7 -f /dev/null
docker exec centos sh -c "yum install -y centos-release-scl epel-release && \
yum install -y devtoolset-7 rh-git227 flex bison perl-Data-Dumper perl-IPC-Cmd && \
curl -fsSL -o /tmp/cmake.sh https://github.com/Kitware/CMake/releases/download/v${{ steps.cmake-info.outputs.version }}/cmake-${{ steps.cmake-info.outputs.version }}-linux-x86_64.sh && \
sh /tmp/cmake.sh --skip-license --prefix=/usr/local && \
rm /tmp/cmake.sh"
# Install arm64 cross-compilation toolchain if required
- name: Install arm64 cross-compilation toolchain
if: runner.os == 'Linux' && matrix.arch == 'arm64'
run: |
sudo apt-get update
sudo apt install g++-aarch64-linux-gnu
# Install vcpkg dependencies
- name: Install vcpkg build dependencies (macOS)
if: runner.os == 'macOS'
run: brew install bison
# .NET Core Setup (and also MSBuild for Windows).
- name: Setup .NET SDK v7.0.x
uses: actions/setup-dotnet@v3
with:
dotnet-version: 7.0.x
- name: Setup MSBuild
if: runner.os == 'Windows'
uses: microsoft/setup-msbuild@v1
# Compile ParquetSharp and C++ dependencies (and upload the native library as an artifact).
- name: Compile native ParquetSharp library (Unix)
if: runner.os == 'Linux' || runner.os == 'macOS'
run: |
if [ "${{ runner.os }}" == "Linux" ] && [ "${{ matrix.arch }}" == "x64" ]; then
exec="docker exec -w $PWD -e GITHUB_ACTIONS -e VCPKG_BINARY_SOURCES -e VCPKG_INSTALLATION_ROOT centos scl enable devtoolset-7 rh-git227 --"
fi
$exec ./build_unix.sh ${{ matrix.arch }}
env:
VCPKG_BINARY_SOURCES: clear;files,${{ github.workspace }}/cache/vcpkg,readwrite
- name: Compile native ParquetSharp library (Windows)
if: runner.os == 'Windows'
run: ./build_windows.ps1
env:
VCPKG_BINARY_SOURCES: clear;files,${{ github.workspace }}/cache/vcpkg,readwrite
- name: Upload vcpkg arrow logs
if: steps.get-cached-vcpkg.outputs.cache-hit != 'true' && (success() || failure())
uses: actions/upload-artifact@v3
with:
name: ${{ steps.vcpkg-info.outputs.triplet }}-vcpkg-arrow-logs
path: ${{ steps.vcpkg-info.outputs.root }}/buildtrees/arrow/*.log
- name: Dump vcpkg arrow abi info (Unix)
if: runner.os == 'Linux' || runner.os == 'macOS'
run: cat ./build/${{ steps.vcpkg-info.outputs.triplet }}-release/vcpkg_installed/${{ steps.vcpkg-info.outputs.triplet }}/share/arrow/vcpkg_abi_info.txt
- name: Dump vcpkg arrow abi info (Windows)
if: runner.os == 'Windows'
run: Get-Content ./build/${{ steps.vcpkg-info.outputs.triplet }}/vcpkg_installed/${{ steps.vcpkg-info.outputs.triplet }}/share/arrow/vcpkg_abi_info.txt
- name: Build .NET benchmarks & unit tests
run: |
dotnet build csharp.benchmark --configuration=Release -p:OSArchitecture=${{ matrix.arch }}
dotnet build csharp.test --configuration=Release -p:OSArchitecture=${{ matrix.arch }}
dotnet build fsharp.test --configuration=Release -p:OSArchitecture=${{ matrix.arch }}
- name: Upload native ParquetSharp library
uses: actions/upload-artifact@v3
with:
name: ${{ steps.vcpkg-info.outputs.triplet }}-native-library
path: bin
- name: Stop CentOS container
if: runner.os == 'Linux' && matrix.arch == 'x64'
run: docker rm -f centos
# Download all native shared libraries and create the nuget package.
# Upload nuget package as an artifact.
build-nuget:
name: Build NuGet package
runs-on: ubuntu-latest
needs: build-native
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Download all artifacts
uses: actions/download-artifact@v3
with:
path: artifacts
- name: Copy native ParquetSharp libraries
run: |
mkdir bin
cp -rv artifacts/*-native-library/* bin/
- name: Setup .NET SDK v7.0.x
uses: actions/setup-dotnet@v3
with:
dotnet-version: 7.0.x
- name: Build NuGet package
run: dotnet build csharp --configuration=Release
- name: Upload NuGet artifact
uses: actions/upload-artifact@v3
with:
name: nuget-package
path: nuget
# Run .NET unit tests with the nuget package on all platforms and all supported .NET runtimes (thus testing the user workflow).
test-nuget:
strategy:
matrix:
os: [ubuntu-20.04, macos-11, macos-12, macos-13-xlarge, windows-2022, ubuntu-20.04-arm64]
dotnet: [netcoreapp3.1, net6.0, net7.0]
include:
- os: windows-2022
dotnet: net472
exclude:
- os: macos-13-xlarge
dotnet: netcoreapp3.1
fail-fast: false
name: Test NuGet package (${{ matrix.dotnet }} on ${{ matrix.os }})
runs-on: ${{ matrix.os }}
needs: build-nuget
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Get version
id: get-version
run: echo "version=$((Select-Xml -Path ./csharp/ParquetSharp.csproj -XPath '/Project/PropertyGroup/Version/text()').node.Value)" >> $env:GITHUB_OUTPUT
shell: pwsh
- name: Download NuGet artifact
uses: actions/download-artifact@v3
with:
name: nuget-package
path: nuget
- name: Setup .NET Core SDK v3.1.x
if: matrix.dotnet == 'netcoreapp3.1'
uses: actions/setup-dotnet@v3
with:
dotnet-version: 3.1.x
- name: Setup .NET SDK v6.0.x
if: matrix.dotnet == 'net6.0'
uses: actions/setup-dotnet@v3
with:
dotnet-version: 6.0.x
- name: Setup .NET SDK v7.0.x
uses: actions/setup-dotnet@v3
with:
dotnet-version: 7.0.x
- name: Add local NuGet feed
run: |
dotnet new nugetconfig
dotnet nuget add source -n local $PWD/nuget
- name: Change test project references to use local NuGet package
run: |
dotnet remove csharp.test reference csharp/ParquetSharp.csproj
dotnet add csharp.test package ParquetSharp -v ${{ steps.get-version.outputs.version }}
dotnet remove fsharp.test reference csharp/ParquetSharp.csproj
dotnet add fsharp.test package ParquetSharp -v ${{ steps.get-version.outputs.version }}
- name: Build & Run C# unit tests
run: dotnet test csharp.test --configuration=Release --framework ${{ matrix.dotnet }}
- name: Build & Run F# unit tests
run: dotnet test fsharp.test --configuration=Release --framework ${{ matrix.dotnet }}
# Virtual job that can be configured as a required check before a PR can be merged.
# As GitHub considers a check as successful if it is skipped, we need to check its status in
# another workflow (check-required.yml) and create a check there.
all-required-checks-done:
name: All required checks done
needs:
- check-format
- test-nuget
runs-on: ubuntu-latest
steps:
- run: echo "All required checks done"
# Create a GitHub release and publish the NuGet packages to nuget.org when a tag is pushed.
publish-release:
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') && !github.event.repository.fork
name: Publish release
runs-on: ubuntu-latest
permissions:
contents: write
needs: all-required-checks-done
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Check version
id: check-version
shell: pwsh
run: |
$version = (Select-Xml -Path ./csharp/ParquetSharp.csproj -XPath '/Project/PropertyGroup/Version/text()').node.Value
$tag = "${{ github.ref }}".SubString(10)
if (-not ($tag -eq $version)) {
echo "::error ::There is a mismatch between the project version ($version) and the tag ($tag)"
exit 1
}
echo "version=$version" >> $env:GITHUB_OUTPUT
- name: Download NuGet artifact
uses: actions/download-artifact@v3
with:
name: nuget-package
path: nuget
- name: Setup .NET SDK v7.0.x
uses: actions/setup-dotnet@v3
with:
dotnet-version: 7.0.x
# if version contains "-" treat it as pre-release
# example: 1.0.0-beta1
- name: Create release
uses: softprops/action-gh-release@v1
with:
name: ParquetSharp ${{ steps.check-version.outputs.version }}
draft: true
prerelease: ${{ contains(steps.check-version.outputs.version, '-') }}
files: |
nuget/ParquetSharp.${{ steps.check-version.outputs.version }}.nupkg
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Publish to NuGet
run: dotnet nuget push nuget/ParquetSharp.${{ steps.check-version.outputs.version }}.nupkg --api-key ${{ secrets.NUGET_API_KEY }} --source https://api.nuget.org/v3/index.json