From 427e3d63549afc959162e4b1bb88e6d38254604c Mon Sep 17 00:00:00 2001
From: Robert Pack <42610831+roeap@users.noreply.github.com>
Date: Mon, 5 Jun 2023 02:50:54 +0200
Subject: [PATCH] ci: prune CI/CD pipelines (#1433)

# Description

With our current integration tests being close to unusable due to the
very frequent failures of the HDFS tests, I took the liberty of
disabling HDFS in the integration tests for now. While at it, I also
set `RUSTFLAGS` (reduced debug info) on several jobs in the hopes of
improving the overall experience with our CI/CD.

Last but not least, I added Python 3.11 to the test matrix, in the hopes
that it may just work :).

# Related Issue(s)
Related to #1428
---
 .github/workflows/build.yml        | 46 +++++++++++++++++++++---------
 .github/workflows/python_build.yml | 38 ++++++++++++++++++------
 2 files changed, 62 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index f087a0bfc8..34b39df29f 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -11,12 +11,14 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
+
       - name: Install minimal stable with clippy and rustfmt
         uses: actions-rs/toolchain@v1
         with:
           profile: default
           toolchain: stable
           override: true
+
       - name: Format
         run: cargo fmt -- --check
 
@@ -29,21 +31,28 @@ jobs:
           - macos-11
           - windows-latest
     runs-on: ${{ matrix.os }}
+
     steps:
       - uses: actions/checkout@v3
+
       - name: Install minimal stable with clippy and rustfmt
         uses: actions-rs/toolchain@v1
         with:
           profile: default
           toolchain: stable
           override: true
+
       - uses: Swatinem/rust-cache@v2
+
       - name: build and lint with clippy
         run: cargo clippy --features azure,datafusion,s3,gcs,glue --tests
+
       - name: Spot-check build for native-tls features
         run: cargo clippy --no-default-features --features azure,datafusion,s3-native-tls,gcs,glue-native-tls --tests
+
       - name: Check docs
         run: cargo doc --features azure,datafusion,s3,gcs,glue
+
       - name: Check no default features (except rustls)
         run: cargo check --no-default-features --features rustls
 
@@ -60,15 +69,19 @@ jobs:
       # Disable full debug symbol generation to speed up CI build and keep memory down
       # "1" means line tables only, which is useful for panic tracebacks.
       RUSTFLAGS: -C debuginfo=1
+
     steps:
       - uses: actions/checkout@v3
+
       - name: Install minimal stable with clippy and rustfmt
         uses: actions-rs/toolchain@v1
         with:
           profile: default
           toolchain: "stable"
           override: true
+
       - uses: Swatinem/rust-cache@v2
+
       - name: Run tests
         run: cargo test --verbose --features datafusion,azure
 
@@ -102,19 +115,19 @@ jobs:
           toolchain: stable
           override: true
 
-      - uses: actions/setup-java@v3
-        with:
-          distribution: "zulu"
-          java-version: "17"
+      # - uses: actions/setup-java@v3
+      #   with:
+      #     distribution: "zulu"
+      #     java-version: "17"
 
-      - uses: beyondstorage/setup-hdfs@master
-        with:
-          hdfs-version: "3.3.2"
+      # - uses: beyondstorage/setup-hdfs@master
+      #   with:
+      #     hdfs-version: "3.3.2"
 
-      - name: Set Hadoop env
-        run: |
-          echo "CLASSPATH=$CLASSPATH:`hadoop classpath --glob`" >> $GITHUB_ENV
-          echo "LD_LIBRARY_PATH=$JAVA_HOME/lib/server" >> $GITHUB_ENV
+      # - name: Set Hadoop env
+      #   run: |
+      #     echo "CLASSPATH=$CLASSPATH:`hadoop classpath --glob`" >> $GITHUB_ENV
+      #     echo "LD_LIBRARY_PATH=$JAVA_HOME/lib/server" >> $GITHUB_ENV
 
       - uses: Swatinem/rust-cache@v2
 
@@ -123,22 +136,29 @@ jobs:
 
       - name: Run tests with rustls (default)
         run: |
-          cargo test -p deltalake --features integration_test,azure,s3,gcs,datafusion,hdfs
+          cargo test -p deltalake --features integration_test,azure,s3,gcs,datafusion
+
       - name: Run tests with native-tls
         run: |
-          cargo test -p deltalake --no-default-features --features integration_test,s3-native-tls,datafusion,hdfs
+          cargo test -p deltalake --no-default-features --features integration_test,s3-native-tls,datafusion
 
   parquet2_test:
     runs-on: ubuntu-latest
+    env:
+      RUSTFLAGS: "-C debuginfo=0"
+
     steps:
       - uses: actions/checkout@v3
+
       - name: Install minimal stable with clippy and rustfmt
         uses: actions-rs/toolchain@v1
         with:
           profile: default
           toolchain: stable
           override: true
+
       - uses: Swatinem/rust-cache@v2
+
       - name: Run tests
         working-directory: rust
         run: cargo test --no-default-features --features=parquet2
diff --git a/.github/workflows/python_build.yml b/.github/workflows/python_build.yml
index 58e3415e1c..cd7315a072 100644
--- a/.github/workflows/python_build.yml
+++ b/.github/workflows/python_build.yml
@@ -19,22 +19,28 @@ jobs:
         uses: actions/setup-python@v2
         with:
           python-version: 3.7
+
       - name: Check Python
         run: |
           pip install ruff black mypy types-dataclasses typing-extensions
           make check-python
+
       - name: Install minimal stable with clippy and rustfmt
         uses: actions-rs/toolchain@v1
         with:
           profile: default
           toolchain: stable
           override: true
+
       - name: Check Rust
         run: make check-rust
 
   test-minimal:
     name: Python Build (Python 3.7 PyArrow 7.0.0)
     runs-on: ubuntu-latest
+    env:
+      RUSTFLAGS: "-C debuginfo=0"
+
     # use the same environment we have for python release
     container: quay.io/pypa/manylinux2014_x86_64:2022-09-24-4f086d0
     steps:
@@ -65,6 +71,8 @@ jobs:
           make setup
           # Install minimum PyArrow version
           pip install -e .[pandas,devel] pyarrow==7.0.0
+        env:
+          RUSTFLAGS: "-C debuginfo=0"
 
       - name: Run tests
         run: |
@@ -78,6 +86,9 @@ jobs:
   test:
     name: Python Build (Python 3.10 PyArrow latest)
     runs-on: ubuntu-latest
+    env:
+      RUSTFLAGS: "-C debuginfo=1"
+
     steps:
       - uses: actions/checkout@v3
 
@@ -102,8 +113,8 @@ jobs:
           pip install virtualenv
           virtualenv venv
           source venv/bin/activate
-          make develop      
-              
+          make develop
+
       - name: Download Data Acceptance Tests (DAT) files
         run: make setup-dat
 
@@ -123,10 +134,13 @@ jobs:
         run: |
           source venv/bin/activate
           make build-documentation
-  
+
   benchmark:
     name: Python Benchmark
     runs-on: ubuntu-latest
+    env:
+      RUSTFLAGS: "-C debuginfo=0"
+
     steps:
       - uses: actions/checkout@v2
 
@@ -142,30 +156,30 @@ jobs:
       - uses: actions/setup-python@v4
         with:
           python-version: "3.10"
-      
+
       - name: Build deltalake in release mode
         run: |
           pip install virtualenv
           virtualenv venv
           source venv/bin/activate
           MATURIN_EXTRA_ARGS=--release make develop
-      
+
       # Download previous benchmark result from cache (if exists)
       - name: Download previous benchmark data
         uses: actions/cache@v2
         with:
           path: ./cache
           key: ${{ runner.os }}-benchmark
-      
+
       - name: Run benchmark
         run: |
           source venv/bin/activate
           pytest tests/test_benchmark.py -m benchmark --benchmark-json output.json
-        
+
       - name: Store benchmark result
         uses: benchmark-action/github-action-benchmark@v1
         with:
-          tool: 'pytest'
+          tool: "pytest"
           output-file-path: python/output.json
           external-data-json-path: ./cache/benchmark-data.json
           fail-on-alert: true
@@ -173,6 +187,9 @@ jobs:
   test-pyspark:
     name: PySpark Integration Tests
     runs-on: ubuntu-latest
+    env:
+      RUSTFLAGS: "-C debuginfo=0"
+
     steps:
       - uses: actions/checkout@v3
 
@@ -209,9 +226,12 @@ jobs:
   multi-python-running:
     name: Running with Python ${{ matrix.python-version }}
     runs-on: ubuntu-latest
+    env:
+      RUSTFLAGS: "-C debuginfo=0"
+
     strategy:
       matrix:
-        python-version: ["3.7", "3.8", "3.9", "3.10"]
+        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
 
     steps:
       - uses: actions/checkout@v3