From f9d2f4226366d442b0433b4ad0c6cbf17950cc1c Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Fri, 22 Nov 2024 16:20:56 +0900 Subject: [PATCH] [SPARK-50394][PYTHON][INFRA] Reduce parallelism in Pure Python library builds ### What changes were proposed in this pull request? This PR proposes to decrease the parallelism of the Pandas API on Spark test runs in the Pure Python library builds from 4 to 2. ### Why are the changes needed? To make the following builds more robust: https://github.com/apache/spark/actions/workflows/build_python_connect.yml https://github.com/apache/spark/actions/workflows/build_python_connect35.yml They currently fail because of out-of-memory (OOM) errors. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? Will monitor the builds: https://github.com/apache/spark/actions/workflows/build_python_connect.yml https://github.com/apache/spark/actions/workflows/build_python_connect35.yml ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48932 from HyukjinKwon/reduce-parallelism. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- .github/workflows/build_python_connect.yml | 2 +- .github/workflows/build_python_connect35.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_python_connect.yml b/.github/workflows/build_python_connect.yml index f668d813ef26e..d57a0c2b91623 100644 --- a/.github/workflows/build_python_connect.yml +++ b/.github/workflows/build_python_connect.yml @@ -93,7 +93,7 @@ jobs: # Several tests related to catalog requires to run them sequencially, e.g., writing a table in a listener. 
./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-connect,pyspark-ml-connect # None of tests are dependent on each other in Pandas API on Spark so run them in parallel - ./python/run-tests --parallelism=4 --python-executables=python3 --modules pyspark-pandas-connect-part0,pyspark-pandas-connect-part1,pyspark-pandas-connect-part2,pyspark-pandas-connect-part3 + ./python/run-tests --parallelism=2 --python-executables=python3 --modules pyspark-pandas-connect-part0,pyspark-pandas-connect-part1,pyspark-pandas-connect-part2,pyspark-pandas-connect-part3 # Stop Spark Connect server. ./sbin/stop-connect-server.sh diff --git a/.github/workflows/build_python_connect35.yml b/.github/workflows/build_python_connect35.yml index 276c9ad9d2f92..4b7a6b82b9527 100644 --- a/.github/workflows/build_python_connect35.yml +++ b/.github/workflows/build_python_connect35.yml @@ -98,7 +98,7 @@ jobs: # Run branch-3.5 tests ./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-connect # None of tests are dependent on each other in Pandas API on Spark so run them in parallel - ./python/run-tests --parallelism=4 --python-executables=python3 --modules pyspark-pandas-connect,pyspark-pandas-slow-connect + ./python/run-tests --parallelism=2 --python-executables=python3 --modules pyspark-pandas-connect,pyspark-pandas-slow-connect - name: Upload test results to report if: always() uses: actions/upload-artifact@v4