From 03f56ffb01ece9840167ed887dcdbc998932a533 Mon Sep 17 00:00:00 2001 From: Vincent Moens Date: Wed, 5 Feb 2025 17:39:18 +0000 Subject: [PATCH] [CI] Fix windows build (#2760) --- .github/scripts/version_script.bat | 50 +++++++ .../windows_optdepts/scripts/post_process.sh | 3 - .../windows_optdepts/scripts/run_test.sh | 18 --- .../windows_optdepts/scripts/setup_env.sh | 45 ------ .../scripts/{install.sh => unittest.sh} | 47 +++++- .github/workflows/build-wheels-m1.yml | 2 +- .github/workflows/test-windows-optdepts.yml | 24 ++- .github/workflows/wheels-legacy.yml | 108 -------------- packaging/wheel/relocate.py | 138 ++++++++++++++++++ setup.cfg | 1 + 10 files changed, 242 insertions(+), 194 deletions(-) delete mode 100644 .github/unittest/windows_optdepts/scripts/post_process.sh delete mode 100644 .github/unittest/windows_optdepts/scripts/run_test.sh delete mode 100644 .github/unittest/windows_optdepts/scripts/setup_env.sh rename .github/unittest/windows_optdepts/scripts/{install.sh => unittest.sh} (64%) mode change 100644 => 100755 delete mode 100644 .github/workflows/wheels-legacy.yml create mode 100644 packaging/wheel/relocate.py diff --git a/.github/scripts/version_script.bat b/.github/scripts/version_script.bat index 32f739bdeed..8adc95b2da7 100644 --- a/.github/scripts/version_script.bat +++ b/.github/scripts/version_script.bat @@ -1,3 +1,53 @@ @echo off set TORCHRL_BUILD_VERSION=0.7.0 echo TORCHRL_BUILD_VERSION is set to %TORCHRL_BUILD_VERSION% + +@echo on + +set VC_VERSION_LOWER=17 +set VC_VERSION_UPPER=18 +if "%VC_YEAR%" == "2019" ( + set VC_VERSION_LOWER=16 + set VC_VERSION_UPPER=17 +) +if "%VC_YEAR%" == "2017" ( + set VC_VERSION_LOWER=15 + set VC_VERSION_UPPER=16 +) + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( + if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( + set 
"VS15INSTALLDIR=%%i" + set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" + goto vswhere + ) +) + +:vswhere +if "%VSDEVCMD_ARGS%" == "" ( + call "%VS15VCVARSALL%" x64 || exit /b 1 +) else ( + call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% || exit /b 1 +) + +@echo on + +if "%CU_VERSION%" == "xpu" call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + +set DISTUTILS_USE_SDK=1 + +set args=%1 +shift +:start +if [%1] == [] goto done +set args=%args% %1 +shift +goto start + +:done +if "%args%" == "" ( + echo Usage: vc_env_helper.bat [command] [args] + echo e.g. vc_env_helper.bat cl /c test.cpp +) + +%args% || exit /b 1 diff --git a/.github/unittest/windows_optdepts/scripts/post_process.sh b/.github/unittest/windows_optdepts/scripts/post_process.sh deleted file mode 100644 index df82332cd84..00000000000 --- a/.github/unittest/windows_optdepts/scripts/post_process.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash - -set -e diff --git a/.github/unittest/windows_optdepts/scripts/run_test.sh b/.github/unittest/windows_optdepts/scripts/run_test.sh deleted file mode 100644 index 351eb4bfef7..00000000000 --- a/.github/unittest/windows_optdepts/scripts/run_test.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash - -set -e - -eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')" -conda activate ./env - -this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -source "$this_dir/set_cuda_envs.sh" - -# we don't use torchsnapshot -export CKPT_BACKEND=torch -export MAX_IDLE_COUNT=60 -export BATCHED_PIPE_TIMEOUT=60 -export LAZY_LEGACY_OP=False - -python -m torch.utils.collect_env -pytest --junitxml=test-results/junit.xml -v --durations 200 --ignore test/test_distributed.py --ignore test/test_rlhf.py diff --git a/.github/unittest/windows_optdepts/scripts/setup_env.sh b/.github/unittest/windows_optdepts/scripts/setup_env.sh deleted file mode 100644 index 84f709d6537..00000000000 --- a/.github/unittest/windows_optdepts/scripts/setup_env.sh +++ /dev/null @@ 
-1,45 +0,0 @@ -#!/usr/bin/env bash - -# This script is for setting up environment in which unit test is ran. -# To speed up the CI time, the resulting environment is cached. -# -# Do not install PyTorch and torchvision here, otherwise they also get cached. - -set -e - -this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -root_dir="$(git rev-parse --show-toplevel)" -conda_dir="${root_dir}/conda" -env_dir="${root_dir}/env" - -cd "${root_dir}" - -# 1. Install conda at ./conda -if [ ! -d "${conda_dir}" ]; then - printf "* Installing conda\n" - export tmp_conda="$(echo $conda_dir | tr '/' '\\')" - export miniconda_exe="$(echo $root_dir | tr '/' '\\')\\miniconda.exe" - curl --output miniconda.exe https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe -O - "$this_dir/install_conda.bat" - unset tmp_conda - unset miniconda_exe -fi - -eval "$(${conda_dir}/Scripts/conda.exe 'shell.bash' 'hook')" - -# 2. Create test environment at ./env -printf "* Creating a test environment\n" -conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION" - -printf "* Activating the environment" -conda deactivate -conda activate "${env_dir}" - -printf "Python version" -echo $(which python) -echo $(python --version) -echo $(conda info -e) - -#conda env update --file "${this_dir}/environment.yml" --prune - -python -m pip install hypothesis future cloudpickle pytest pytest-cov pytest-mock pytest-instafail pytest-rerunfailures expecttest pyyaml scipy coverage diff --git a/.github/unittest/windows_optdepts/scripts/install.sh b/.github/unittest/windows_optdepts/scripts/unittest.sh old mode 100644 new mode 100755 similarity index 64% rename from .github/unittest/windows_optdepts/scripts/install.sh rename to .github/unittest/windows_optdepts/scripts/unittest.sh index f13b83a0be0..f8adfe696f2 --- a/.github/unittest/windows_optdepts/scripts/install.sh +++ b/.github/unittest/windows_optdepts/scripts/unittest.sh @@ -1,16 +1,38 @@ #!/usr/bin/env bash -unset 
PYTORCH_VERSION -# For unittest, nightly PyTorch is used as the following section, -# so no need to set PYTORCH_VERSION. -# In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config. +# This script is for setting up environment in which unit test is ran. +# To speed up the CI time, the resulting environment is cached. +# +# Do not install PyTorch and torchvision here, otherwise they also get cached. set -ex +# =================================== Setup ================================================= this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +root_dir="$(git rev-parse --show-toplevel)" +env_dir="${root_dir}/env" + +cd "${root_dir}" + +eval "$($(which conda) shell.bash hook)" && set -x + +# Create test environment at ./env +printf "* Creating a test environment\n" +conda create --name ci -y python="$PYTHON_VERSION" + +printf "* Activating the environment" +conda activate ci + +printf "Python version" +echo $(which python) +echo $(python --version) +echo $(conda info -e) + + +python -m pip install hypothesis future cloudpickle pytest pytest-cov pytest-mock pytest-instafail pytest-rerunfailures expecttest pyyaml scipy coverage + +# =================================== Install ================================================= -eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')" -conda activate ./env # TODO, refactor the below logic to make it easy to understand how to get correct cuda_version. 
if [ "${CU_VERSION:-}" == cpu ] ; then @@ -98,3 +120,16 @@ python -c """ from torchrl.data import ReplayBuffer print('successfully imported torchrl') """ + +# =================================== Run ================================================= + +source "$this_dir/set_cuda_envs.sh" + +# we don't use torchsnapshot +export CKPT_BACKEND=torch +export MAX_IDLE_COUNT=60 +export BATCHED_PIPE_TIMEOUT=60 +export LAZY_LEGACY_OP=False + +python -m torch.utils.collect_env +pytest --junitxml=test-results/junit.xml -v --durations 200 --ignore test/test_distributed.py --ignore test/test_rlhf.py diff --git a/.github/workflows/build-wheels-m1.yml b/.github/workflows/build-wheels-m1.yml index 73a365a79f2..d52c2a95e92 100644 --- a/.github/workflows/build-wheels-m1.yml +++ b/.github/workflows/build-wheels-m1.yml @@ -34,7 +34,7 @@ jobs: - repository: pytorch/rl smoke-test-script: test/smoke_test.py package-name: torchrl - name: pytorch/rl + name: ${{ matrix.repository }} uses: pytorch/test-infra/.github/workflows/build_wheels_macos.yml@main with: repository: ${{ matrix.repository }} diff --git a/.github/workflows/test-windows-optdepts.yml b/.github/workflows/test-windows-optdepts.yml index dfcb8de7966..26359929830 100644 --- a/.github/workflows/test-windows-optdepts.yml +++ b/.github/workflows/test-windows-optdepts.yml @@ -21,15 +21,23 @@ permissions: jobs: unittests-cpu: + strategy: + matrix: + python-version: + - "3.10" + runner: ["windows.4xlarge"] + gpu-arch-type: ["cpu"] + fail-fast: false uses: pytorch/test-infra/.github/workflows/windows_job.yml@main with: - runner: "windows.4xlarge" repository: pytorch/rl + runner: ${{ matrix.runner }} timeout: 40 + test-infra-ref: main script: | set -euxo pipefail - export PYTHON_VERSION="3.9" + export PYTHON_VERSION=${{ matrix.python-version }} export CU_VERSION="cpu" export torch_cuda="False" @@ -48,17 +56,7 @@ jobs: fi export TD_GET_DEFAULTS_TO_NONE=1 - ## setup_env.sh - ./.github/unittest/windows_optdepts/scripts/setup_env.sh - - ## 
install.sh - ./.github/unittest/windows_optdepts/scripts/install.sh - - ## run_test.sh - ./.github/unittest/windows_optdepts/scripts/run_test.sh - - ## post_process.sh - ./.github/unittest/windows_optdepts/scripts/post_process.sh + ./.github/unittest/windows_optdepts/scripts/unittest.sh # unittests-gpu: # uses: pytorch/test-infra/.github/workflows/windows_job.yml@main diff --git a/.github/workflows/wheels-legacy.yml b/.github/workflows/wheels-legacy.yml deleted file mode 100644 index 200c9c291d3..00000000000 --- a/.github/workflows/wheels-legacy.yml +++ /dev/null @@ -1,108 +0,0 @@ -name: Wheels -on: - pull_request: - types: [opened, synchronize, reopened] - push: - branches: - - release/* - - main - -concurrency: - # Documentation suggests ${{ github.head_ref }}, but that's only available on pull_request/pull_request_target triggers, so using ${{ github.ref }}. - # On master, we want all builds to complete even if merging happens faster to make it easier to discover at which point something broke. 
- group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && format('ci-master-{0}', github.sha) || format('ci-{0}', github.ref) }} - cancel-in-progress: true - -permissions: - id-token: write - contents: read - -jobs: - - build-wheel-windows: - runs-on: windows-latest - strategy: - matrix: - python_version: [["3.9", "3.9"], ["3.10", "3.10.3"], ["3.11", "3.11"], ["3.12", "3.12"]] - steps: - - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python_version[1] }} - - name: Checkout torchrl - uses: actions/checkout@v2 - - name: Install PyTorch RC - shell: bash - run: | - python3 -mpip install torch --index-url https://download.pytorch.org/whl/cpu - - name: Build wheel - shell: bash - run: | - python3 -mpip install wheel - TORCHRL_BUILD_VERSION=0.7.0 python3 setup.py bdist_wheel - - name: Upload wheel for the test-wheel job - uses: actions/upload-artifact@v4 - with: - name: torchrl-win-${{ matrix.python_version[0] }}.whl - path: dist/torchrl-*.whl - - name: Upload wheel for download - uses: actions/upload-artifact@v4 - with: - name: torchrl-batch.whl - path: dist/*.whl - - test-wheel-windows: - needs: build-wheel-windows - strategy: - matrix: - python_version: ["3.9", "3.10", "3.11", "3.12" ] - runs-on: windows-latest - steps: - - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python_version }} - - name: Checkout torchrl - uses: actions/checkout@v2 - - name: Install PyTorch RC - shell: bash - run: | - python3 -mpip install torch torchvision --index-url https://download.pytorch.org/whl/cpu - - name: Upgrade pip - shell: bash - run: | - python3 -mpip install --upgrade pip - - name: Install tensordict - shell: bash - run: | - python3 -mpip install git+https://github.com/pytorch/tensordict.git - - name: Install test dependencies - shell: bash - run: | - python3 -mpip install numpy pytest pytest-cov codecov unittest-xml-reporting pillow>=4.1.1 scipy av networkx expecttest pyyaml - - 
name: Download built wheels - uses: actions/download-artifact@v4 - with: - name: torchrl-win-${{ matrix.python_version }}.whl - path: wheels - - name: Install built wheels - shell: bash - run: | - python3 -mpip install wheels/* - - name: Log version string - shell: bash - run: | - # Avoid ambiguity of "import torchrl" by deleting the source files. - rm -rf torchrl/ - python -c "import torchrl; print(torchrl.__version__)" - - name: Run tests - shell: bash - run: | - set -e - export IN_CI=1 - mkdir test-reports - python -m torch.utils.collect_env - python -c "import torchrl; print(torchrl.__version__)" - EXIT_STATUS=0 - pytest test/smoke_test.py -v --durations 200 - exit $EXIT_STATUS diff --git a/packaging/wheel/relocate.py b/packaging/wheel/relocate.py new file mode 100644 index 00000000000..a05e047f3c5 --- /dev/null +++ b/packaging/wheel/relocate.py @@ -0,0 +1,138 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+
+"""Helper script to package wheels and relocate binaries."""
+
+import glob
+import hashlib
+
+# Standard library imports
+import os
+import os.path as osp
+import shutil
+import sys
+import zipfile
+from base64 import urlsafe_b64encode
+
+HERE = osp.dirname(osp.abspath(__file__))
+PACKAGE_ROOT = osp.dirname(osp.dirname(HERE))
+
+
+def rehash(path, blocksize=1 << 20):
+    """Return (hash, length) for path using hashlib.sha256()"""
+    h = hashlib.sha256()
+    length = 0
+    with open(path, "rb") as f:
+        while block := f.read(blocksize):
+            length += len(block)
+            h.update(block)
+    digest = "sha256=" + urlsafe_b64encode(h.digest()).decode("latin1").rstrip("=")
+    # length is returned as a str; compress_wheel writes it into RECORD as-is
+    return (digest, str(length))  # type: ignore
+
+
+def unzip_file(file, dest):
+    """Decompress zip `file` into directory `dest`."""
+    with zipfile.ZipFile(file, "r") as zip_ref:
+        zip_ref.extractall(dest)
+
+
+def is_program_installed(basename):
+    """
+    Return program absolute path if installed in PATH.
+    Otherwise, return None
+    On macOS systems, a .app is considered installed if
+    it exists.
+    """
+    if sys.platform == "darwin" and basename.endswith(".app") and osp.exists(basename):
+        return basename
+
+    for path in os.environ["PATH"].split(os.pathsep):
+        abspath = osp.join(path, basename)
+        if osp.isfile(abspath):
+            return abspath
+
+
+def find_program(basename):
+    """
+    Find program in PATH and return absolute path
+    Try adding .exe, .bat, .cmd or .dll to basename on Windows platforms
+    (return None if not found)
+    """
+    names = [basename]
+    if os.name == "nt":
+        # Windows platforms
+        extensions = (".exe", ".bat", ".cmd", ".dll")
+        if not basename.endswith(extensions):
+            names = [basename + ext for ext in extensions] + [basename]
+    for name in names:
+        path = is_program_installed(name)
+        if path:
+            return path
+
+
+def compress_wheel(output_dir, wheel, wheel_dir, wheel_name):
+    """Rewrite RECORD in output_dir, zip it over ``wheel``, then remove output_dir."""
+    # Regenerate RECORD: one "path,hash,size" row per file under output_dir.
+    dist_info = glob.glob(osp.join(output_dir, "*.dist-info"))[0]
+    record_file = osp.join(dist_info, "RECORD")
+
+    with open(record_file, "w", encoding="utf-8") as f:
+        for root, _, files in os.walk(output_dir):
+            for this_file in files:
+                full_file = osp.join(root, this_file)
+                rel_file = osp.relpath(full_file, output_dir).replace(os.sep, "/")
+                if full_file == record_file:
+                    f.write(f"{rel_file},,\n")
+                else:
+                    digest, size = rehash(full_file)
+                    f.write(f"{rel_file},{digest},{size}\n")
+
+    # Zip the tree, then swap the archive in place of the original wheel.
+    base_wheel_name = osp.join(wheel_dir, wheel_name)
+    shutil.make_archive(base_wheel_name, "zip", output_dir)
+    os.remove(wheel)
+    shutil.move(f"{base_wheel_name}.zip", wheel)
+    shutil.rmtree(output_dir)
+
+
+def patch_win():
+    """Repack every dist/*.whl with a regenerated RECORD; requires dumpbin on PATH."""
+    dumpbin = find_program("dumpbin")
+    if dumpbin is None:
+        raise FileNotFoundError(
+            "Dumpbin was not found in the system, please make sure that is available on the PATH."
+        )
+
+    # Collect the built wheels; each one is unpacked into a scratch directory
+    # under dist/ that is wiped and recreated before every wheel.
+    wheels = glob.glob(osp.join(PACKAGE_ROOT, "dist", "*.whl"))
+    output_dir = osp.join(PACKAGE_ROOT, "dist", ".wheel-process")
+
+    for wheel in wheels:
+        print(f"processing {wheel}")
+        if osp.exists(output_dir):
+            shutil.rmtree(output_dir)
+        print(f"creating output directory {output_dir}")
+        os.makedirs(output_dir)
+
+        # Split the wheel path into its directory and extension-less name.
+        wheel_file = osp.basename(wheel)
+        wheel_dir = osp.dirname(wheel)
+        # compress_wheel's make_archive call appends ".zip" to this name.
+        wheel_name, _ = osp.splitext(wheel_file)
+        print(f"unzipping {wheel} in {output_dir}")
+        unzip_file(wheel, output_dir)
+        print("compressing wheel")
+        compress_wheel(output_dir, wheel, wheel_dir, wheel_name)
+
+
+if __name__ == "__main__":
+    if sys.platform == "linux":
+        pass
+    elif sys.platform == "win32":
+        patch_win()
+    else:
+        raise NotImplementedError
diff --git a/setup.cfg b/setup.cfg index eeb7001ac32..985c68e5af9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,6 +29,7 @@ per-file-ignores = examples/*.py: T201 test/opengl_rendering.py: T201 */**/run-clang-format.py: T201 + packaging/*/**.py: T201 exclude = venv extend-select = B901, C401, C408, C409, TOR0, TOR1, TOR2