diff --git a/.gitignore b/.gitignore index 23b7d09..cb7efbf 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ build/ wheels/ *.egg-info/ *.egg +*.whl diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..fb8dd34 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,21 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - repo: https://github.com/codespell-project/codespell + rev: v2.2.6 + hooks: + - id: codespell + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.2.2 + hooks: + - id: ruff + args: ["--fix"] + - id: ruff-format + +default_language_version: + python: python3 diff --git a/README.md b/README.md index dc0ebc5..ed8bb75 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ The `rapids-dask-dependency` package encodes both `dask` and `distributed` requi # Versioning the Metapackage Itself This package is versioned just like the rest of RAPIDS: using CalVer, with alpha tags (trailing a\*) for nightlies. -Nightlies of the metapackage should be consumed just like nightlies of any other RAPIDS package: +Nightlies of the metapackage should be consumed just like nightlies of any other RAPIDS package: - conda packages should pin up to the minor version with a trailing `.*`, i.e. `==23.10.*`. Conda will allow nightlies to match, so no further intervention is needed. - pip packages should have the same pin, but wheel building scripts must add an alpha spec `>=0.0.0a0` when building nightlies to allow rapids-dask-dependency nightlies. This is the same strategy used to have RAPIDS repositories pull nightly versions of other RAPIDS dependencies (e.g. `cudf` requires `rmm` nightlies). @@ -31,3 +31,11 @@ At release, these dependencies will be pinned to the desired versions. 
Note that encoding direct URLs as above is technically prohibited by the [Python packaging specifications](https://packaging.python.org/en/latest/specifications/version-specifiers/#direct-references). However, while PyPI enforces this, the RAPIDS nightly index does not. Therefore, use of this versioning strategy currently prohibits rapids-dask-dependency nightlies from being uploaded to PyPI, and they must be hosted on the RAPIDS nightly pip index. + +# Patching + +In addition to functioning as a metapackage, `rapids-dask-dependency` also includes code for patching dask itself. +This package is never intended to be manually imported by the user. +Instead, installing it places a `.pth` file (see the [site module documentation](https://docs.python.org/3/library/site.html) for how these work) that is run whenever the Python interpreter starts. +This file installs a custom [meta path loader](https://docs.python.org/3/reference/import.html#the-meta-path) that intercepts all imports of `dask` and `distributed` modules. +This loader is set up to apply RAPIDS-specific patches to the modules, ensuring that, regardless of import order, dask modules are always patched for RAPIDS compatibility in environments where RAPIDS packages are installed. diff --git a/_rapids_dask_dependency.pth b/_rapids_dask_dependency.pth new file mode 100644 index 0000000..33349fb --- /dev/null +++ b/_rapids_dask_dependency.pth @@ -0,0 +1 @@ +import rapids_dask_dependency diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 9c17c01..0fc36fb 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -1,18 +1,19 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION.
set -euo pipefail source rapids-configure-sccache source rapids-date-string -package_name=rapids-dask-dependency -package_dir="pip/${package_name}" version=$(rapids-generate-version) -sed -i "s/^version = .*/version = \"${version}\"/g" "${package_dir}/pyproject.toml" +sed -i "s/^version = .*/version = \"${version}\"/g" "pyproject.toml" -cd "${package_dir}" -python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check +python -m pip wheel . -w dist -vv --no-deps --disable-pip-version-check -RAPIDS_PY_WHEEL_NAME="${package_name}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 dist +RAPIDS_PY_WHEEL_NAME="rapids-dask-dependency" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 dist + +# Run tests +python -m pip install $(ls dist/*.whl)[test] +python -m pytest -v tests/ diff --git a/conda/recipes/rapids-dask-dependency/meta.yaml b/conda/recipes/rapids-dask-dependency/meta.yaml index 68c56e2..3da9f2c 100644 --- a/conda/recipes/rapids-dask-dependency/meta.yaml +++ b/conda/recipes/rapids-dask-dependency/meta.yaml @@ -11,9 +11,22 @@ source: build: number: 0 - noarch: generic + noarch: python + script: python -m pip install . -vv --no-deps + +test: + requires: + - pip + - pytest + source_files: + - tests/ requirements: + host: + - pip + - python >=3.9 + - setuptools + - conda-verify run: - dask ==2024.1.1 - dask-core ==2024.1.1 @@ -29,7 +42,7 @@ about: This metapackage encodes the standard Dask version pinning used for a particular release of RAPIDS. The metapackage adds an extra release segment to the RAPIDS CalVer to allow pinnings in this metapackage to be updated - for a given release and automatically propagate to its dependents. + for a given release and automatically propagate to its dependents. It also + includes any patches to dask required for RAPIDS to function correctly. 
doc_url: https://docs.rapids.ai/ dev_url: https://github.com/rapidsai/rapids_dask_dependency - diff --git a/conda/recipes/rapids-dask-dependency/run_test.sh b/conda/recipes/rapids-dask-dependency/run_test.sh new file mode 100755 index 0000000..7541a82 --- /dev/null +++ b/conda/recipes/rapids-dask-dependency/run_test.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +python -m pytest -v tests/ diff --git a/conda/recipes/rapids-dask-dependency/tests b/conda/recipes/rapids-dask-dependency/tests new file mode 120000 index 0000000..d41566a --- /dev/null +++ b/conda/recipes/rapids-dask-dependency/tests @@ -0,0 +1 @@ +../../../tests/ \ No newline at end of file diff --git a/pip/rapids-dask-dependency/LICENSE b/pip/rapids-dask-dependency/LICENSE deleted file mode 120000 index 30cff74..0000000 --- a/pip/rapids-dask-dependency/LICENSE +++ /dev/null @@ -1 +0,0 @@ -../../LICENSE \ No newline at end of file diff --git a/pip/rapids-dask-dependency/README.md b/pip/rapids-dask-dependency/README.md deleted file mode 120000 index fe84005..0000000 --- a/pip/rapids-dask-dependency/README.md +++ /dev/null @@ -1 +0,0 @@ -../../README.md \ No newline at end of file diff --git a/pip/rapids-dask-dependency/pyproject.toml b/pyproject.toml similarity index 64% rename from pip/rapids-dask-dependency/pyproject.toml rename to pyproject.toml index 076d31e..a9510ee 100644 --- a/pip/rapids-dask-dependency/pyproject.toml +++ b/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
# Copyright (c) 2024, NVIDIA CORPORATION.

import importlib
import importlib.abc
import importlib.machinery
import sys
import warnings
from contextlib import contextmanager

original_warn = warnings.warn

# Top-level packages whose imports DaskLoader intercepts.
_PATCHED_PACKAGES = ("dask", "distributed")


def _warning_with_increased_stacklevel(
    message, category=None, stacklevel=1, source=None, **kwargs
):
    """Forward to the original ``warnings.warn`` with a deeper stacklevel.

    Three is added to ``stacklevel`` to account for the extra frames the
    loader introduces: one in this function, one in the loader itself, and
    one in the importlib call (internal importlib frames are not counted).
    Every other argument is passed through unchanged.
    """
    original_warn(message, category, stacklevel + 3, source, **kwargs)


@contextmanager
def patch_warning_stacklevel():
    """Temporarily route ``warnings.warn`` through the stacklevel shim.

    The previous ``warnings.warn`` is restored in a ``finally`` clause so that
    an exception raised inside the ``with`` body (for example a failing
    import) cannot leave the shim installed for the rest of the process.
    """
    previous_warn = warnings.warn
    warnings.warn = _warning_with_increased_stacklevel
    try:
        yield
    finally:
        warnings.warn = previous_warn


class DaskLoader(importlib.abc.MetaPathFinder, importlib.abc.Loader):
    """Meta path finder/loader that patches dask and distributed on import.

    ``find_spec`` claims ``dask``, ``distributed`` and their submodules.
    ``create_module`` then performs the real import with this loader
    temporarily removed from ``sys.meta_path`` and applies the registered
    patch callables to the resulting module.
    """

    @staticmethod
    def _top_level(fullname):
        """Return "dask" or "distributed" when *fullname* belongs to one.

        Only the first dotted component is compared, so look-alike names
        such as ``dask_cuda`` yield ``None``.
        """
        root = fullname.partition(".")[0]
        return root if root in _PATCHED_PACKAGES else None

    @staticmethod
    def _patches_for(top_level):
        """Return the list of patch callables for a top-level package name."""
        # Imported at call time so that importing this module (which the
        # .pth hook does on interpreter startup) stays cheap.
        if top_level == "dask":
            from .patches.dask import patches
        else:
            from .patches.distributed import patches
        return patches

    def create_module(self, spec):
        """Import the real module named by *spec*, patch it, and return it.

        Raises:
            ImportError: if *spec* does not name a dask or distributed
                module. ``find_spec`` never produces such a spec.
        """
        top_level = self._top_level(spec.name)
        if top_level is None:
            raise ImportError(
                f"DaskLoader cannot load module {spec.name!r}", name=spec.name
            )

        # NOTE(review): while the loader is disabled, nested imports of other
        # dask/distributed modules triggered by this import also bypass it
        # and are not patched -- confirm this is acceptable.
        with self.disable(), patch_warning_stacklevel():
            mod = importlib.import_module(spec.name)

        # Note: The spec does not make it clear whether we're guaranteed that
        # spec is not a copy of the original spec, but that is the case for
        # now. We need to assign this because the spec is used to update
        # module attributes after it is initialized by create_module.
        spec.origin = mod.__spec__.origin
        spec.submodule_search_locations = mod.__spec__.submodule_search_locations

        # TODO: I assume we'll want to only apply patches to specific
        # submodules, that'll be up to RAPIDS dask devs to decide.
        # Selection is by top-level package, not by substring, so a name like
        # "distributed.cli.dask_worker" gets the distributed patches.
        for patch in self._patches_for(top_level):
            patch(mod)
        return mod

    def exec_module(self, _):
        """Do nothing: create_module returns an already-executed module."""

    @contextmanager
    def disable(self):
        """Remove this loader from ``sys.meta_path`` for the ``with`` body.

        The loader is re-inserted at the front of ``sys.meta_path`` on exit,
        even if the body raises.
        """
        sys.meta_path.remove(self)
        try:
            yield
        finally:
            sys.meta_path.insert(0, self)

    def find_spec(self, fullname: str, _, __=None):
        """Return a spec owned by this loader for dask/distributed modules.

        Any other module name yields ``None`` so the remaining finders on
        ``sys.meta_path`` handle it.
        """
        if self._top_level(fullname) is None:
            return None
        return importlib.machinery.ModuleSpec(
            name=fullname,
            loader=self,
            # Set these parameters dynamically in create_module
            origin=None,
            loader_state=None,
            is_package=True,
        )

    @classmethod
    def install(cls):
        """Ensure a loader is on ``sys.meta_path`` and return it.

        If one or more instances are already installed, the first is
        returned and nothing is added; otherwise a new instance is inserted
        at the front of ``sys.meta_path``.
        """
        existing = next(
            (finder for finder in sys.meta_path if isinstance(finder, cls)), None
        )
        if existing is not None:
            return existing
        self = cls()
        sys.meta_path.insert(0, self)
        return self
import multiprocessing
from functools import wraps


def run_test_in_subprocess(func):
    """Decorate a test so that its body runs in a separate process.

    The wrapper starts a child process that calls ``func`` with the given
    arguments, waits for it to finish, and raises ``AssertionError`` if the
    child exits with a non-zero code. Without that check, a failing
    assertion in the child would go unnoticed and the test would pass.

    Args:
        func: The test function to execute in the child process.

    Returns:
        A wrapper with the same name and metadata as ``func`` that returns
        ``None`` on success.

    Raises:
        AssertionError: From the wrapper, when the child process exits with
            a non-zero exit code.
    """
    # The decorated name is rebound to the wrapper, so ``func`` cannot be
    # pickled by reference as the spawn/forkserver start methods require.
    # Prefer fork where the platform offers it; otherwise use the default.
    if "fork" in multiprocessing.get_all_start_methods():
        context = multiprocessing.get_context("fork")
    else:
        context = multiprocessing.get_context()

    @wraps(func)
    def wrapper(*args, **kwargs):
        p = context.Process(target=func, args=args, kwargs=kwargs)
        p.start()
        p.join()
        if p.exitcode != 0:
            raise AssertionError(
                f"{func.__name__} failed in subprocess (exit code {p.exitcode})"
            )

    return wrapper
@run_test_in_subprocess
def test_distributed():
    """Check that importing ``distributed`` yields a module with the patch marker."""
    import distributed as patched_module

    marker_present = hasattr(patched_module, "_rapids_patched")
    assert marker_present