From 3b3dcac87fb189ce020f2e62c5b76b6a2fa8cabd Mon Sep 17 00:00:00 2001 From: Peter Fackeldey Date: Fri, 6 Sep 2024 14:38:30 -0400 Subject: [PATCH 01/11] align array behavior for 'bool,int,float,complex,index' with dask.Array --- src/dask_awkward/lib/core.py | 30 ++++++++++++++++++++++++++++++ tests/test_core.py | 24 ++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index 19ae2dde..ed0503d5 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -921,6 +921,36 @@ def __dask_postpersist__(self): __dask_scheduler__ = staticmethod(threaded_get) + # taken from https://github.com/dask/dask/blob/3003db5b84070b1afc197c7c70c76c5c4c2bc821/dask/array/core.py#L1854-L1883 + def __bool__(self): + materialized = self.compute().to_numpy() + if materialized.size != 1: + raise ValueError( + f"The truth value of a {self.__class__.__name__} is ambiguous. " + "Use a.any() or a.all()." + ) + else: + return bool(materialized) + + def _scalarfunc(self, cast_type): + materialized = self.compute().to_numpy() + if materialized.size != 1: + raise TypeError("Only length-1 arrays can be converted to Python scalars") + else: + return cast_type(materialized.item()) + + def __int__(self): + return self._scalarfunc(int) + + def __float__(self): + return self._scalarfunc(float) + + def __complex__(self): + return self._scalarfunc(complex) + + def __index__(self): + return self._scalarfunc(operator.index) + def __setitem__(self, where: Any, what: Any) -> None: if not ( isinstance(where, str) diff --git a/tests/test_core.py b/tests/test_core.py index 08a87def..38e0c12d 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -968,3 +968,27 @@ def test_map_partitions_bad_arguments(): array2, meta=empty_typetracer(), ) + + +def test_array__bool_nonzero_long_int_float_complex_index(): + import operator + + dak_arr = dak.from_awkward(ak.Array([1]), npartitions=1) + dask_arr = 
da.from_array(np.array([1])) + + for fun in bool, int, float, complex, operator.index: + assert fun(dak_arr) == fun(dask_arr) + + toolong = dak.from_awkward(ak.Array([1, 2]), npartitions=1) + + with pytest.raises( + ValueError, + match=r"The truth value of a .+ is ambiguous. Use a.any\(\) or a.all\(\).", + ): + bool(toolong) + + with pytest.raises( + TypeError, match="Only length-1 arrays can be converted to Python scalars" + ): + for fun in int, float, complex, operator.index: + fun(toolong) From 203a43d95368470b1823ef85f6f017205dbdfdea Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 21:55:59 +0000 Subject: [PATCH 02/11] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.6.3 → v0.6.4](https://github.com/astral-sh/ruff-pre-commit/compare/v0.6.3...v0.6.4) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bda1ab57..3c9e2c62 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: - --target-version=py312 - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.3 + rev: v0.6.4 hooks: - id: ruff From 35cf37cbb1727016a46c0aa1d8b39643d79fdb90 Mon Sep 17 00:00:00 2001 From: Peter Fackeldey Date: Tue, 10 Sep 2024 10:34:43 -0400 Subject: [PATCH 03/11] explicitly raise error for tracer conversions --- docs/api/utils.rst | 23 +++++++++++++++++++ docs/index.rst | 1 + src/dask_awkward/lib/core.py | 26 +++++----------------- src/dask_awkward/utils.py | 43 ++++++++++++++++++++++++++++++++++++ tests/test_core.py | 25 ++++++--------------- 5 files changed, 80 insertions(+), 38 deletions(-) create mode 100644 docs/api/utils.rst diff --git a/docs/api/utils.rst b/docs/api/utils.rst new file mode 100644 index 00000000..7f3b697b --- 
/dev/null +++ b/docs/api/utils.rst @@ -0,0 +1,23 @@ +Errors +------ + +Utilities to implement array behaviors for dask-awkward arrays. + + +.. currentmodule:: dask_awkward + + +.. autosummary:: + :toctree: generated/ + + utils.IncompatiblePartitions + +.. autosummary:: + :toctree: generated/ + + utils.TracerConversionError + +.. raw:: html + + diff --git a/docs/index.rst b/docs/index.rst index 9bb01088..815d85fd 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -50,6 +50,7 @@ Table of Contents api/reducers.rst api/structure.rst api/behavior.rst + api/utils.rst .. toctree:: :maxdepth: 1 diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index ed0503d5..3c3537c0 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -50,6 +50,7 @@ from dask_awkward.utils import ( DaskAwkwardNotImplemented, IncompatiblePartitions, + TracerConversionError, field_access_to_front, first, hyphenize, @@ -921,35 +922,20 @@ def __dask_postpersist__(self): __dask_scheduler__ = staticmethod(threaded_get) - # taken from https://github.com/dask/dask/blob/3003db5b84070b1afc197c7c70c76c5c4c2bc821/dask/array/core.py#L1854-L1883 def __bool__(self): - materialized = self.compute().to_numpy() - if materialized.size != 1: - raise ValueError( - f"The truth value of a {self.__class__.__name__} is ambiguous. " - "Use a.any() or a.all()." 
- ) - else: - return bool(materialized) - - def _scalarfunc(self, cast_type): - materialized = self.compute().to_numpy() - if materialized.size != 1: - raise TypeError("Only length-1 arrays can be converted to Python scalars") - else: - return cast_type(materialized.item()) + raise TracerConversionError(bool, self) def __int__(self): - return self._scalarfunc(int) + raise TracerConversionError(int, self) def __float__(self): - return self._scalarfunc(float) + raise TracerConversionError(float, self) def __complex__(self): - return self._scalarfunc(complex) + raise TracerConversionError(complex, self) def __index__(self): - return self._scalarfunc(operator.index) + raise TracerConversionError(operator.index, self) def __setitem__(self, where: Any, what: Any) -> None: if not ( diff --git a/src/dask_awkward/utils.py b/src/dask_awkward/utils.py index 87b7efd4..70db3b8a 100644 --- a/src/dask_awkward/utils.py +++ b/src/dask_awkward/utils.py @@ -38,6 +38,49 @@ def divisions_msg(name: str, *args: Array) -> str: return msg +class TracerConversionError(TypeError): + """ + This error occurs when a tracer is used in a context that requires a concrete + value. + + + There are several reasons why this error might occur: + + Examples + -------- + + - When a tracer is used in a conditional statement: + + >>> import dask_awkward as dak + >>> tracer = dak.from_awkward(ak.Array([1]), npartitions=1) + >>> bool(dask_arr) + Traceback (most recent call last): ... + TracerConversionError: Attempted to convert (``bool(dask.awkward)``) a Dask tracer to a concrete value. If you intend to convert the tracer to a concrete value, use the `.compute()` method. + + + - When a tracer is cast to a Python type: + + >>> import dask_awkward as dak + >>> tracer = dak.from_awkward(ak.Array([1]), npartitions=1) + >>> int(dask_arr) + Traceback (most recent call last): ... + TracerConversionError: Attempted to convert (``int(dask.awkward)``) a Dask tracer to a concrete value. 
If you intend to convert the tracer to a concrete value, use the `.compute()` method. + + + These errors can be resolved by explicitely converting the tracer to a concrete value: + + >>> import dask_awkward as dak + >>> tracer = dak.from_awkward(ak.Array([1]), npartitions=1) + >>> bool(tracer.compute()) + >>> int(tracer.compute()) + """ + + def __init__(self, func: Callable, array: Array): + self.message = f"Attempted to convert (`{func.__name__}({array!r})`) a Dask tracer to a concrete value. " + self.message += "If you intend to convert the tracer to a concrete value, use the `.compute()` method." + super().__init__(self.message) + + class LazyInputsDict(Mapping): """Dictionary with lazy key value pairs diff --git a/tests/test_core.py b/tests/test_core.py index 38e0c12d..479b6381 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -34,7 +34,7 @@ typetracer_array, ) from dask_awkward.lib.testutils import assert_eq -from dask_awkward.utils import IncompatiblePartitions +from dask_awkward.utils import IncompatiblePartitions, TracerConversionError if TYPE_CHECKING: from dask_awkward.lib.core import Array @@ -973,22 +973,11 @@ def test_map_partitions_bad_arguments(): def test_array__bool_nonzero_long_int_float_complex_index(): import operator - dak_arr = dak.from_awkward(ak.Array([1]), npartitions=1) - dask_arr = da.from_array(np.array([1])) + tracer = dak.from_awkward(ak.Array([1]), npartitions=1) for fun in bool, int, float, complex, operator.index: - assert fun(dak_arr) == fun(dask_arr) - - toolong = dak.from_awkward(ak.Array([1, 2]), npartitions=1) - - with pytest.raises( - ValueError, - match=r"The truth value of a .+ is ambiguous. 
Use a.any\(\) or a.all\(\).", - ): - bool(toolong) - - with pytest.raises( - TypeError, match="Only length-1 arrays can be converted to Python scalars" - ): - for fun in int, float, complex, operator.index: - fun(toolong) + with pytest.raises( + TracerConversionError, + match=r"Attempted to convert \(.+\) a Dask tracer to a concrete value. If you intend to convert the tracer to a concrete value, use the `.compute\(\)` method.", + ): + fun(tracer) From d2a2279b9fe35924e2471fcd551d7bb76da60aad Mon Sep 17 00:00:00 2001 From: Peter Fackeldey Date: Tue, 10 Sep 2024 14:49:29 -0400 Subject: [PATCH 04/11] test_json_sanity: properly test length of an Array --- tests/test_io_json.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_io_json.py b/tests/test_io_json.py index 71f835bb..688fb550 100644 --- a/tests/test_io_json.py +++ b/tests/test_io_json.py @@ -68,9 +68,9 @@ def test_json_sanity(json_data_dir: Path, concrete_data: ak.Array) -> None: ", use `\\.eager_compute_divisions\\(\\)` on the collection." ), ): - assert ds + assert len(ds) ds.eager_compute_divisions() - assert ds + assert len(ds) assert_eq(ds, concrete_data) From 235c7c63468fedd16ff839953664b4e5b015d537 Mon Sep 17 00:00:00 2001 From: Peter Fackeldey Date: Wed, 11 Sep 2024 09:11:20 -0400 Subject: [PATCH 05/11] improve error message --- docs/api/utils.rst | 2 +- src/dask_awkward/lib/core.py | 14 ++++++++------ src/dask_awkward/utils.py | 37 ++++++++++++++++++------------------ tests/test_core.py | 10 +++++----- 4 files changed, 33 insertions(+), 30 deletions(-) diff --git a/docs/api/utils.rst b/docs/api/utils.rst index 7f3b697b..d7236b37 100644 --- a/docs/api/utils.rst +++ b/docs/api/utils.rst @@ -15,7 +15,7 @@ Utilities to implement array behaviors for dask-awkward arrays. .. autosummary:: :toctree: generated/ - utils.TracerConversionError + utils.ConcretizationTypeError .. 
raw:: html diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index 3c3537c0..d7c1a4e0 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -48,9 +48,9 @@ from dask_awkward.layers import AwkwardBlockwiseLayer, AwkwardMaterializedLayer from dask_awkward.lib.optimize import all_optimizations from dask_awkward.utils import ( + ConcretizationTypeError, DaskAwkwardNotImplemented, IncompatiblePartitions, - TracerConversionError, field_access_to_front, first, hyphenize, @@ -923,19 +923,21 @@ def __dask_postpersist__(self): __dask_scheduler__ = staticmethod(threaded_get) def __bool__(self): - raise TracerConversionError(bool, self) + raise ConcretizationTypeError(f"The __bool__() method was called on {self!r}.") def __int__(self): - raise TracerConversionError(int, self) + raise ConcretizationTypeError(f"The __int__() method was called on {self!r}.") def __float__(self): - raise TracerConversionError(float, self) + raise ConcretizationTypeError(f"The __float__() method was called on {self!r}.") def __complex__(self): - raise TracerConversionError(complex, self) + raise ConcretizationTypeError( + f"The __complex__() method was called on {self!r}." 
+ ) def __index__(self): - raise TracerConversionError(operator.index, self) + raise ConcretizationTypeError(f"The __index__() method was called on {self!r}.") def __setitem__(self, where: Any, what: Any) -> None: if not ( diff --git a/src/dask_awkward/utils.py b/src/dask_awkward/utils.py index 70db3b8a..a7f9179f 100644 --- a/src/dask_awkward/utils.py +++ b/src/dask_awkward/utils.py @@ -8,7 +8,6 @@ if TYPE_CHECKING: from dask_awkward.lib.core import Array - T = TypeVar("T") P = ParamSpec("P") @@ -38,9 +37,9 @@ def divisions_msg(name: str, *args: Array) -> str: return msg -class TracerConversionError(TypeError): +class ConcretizationTypeError(TypeError): """ - This error occurs when a tracer is used in a context that requires a concrete + This error occurs when a ``dask_awkward.Array`` is used in a context that requires a concrete value. @@ -49,35 +48,37 @@ class TracerConversionError(TypeError): Examples -------- - - When a tracer is used in a conditional statement: + - When a ``dask_awkward.Array`` is used in a conditional statement: >>> import dask_awkward as dak - >>> tracer = dak.from_awkward(ak.Array([1]), npartitions=1) - >>> bool(dask_arr) + >>> import awkward as ak + >>> dask_arr = dak.from_awkward(ak.Array([1]), npartitions=1) + >>> if dask_arr > 2: + >>> dask_arr += 1 Traceback (most recent call last): ... - TracerConversionError: Attempted to convert (``bool(dask.awkward)``) a Dask tracer to a concrete value. If you intend to convert the tracer to a concrete value, use the `.compute()` method. - + dask_awkward.utils.ConcretizationTypeError: A dask_awkward.Array is encountered in a computation where a concrete value is expected. If you intend to convert the dask_awkward.Array to a concrete value, use the `.compute()` method. The __bool__() method was called on dask.awkward. 
- - When a tracer is cast to a Python type: + - When a ``dask_awkward.Array`` is cast to a Python type: >>> import dask_awkward as dak - >>> tracer = dak.from_awkward(ak.Array([1]), npartitions=1) + >>> import awkward as ak + >>> dask_arr = dak.from_awkward(ak.Array([1]), npartitions=1) >>> int(dask_arr) Traceback (most recent call last): ... - TracerConversionError: Attempted to convert (``int(dask.awkward)``) a Dask tracer to a concrete value. If you intend to convert the tracer to a concrete value, use the `.compute()` method. - + dask_awkward.utils.ConcretizationTypeError: A dask_awkward.Array is encountered in a computation where a concrete value is expected. If you intend to convert the dask_awkward.Array to a concrete value, use the `.compute()` method. The __int__() method was called on dask.awkward. These errors can be resolved by explicitely converting the tracer to a concrete value: >>> import dask_awkward as dak - >>> tracer = dak.from_awkward(ak.Array([1]), npartitions=1) - >>> bool(tracer.compute()) - >>> int(tracer.compute()) + >>> dask_arr = dak.from_awkward(ak.Array([1]), npartitions=1) + >>> bool(dask_arr.compute()) + True """ - def __init__(self, func: Callable, array: Array): - self.message = f"Attempted to convert (`{func.__name__}({array!r})`) a Dask tracer to a concrete value. " - self.message += "If you intend to convert the tracer to a concrete value, use the `.compute()` method." + def __init__(self, msg: str): + self.message = "A dask_awkward.Array is encountered in a computation where a concrete value is expected. " + self.message += "If you intend to convert the dask_awkward.Array to a concrete value, use the `.compute()` method. 
" + self.message += msg super().__init__(self.message) diff --git a/tests/test_core.py b/tests/test_core.py index 479b6381..3ebf0777 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -34,7 +34,7 @@ typetracer_array, ) from dask_awkward.lib.testutils import assert_eq -from dask_awkward.utils import IncompatiblePartitions, TracerConversionError +from dask_awkward.utils import ConcretizationTypeError, IncompatiblePartitions if TYPE_CHECKING: from dask_awkward.lib.core import Array @@ -973,11 +973,11 @@ def test_map_partitions_bad_arguments(): def test_array__bool_nonzero_long_int_float_complex_index(): import operator - tracer = dak.from_awkward(ak.Array([1]), npartitions=1) + dask_arr = dak.from_awkward(ak.Array([1]), npartitions=1) for fun in bool, int, float, complex, operator.index: with pytest.raises( - TracerConversionError, - match=r"Attempted to convert \(.+\) a Dask tracer to a concrete value. If you intend to convert the tracer to a concrete value, use the `.compute\(\)` method.", + ConcretizationTypeError, + match=r"A dask_awkward.Array is encountered in a computation where a concrete value is expected. If you intend to convert the dask_awkward.Array to a concrete value, use the `.compute\(\)` method. 
The .+ method was called on .+.", ): - fun(tracer) + fun(dask_arr) From 8f5b9d17e079be2eb34b1087be1c541268087b73 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 17 Sep 2024 09:58:20 -0400 Subject: [PATCH 06/11] Solve circular imports --- pyproject.toml | 2 +- src/{dask_awkward/sizeof.py => dask_awkward_sizeof/__init__.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/{dask_awkward/sizeof.py => dask_awkward_sizeof/__init__.py} (100%) diff --git a/pyproject.toml b/pyproject.toml index 4bbafc23..ab9c423d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,7 +78,7 @@ test = [ ] [project.entry-points."dask.sizeof"] -awkward = "dask_awkward.sizeof:register" +awkward = "dask_awkward_sizeof:register" [project.entry-points."awkward.pickle.reduce"] dask_awkward = "dask_awkward.pickle:plugin" diff --git a/src/dask_awkward/sizeof.py b/src/dask_awkward_sizeof/__init__.py similarity index 100% rename from src/dask_awkward/sizeof.py rename to src/dask_awkward_sizeof/__init__.py From 99dee0ead71bab0b6658899110c6d6c4de9cf55b Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 17 Sep 2024 10:26:50 -0400 Subject: [PATCH 07/11] update pyproject --- pyproject.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index ab9c423d..5157ab88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,12 @@ test = [ [project.entry-points."dask.sizeof"] awkward = "dask_awkward_sizeof:register" +[tool.hatch.build.targets.sdist.force-include] +"../src/dask_awkward_sizeof" = "src/dask_awkward_sizeof" + +[tool.hatch.build.targets.wheel] +packages = ["src/dask_awkward", "src/dask_awkward_sizeof"] + [project.entry-points."awkward.pickle.reduce"] dask_awkward = "dask_awkward.pickle:plugin" From 05cb1aa12bf1c65d6675bf679d7223dcdf93934b Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 17 Sep 2024 11:58:31 -0400 Subject: [PATCH 08/11] [temp] show that it works with dask-histogram branch --- .github/envs/environment-3.11.yml 
| 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/envs/environment-3.11.yml b/.github/envs/environment-3.11.yml index 6549509b..b93e20ed 100644 --- a/.github/envs/environment-3.11.yml +++ b/.github/envs/environment-3.11.yml @@ -10,6 +10,8 @@ dependencies: - pytest - pyarrow - awkward - - dask-histogram + # - dask-histogram - uproot - hist + - pip: + - git+https://github.com/martindurant/dask-histogram@sizeof From 3947042a99951863a428c5ac6aa7e48e4b7fa542 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 17 Sep 2024 12:03:32 -0400 Subject: [PATCH 09/11] [temp] also try for pypi --- .github/workflows/pypi-tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pypi-tests.yml b/.github/workflows/pypi-tests.yml index 49d09f4e..61b8979e 100644 --- a/.github/workflows/pypi-tests.yml +++ b/.github/workflows/pypi-tests.yml @@ -35,6 +35,7 @@ jobs: pip install pip wheel -U pip install dask[array,dataframe,distributed,diagnostics] pip install -q --no-cache-dir .[complete,test] + pip install git+https://github.com/martindurant/dask-histogram@sizeof pip list - name: test run: | From 3ea661f08ab77d4e2c980cc03bf700730b2a384a Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 19 Sep 2024 13:09:11 -0400 Subject: [PATCH 10/11] undo temp env --- .github/envs/environment-3.11.yml | 4 +--- .github/workflows/pypi-tests.yml | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/envs/environment-3.11.yml b/.github/envs/environment-3.11.yml index b93e20ed..6549509b 100644 --- a/.github/envs/environment-3.11.yml +++ b/.github/envs/environment-3.11.yml @@ -10,8 +10,6 @@ dependencies: - pytest - pyarrow - awkward - # - dask-histogram + - dask-histogram - uproot - hist - - pip: - - git+https://github.com/martindurant/dask-histogram@sizeof diff --git a/.github/workflows/pypi-tests.yml b/.github/workflows/pypi-tests.yml index 61b8979e..49d09f4e 100644 --- a/.github/workflows/pypi-tests.yml +++ 
b/.github/workflows/pypi-tests.yml @@ -35,7 +35,6 @@ jobs: pip install pip wheel -U pip install dask[array,dataframe,distributed,diagnostics] pip install -q --no-cache-dir .[complete,test] - pip install git+https://github.com/martindurant/dask-histogram@sizeof pip list - name: test run: | From fcae1a24bb568339735aef713f22cd928a6ce0fe Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 19 Sep 2024 13:16:40 -0400 Subject: [PATCH 11/11] fix path --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5157ab88..d5514159 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,7 +81,7 @@ test = [ awkward = "dask_awkward_sizeof:register" [tool.hatch.build.targets.sdist.force-include] -"../src/dask_awkward_sizeof" = "src/dask_awkward_sizeof" +"src/dask_awkward_sizeof" = "src/dask_awkward_sizeof" [tool.hatch.build.targets.wheel] packages = ["src/dask_awkward", "src/dask_awkward_sizeof"]