From 5a2683e91c7081b1f57fb75dc290efd95dd7a831 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 23 Feb 2022 09:46:28 +0000 Subject: [PATCH 1/5] Feat: add `after_option` argument to `ak.zip` This broadcasts any option types at the depth limit, and forms the RecordArray layout node below the result. Effectively, this produces `?(int64, float64)` instead of `(?int64, ?float64)` --- src/awkward/operations/structure.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/awkward/operations/structure.py b/src/awkward/operations/structure.py index f10ac5ab64..c455ffea62 100644 --- a/src/awkward/operations/structure.py +++ b/src/awkward/operations/structure.py @@ -475,6 +475,7 @@ def zip( highlevel=True, behavior=None, right_broadcast=False, + after_option=False, ): """ Args: @@ -496,6 +497,8 @@ def zip( high-level. right_broadcast (bool): If True, follow rules for implicit right-broadcasting, as described in #ak.broadcast_arrays. + after_option (bool): If True, continue broadcasting past any option + types before creating the new #ak.layout.RecordArray node. Combines `arrays` into a single structure as the fields of a collection of records or the slots of a collection of tuples. If the `arrays` have @@ -565,6 +568,23 @@ def zip( As an extreme, `depth_limit=1` is a handy way to make a record structure at the outermost level, regardless of whether the fields have matching structure or not. + + When zipping together arrays with optional values, it can be useful to create + the #ak.layout.RecordArray node after the option types. By default, #ak.zip + does not do this: + + >>> one = ak.Array([1, 2, None]) + >>> two = ak.Array([None, 5, 6]) + >>> ak.zip([one, two]) + <Array [(1, None), (2, 5), (None, 6)] type='3 * (?int64, ?int64)'> + + If the `after_option` option is set to `True`, Awkward will continue to + broadcast the arrays together at the depth_limit until it reaches non-option + types. 
This effectively takes the union of the option mask: + + >>> ak.zip([one, two], after_option=True) + <Array [None, (2, 5), None] type='3 * ?(int64, int64)'> + """ if depth_limit is not None and depth_limit <= 0: raise ValueError( @@ -629,6 +649,12 @@ def getfunction(inputs, depth): for x in inputs ) ): + # If we want to zip after option types at this depth + if after_option and any( + isinstance(x, ak._util.optiontypes) for x in inputs + ): + return None + return lambda: ( ak.layout.RecordArray(inputs, recordlookup, parameters=parameters), ) From 296ab5dc4acd07f3602f7e40ab5d9ff3c7086ee0 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 23 Feb 2022 10:27:28 +0000 Subject: [PATCH 2/5] Test: test `ak.zip(..., after_option=True)` --- tests/test_1308-zip-after-option.py | 44 +++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 tests/test_1308-zip-after-option.py diff --git a/tests/test_1308-zip-after-option.py b/tests/test_1308-zip-after-option.py new file mode 100644 index 0000000000..dac48c8d6e --- /dev/null +++ b/tests/test_1308-zip-after-option.py @@ -0,0 +1,44 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +import pytest # noqa: F401 +import awkward as ak # noqa: F401 + + +def test_all_options(): + one = ak.Array([1, 2, None]) + two = ak.Array([None, 5, None]) + result = ak.zip([one, two], after_option=True) + assert str(result.type) == "3 * ?(int64, int64)" + assert result.tolist() == [None, (2, 5), None] + + +def test_mixed_options(): + one = ak.Array([1, 2, None]) + two = ak.Array([4, 5, 6]) + result = ak.zip([one, two], after_option=True) + assert str(result.type) == "3 * ?(int64, int64)" + assert result.tolist() == [(1, 4), (2, 5), None] + + +def test_no_options(): + one = ak.Array([1, 2, 3]) + two = ak.Array([4, 5, 6]) + result = ak.zip([one, two], after_option=True) + assert str(result.type) == "3 * (int64, int64)" + assert result.tolist() == [(1, 4), (2, 5), (3, 6)] + + +def test_complex_inner(): + one = ak.Array([1, 2, 3]) + two 
= ak.Array([[7, 5], [1, 2], [4, None]]) + result = ak.zip([one, two], after_option=True) + assert str(result.type) == "3 * var * ?(int64, int64)" + assert result.tolist() == [[(1, 7), (1, 5)], [(2, 1), (2, 2)], [(3, 4), None]] + + +def test_complex_outer(): + one = ak.Array([1, None, 3]) + two = ak.Array([[7, 5], [1, 2], [4, None]]) + result = ak.zip([one, two], after_option=True) + assert str(result.type) == "3 * option[var * ?(int64, int64)]" + assert result.tolist() == [[(1, 7), (1, 5)], None, [(3, 4), None]] From 49f269f26198faa48d8157a686b00e0e874a0ba5 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 23 Feb 2022 20:19:53 +0000 Subject: [PATCH 3/5] Refactor: change `after_option` to `optiontype_outside_record` --- src/awkward/operations/structure.py | 12 ++++++------ tests/test_1308-zip-after-option.py | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/awkward/operations/structure.py b/src/awkward/operations/structure.py index c455ffea62..32a8cecffd 100644 --- a/src/awkward/operations/structure.py +++ b/src/awkward/operations/structure.py @@ -475,7 +475,7 @@ def zip( highlevel=True, behavior=None, right_broadcast=False, - after_option=False, + optiontype_outside_record=False, ): """ Args: @@ -497,8 +497,8 @@ def zip( high-level. right_broadcast (bool): If True, follow rules for implicit right-broadcasting, as described in #ak.broadcast_arrays. - after_option (bool): If True, continue broadcasting past any option - types before creating the new #ak.layout.RecordArray node. + optiontype_outside_record (bool): If True, continue broadcasting past + any option types before creating the new #ak.layout.RecordArray node. Combines `arrays` into a single structure as the fields of a collection of records or the slots of a collection of tuples. 
If the `arrays` have @@ -578,11 +578,11 @@ def zip( >>> ak.zip([one, two]) <Array [(1, None), (2, 5), (None, 6)] type='3 * (?int64, ?int64)'> - If the `after_option` option is set to `True`, Awkward will continue to + If the `optiontype_outside_record` option is set to `True`, Awkward will continue to broadcast the arrays together at the depth_limit until it reaches non-option types. This effectively takes the union of the option mask: - >>> ak.zip([one, two], after_option=True) + >>> ak.zip([one, two], optiontype_outside_record=True) <Array [None, (2, 5), None] type='3 * ?(int64, int64)'> """ @@ -650,7 +650,7 @@ def getfunction(inputs, depth): ) ): # If we want to zip after option types at this depth - if after_option and any( + if optiontype_outside_record and any( isinstance(x, ak._util.optiontypes) for x in inputs ): return None diff --git a/tests/test_1308-zip-after-option.py b/tests/test_1308-zip-after-option.py index dac48c8d6e..05cdabf13b 100644 --- a/tests/test_1308-zip-after-option.py +++ b/tests/test_1308-zip-after-option.py @@ -7,7 +7,7 @@ def test_all_options(): one = ak.Array([1, 2, None]) two = ak.Array([None, 5, None]) - result = ak.zip([one, two], after_option=True) + result = ak.zip([one, two], optiontype_outside_record=True) assert str(result.type) == "3 * ?(int64, int64)" assert result.tolist() == [None, (2, 5), None] @@ -15,7 +15,7 @@ def test_all_options(): def test_mixed_options(): one = ak.Array([1, 2, None]) two = ak.Array([4, 5, 6]) - result = ak.zip([one, two], after_option=True) + result = ak.zip([one, two], optiontype_outside_record=True) assert str(result.type) == "3 * ?(int64, int64)" assert result.tolist() == [(1, 4), (2, 5), None] @@ -23,7 +23,7 @@ def test_mixed_options(): def test_no_options(): one = ak.Array([1, 2, 3]) two = ak.Array([4, 5, 6]) - result = ak.zip([one, two], after_option=True) + result = ak.zip([one, two], optiontype_outside_record=True) assert str(result.type) == "3 * (int64, int64)" assert result.tolist() == [(1, 4), (2, 5), (3, 6)] @@ -31,7 +31,7 @@ def test_no_options(): def test_complex_inner(): one = ak.Array([1, 2, 3]) two = 
ak.Array([[7, 5], [1, 2], [4, None]]) - result = ak.zip([one, two], after_option=True) + result = ak.zip([one, two], optiontype_outside_record=True) assert str(result.type) == "3 * var * ?(int64, int64)" assert result.tolist() == [[(1, 7), (1, 5)], [(2, 1), (2, 2)], [(3, 4), None]] @@ -39,6 +39,6 @@ def test_complex_inner(): def test_complex_outer(): one = ak.Array([1, None, 3]) two = ak.Array([[7, 5], [1, 2], [4, None]]) - result = ak.zip([one, two], after_option=True) + result = ak.zip([one, two], optiontype_outside_record=True) assert str(result.type) == "3 * option[var * ?(int64, int64)]" assert result.tolist() == [[(1, 7), (1, 5)], None, [(3, 4), None]] From 811c267b9a1651fa7d8505d516f385ec09da3857 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 23 Feb 2022 20:29:28 +0000 Subject: [PATCH 4/5] Feat: add `optiontype_outside_record` arg to `ak.zip` for v2 --- .../_v2/operations/structure/ak_zip.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/awkward/_v2/operations/structure/ak_zip.py b/src/awkward/_v2/operations/structure/ak_zip.py index 5d4e763e64..f2a057fd65 100644 --- a/src/awkward/_v2/operations/structure/ak_zip.py +++ b/src/awkward/_v2/operations/structure/ak_zip.py @@ -13,6 +13,7 @@ def zip( highlevel=True, behavior=None, right_broadcast=False, + optiontype_outside_record=False, ): """ @@ -35,6 +36,8 @@ def zip( high-level. right_broadcast (bool): If True, follow rules for implicit right-broadcasting, as described in #ak.broadcast_arrays. + optiontype_outside_record (bool): If True, continue broadcasting past + any option types before creating the new #ak.layout.RecordArray node. Combines `arrays` into a single structure as the fields of a collection of records or the slots of a collection of tuples. If the `arrays` have @@ -104,6 +107,22 @@ def zip( As an extreme, `depth_limit=1` is a handy way to make a record structure at the outermost level, regardless of whether the fields have matching structure or not. 
+ + When zipping together arrays with optional values, it can be useful to create + the #ak.layout.RecordArray node after the option types. By default, #ak.zip + does not do this: + + >>> one = ak.Array([1, 2, None]) + >>> two = ak.Array([None, 5, 6]) + >>> ak.zip([one, two]) + <Array [(1, None), (2, 5), (None, 6)] type='3 * (?int64, ?int64)'> + + If the `optiontype_outside_record` option is set to `True`, Awkward will continue to + broadcast the arrays together at the depth_limit until it reaches non-option + types. This effectively takes the union of the option mask: + + >>> ak.zip([one, two], optiontype_outside_record=True) + <Array [None, (2, 5), None] type='3 * ?(int64, int64)'> """ if depth_limit is not None and depth_limit <= 0: raise ValueError("depth_limit must be None or at least 1") @@ -165,6 +184,10 @@ def action(inputs, depth, **ignore): for x in inputs ) ): + # If we want to zip after option types at this depth + if optiontype_outside_record and any(x.is_OptionType for x in inputs): + return None + return ( ak._v2.contents.RecordArray( inputs, recordlookup, parameters=parameters From 38c2fc1fdd50d7ea3b56fa49702f19517fed43b7 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 23 Feb 2022 20:29:56 +0000 Subject: [PATCH 5/5] Test: add `optiontype_outside_record` test for v2 --- tests/.tmpHaXt1g | 0 tests/v2/test_1308-zip-after-option.py | 44 ++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 tests/.tmpHaXt1g create mode 100644 tests/v2/test_1308-zip-after-option.py diff --git a/tests/.tmpHaXt1g b/tests/.tmpHaXt1g new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/v2/test_1308-zip-after-option.py b/tests/v2/test_1308-zip-after-option.py new file mode 100644 index 0000000000..bdb9bf98f5 --- /dev/null +++ b/tests/v2/test_1308-zip-after-option.py @@ -0,0 +1,44 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +import pytest # noqa: F401 +import awkward as ak # noqa: F401 + + +def test_all_options(): + one = ak._v2.highlevel.Array([1, 2, None]) + two = ak._v2.highlevel.Array([None, 5, 
None]) + result = ak._v2.operations.structure.zip([one, two], optiontype_outside_record=True) + assert str(result.type) == "3 * ?(int64, int64)" + assert result.tolist() == [None, (2, 5), None] + + +def test_mixed_options(): + one = ak._v2.highlevel.Array([1, 2, None]) + two = ak._v2.highlevel.Array([4, 5, 6]) + result = ak._v2.operations.structure.zip([one, two], optiontype_outside_record=True) + assert str(result.type) == "3 * ?(int64, int64)" + assert result.tolist() == [(1, 4), (2, 5), None] + + +def test_no_options(): + one = ak._v2.highlevel.Array([1, 2, 3]) + two = ak._v2.highlevel.Array([4, 5, 6]) + result = ak._v2.operations.structure.zip([one, two], optiontype_outside_record=True) + assert str(result.type) == "3 * (int64, int64)" + assert result.tolist() == [(1, 4), (2, 5), (3, 6)] + + +def test_complex_inner(): + one = ak._v2.highlevel.Array([1, 2, 3]) + two = ak._v2.highlevel.Array([[7, 5], [1, 2], [4, None]]) + result = ak._v2.operations.structure.zip([one, two], optiontype_outside_record=True) + assert str(result.type) == "3 * var * ?(int64, int64)" + assert result.tolist() == [[(1, 7), (1, 5)], [(2, 1), (2, 2)], [(3, 4), None]] + + +def test_complex_outer(): + one = ak._v2.highlevel.Array([1, None, 3]) + two = ak._v2.highlevel.Array([[7, 5], [1, 2], [4, None]]) + result = ak._v2.operations.structure.zip([one, two], optiontype_outside_record=True) + assert str(result.type) == "3 * option[var * ?(int64, int64)]" + assert result.tolist() == [[(1, 7), (1, 5)], None, [(3, 4), None]]