diff --git a/src/awkward/_v2/operations/structure/ak_zip.py b/src/awkward/_v2/operations/structure/ak_zip.py index 5d4e763e64..f2a057fd65 100644 --- a/src/awkward/_v2/operations/structure/ak_zip.py +++ b/src/awkward/_v2/operations/structure/ak_zip.py @@ -13,6 +13,7 @@ def zip( highlevel=True, behavior=None, right_broadcast=False, + optiontype_outside_record=False, ): """ @@ -35,6 +36,8 @@ def zip( high-level. right_broadcast (bool): If True, follow rules for implicit right-broadcasting, as described in #ak.broadcast_arrays. + optiontype_outside_record (bool): If True, continue broadcasting past + any option types before creating the new #ak.layout.RecordArray node. Combines `arrays` into a single structure as the fields of a collection of records or the slots of a collection of tuples. If the `arrays` have @@ -104,6 +107,22 @@ def zip( As an extreme, `depth_limit=1` is a handy way to make a record structure at the outermost level, regardless of whether the fields have matching structure or not. + + When zipping together arrays with optional values, it can be useful to create + the #ak.layout.RecordArray node after the option types. By default, #ak.zip + does not do this: + + >>> one = ak.Array([1, 2, None]) + >>> two = ak.Array([None, 5, 6]) + >>> ak.zip([one, two]) + <Array [(1, None), (2, 5), (None, 6)] type='3 * (?int64, ?int64)'> + + If the `optiontype_outside_record` option is set to `True`, Awkward will continue to + broadcast the arrays together at the depth_limit until it reaches non-option + types.
This effectively takes the union of the option mask: + + >>> ak.zip([one, two], optiontype_outside_record=True) + <Array [None, (2, 5), None] type='3 * ?(int64, int64)'> """ if depth_limit is not None and depth_limit <= 0: raise ValueError("depth_limit must be None or at least 1") @@ -165,6 +184,10 @@ def action(inputs, depth, **ignore): for x in inputs ) ): + # If we want to zip after option types at this depth + if optiontype_outside_record and any(x.is_OptionType for x in inputs): + return None + return ( ak._v2.contents.RecordArray( inputs, recordlookup, parameters=parameters diff --git a/src/awkward/operations/structure.py b/src/awkward/operations/structure.py index f10ac5ab64..32a8cecffd 100644 --- a/src/awkward/operations/structure.py +++ b/src/awkward/operations/structure.py @@ -475,6 +475,7 @@ def zip( highlevel=True, behavior=None, right_broadcast=False, + optiontype_outside_record=False, ): """ Args: @@ -496,6 +497,8 @@ def zip( high-level. right_broadcast (bool): If True, follow rules for implicit right-broadcasting, as described in #ak.broadcast_arrays. + optiontype_outside_record (bool): If True, continue broadcasting past + any option types before creating the new #ak.layout.RecordArray node. Combines `arrays` into a single structure as the fields of a collection of records or the slots of a collection of tuples. If the `arrays` have @@ -565,6 +568,23 @@ def zip( As an extreme, `depth_limit=1` is a handy way to make a record structure at the outermost level, regardless of whether the fields have matching structure or not. + + When zipping together arrays with optional values, it can be useful to create + the #ak.layout.RecordArray node after the option types. By default, #ak.zip + does not do this: + + >>> one = ak.Array([1, 2, None]) + >>> two = ak.Array([None, 5, 6]) + >>> ak.zip([one, two]) + <Array [(1, None), (2, 5), (None, 6)] type='3 * (?int64, ?int64)'> + + If the `optiontype_outside_record` option is set to `True`, Awkward will continue to + broadcast the arrays together at the depth_limit until it reaches non-option + types.
This effectively takes the union of the option mask: + + >>> ak.zip([one, two], optiontype_outside_record=True) + <Array [None, (2, 5), None] type='3 * ?(int64, int64)'> + + """ if depth_limit is not None and depth_limit <= 0: raise ValueError( @@ -629,6 +649,12 @@ def getfunction(inputs, depth): for x in inputs ) ): + # If we want to zip after option types at this depth + if optiontype_outside_record and any( + isinstance(x, ak._util.optiontypes) for x in inputs + ): + return None + return lambda: ( ak.layout.RecordArray(inputs, recordlookup, parameters=parameters), ) diff --git a/tests/.tmpHaXt1g b/tests/.tmpHaXt1g new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_1308-zip-after-option.py b/tests/test_1308-zip-after-option.py new file mode 100644 index 0000000000..05cdabf13b --- /dev/null +++ b/tests/test_1308-zip-after-option.py @@ -0,0 +1,44 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +import pytest # noqa: F401 +import awkward as ak # noqa: F401 + + +def test_all_options(): + one = ak.Array([1, 2, None]) + two = ak.Array([None, 5, None]) + result = ak.zip([one, two], optiontype_outside_record=True) + assert str(result.type) == "3 * ?(int64, int64)" + assert result.tolist() == [None, (2, 5), None] + + +def test_mixed_options(): + one = ak.Array([1, 2, None]) + two = ak.Array([4, 5, 6]) + result = ak.zip([one, two], optiontype_outside_record=True) + assert str(result.type) == "3 * ?(int64, int64)" + assert result.tolist() == [(1, 4), (2, 5), None] + + +def test_no_options(): + one = ak.Array([1, 2, 3]) + two = ak.Array([4, 5, 6]) + result = ak.zip([one, two], optiontype_outside_record=True) + assert str(result.type) == "3 * (int64, int64)" + assert result.tolist() == [(1, 4), (2, 5), (3, 6)] + + +def test_complex_inner(): + one = ak.Array([1, 2, 3]) + two = ak.Array([[7, 5], [1, 2], [4, None]]) + result = ak.zip([one, two], optiontype_outside_record=True) + assert str(result.type) == "3 * var * ?(int64, int64)" + assert result.tolist() == [[(1,
7), (1, 5)], [(2, 1), (2, 2)], [(3, 4), None]] + + +def test_complex_outer(): + one = ak.Array([1, None, 3]) + two = ak.Array([[7, 5], [1, 2], [4, None]]) + result = ak.zip([one, two], optiontype_outside_record=True) + assert str(result.type) == "3 * option[var * ?(int64, int64)]" + assert result.tolist() == [[(1, 7), (1, 5)], None, [(3, 4), None]] diff --git a/tests/v2/test_1308-zip-after-option.py b/tests/v2/test_1308-zip-after-option.py new file mode 100644 index 0000000000..bdb9bf98f5 --- /dev/null +++ b/tests/v2/test_1308-zip-after-option.py @@ -0,0 +1,44 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +import pytest # noqa: F401 +import awkward as ak # noqa: F401 + + +def test_all_options(): + one = ak._v2.highlevel.Array([1, 2, None]) + two = ak._v2.highlevel.Array([None, 5, None]) + result = ak._v2.operations.structure.zip([one, two], optiontype_outside_record=True) + assert str(result.type) == "3 * ?(int64, int64)" + assert result.tolist() == [None, (2, 5), None] + + +def test_mixed_options(): + one = ak._v2.highlevel.Array([1, 2, None]) + two = ak._v2.highlevel.Array([4, 5, 6]) + result = ak._v2.operations.structure.zip([one, two], optiontype_outside_record=True) + assert str(result.type) == "3 * ?(int64, int64)" + assert result.tolist() == [(1, 4), (2, 5), None] + + +def test_no_options(): + one = ak._v2.highlevel.Array([1, 2, 3]) + two = ak._v2.highlevel.Array([4, 5, 6]) + result = ak._v2.operations.structure.zip([one, two], optiontype_outside_record=True) + assert str(result.type) == "3 * (int64, int64)" + assert result.tolist() == [(1, 4), (2, 5), (3, 6)] + + +def test_complex_inner(): + one = ak._v2.highlevel.Array([1, 2, 3]) + two = ak._v2.highlevel.Array([[7, 5], [1, 2], [4, None]]) + result = ak._v2.operations.structure.zip([one, two], optiontype_outside_record=True) + assert str(result.type) == "3 * var * ?(int64, int64)" + assert result.tolist() == [[(1, 7), (1, 5)], [(2, 1), (2, 2)], [(3, 4), None]] 
+ + +def test_complex_outer(): + one = ak._v2.highlevel.Array([1, None, 3]) + two = ak._v2.highlevel.Array([[7, 5], [1, 2], [4, None]]) + result = ak._v2.operations.structure.zip([one, two], optiontype_outside_record=True) + assert str(result.type) == "3 * option[var * ?(int64, int64)]" + assert result.tolist() == [[(1, 7), (1, 5)], None, [(3, 4), None]]