diff --git a/src/awkward/_v2/contents/listarray.py b/src/awkward/_v2/contents/listarray.py index a3f566142c..e30fa42bf1 100644 --- a/src/awkward/_v2/contents/listarray.py +++ b/src/awkward/_v2/contents/listarray.py @@ -349,8 +349,10 @@ def _getitem_next_jagged(self, slicestarts, slicestops, slicecontent, tail): slicer=ListArray(slicestarts, slicestops, slicecontent), ) - asListOffsetArray64 = self.toListOffsetArray64(True) - next_content = asListOffsetArray64._content + as_list_offset_array = self.toListOffsetArray64(False) + next_content = as_list_offset_array._content[ + as_list_offset_array.offsets[0] : as_list_offset_array.offsets[-1] + ] sliceoffsets = ak._v2.index.Index64(slicecontent._offsets) @@ -422,7 +424,6 @@ def _getitem_next_jagged(self, slicestarts, slicestops, slicecontent, tail): ), slicer=ak._v2.contents.ListArray(slicestarts, slicestops, slicecontent), ) - nextcontent = self._content._carry(nextcarry, True) nexthead, nexttail = ak._v2._slicing.headtail(tail) outcontent = nextcontent._getitem_next(nexthead, nexttail, None) @@ -511,7 +512,10 @@ def _getitem_next_jagged(self, slicestarts, slicestops, slicecontent, tail): slicecontent._content, ak._v2.contents.listoffsetarray.ListOffsetArray, ): - nextcontent = self._content._carry(nextcarry, True) + + # Generate ranges between starts and stops + as_list_offset_array = self.toListOffsetArray64(True) + nextcontent = as_list_offset_array._content._carry(nextcarry, True) next = ak._v2.contents.listoffsetarray.ListOffsetArray( smalloffsets, nextcontent, None, self._parameters, self._nplike ) diff --git a/src/libawkward/array/ListArray.cpp b/src/libawkward/array/ListArray.cpp index 315927de71..677b6f8ed0 100644 --- a/src/libawkward/array/ListArray.cpp +++ b/src/libawkward/array/ListArray.cpp @@ -1935,11 +1935,16 @@ namespace awkward { ContentPtr out; if (dynamic_cast(slicecontent.content().get())) { - ContentPtr nextcontent = content_.get()->carry(nextcarry, true); + ContentPtr asListOffsetArray64 = toListOffsetArray64(true); + ContentPtr next_content; + if (ListOffsetArrayOf* raw = + dynamic_cast*>(asListOffsetArray64.get())) { + next_content = raw->content()->carry(nextcarry, true); + } ContentPtr next = std::make_shared(Identities::none(), util::Parameters(), smalloffsets, - nextcontent); + next_content); out = next.get()->getitem_next_jagged(util::make_starts(smalloffsets), util::make_stops(smalloffsets), slicecontent.content(), diff --git a/tests/test_1502-getitem-jagged-issue1406.py b/tests/test_1502-getitem-jagged-issue1406.py new file mode 100644 index 0000000000..ade5203eb6 --- /dev/null +++ b/tests/test_1502-getitem-jagged-issue1406.py @@ -0,0 +1,120 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +import pytest # noqa: F401 +import awkward as ak # noqa: F401 +import numpy as np + +to_list = ak.to_list + + +def test_1406issue(): + array = ak.Array( + ak.layout.ListOffsetArray64( + ak.layout.Index64(np.array([1, 3], dtype=np.int64)), + ak.layout.ListOffsetArray64( + ak.layout.Index64(np.array([0, 2, 2, 3], dtype=np.int64)), + ak.layout.NumpyArray(np.array([0, 1, 2], dtype=np.int64)), + ), + ), + check_valid=True, + ) + + index = ak.Array( + ak.layout.ListOffsetArray64( + ak.layout.Index64(np.array([0, 2], dtype=np.int64)), + ak.layout.IndexedOptionArray64( + ak.layout.Index64(np.array([0, 1], dtype=np.int64)), + ak.layout.ListOffsetArray64( + ak.layout.Index64(np.array([0, 0, 1], dtype=np.int64)), + ak.layout.NumpyArray(np.array([0], dtype=np.int64)), + ), + ), + ), + check_valid=True, + ) + assert to_list(array[index]) == [[[], [2]]] + + +def test_success_remove_option_type(): + array = ak.Array( + ak.layout.ListOffsetArray64( + ak.layout.Index64(np.array([1, 3], dtype=np.int64)), + ak.layout.ListOffsetArray64( + ak.layout.Index64(np.array([0, 2, 2, 3], dtype=np.int64)), + ak.layout.NumpyArray(np.array([0, 1, 2], dtype=np.int64)), + ), + ), + check_valid=True, + ) + + index = ak.Array( + ak.layout.ListOffsetArray64( + ak.layout.Index64(np.array([0, 2], dtype=np.int64)), + ak.layout.ListOffsetArray64( + ak.layout.Index64(np.array([0, 0, 1], dtype=np.int64)), + ak.layout.NumpyArray(np.array([0], dtype=np.int64)), + ), + ), + check_valid=True, + ) + + assert to_list(array[index]) == [[[], [2]]] + + +def test_success_start_offset0(): + + array = ak.Array( + ak.layout.ListOffsetArray64( + ak.layout.Index64(np.array([0, 2], dtype=np.int64)), + ak.layout.ListOffsetArray64( + ak.layout.Index64(np.array([2, 2, 3], dtype=np.int64)), + ak.layout.NumpyArray(np.array([0, 1, 2], dtype=np.int64)), + ), + ), + check_valid=True, + ) + + index = ak.Array( + ak.layout.ListOffsetArray64( + ak.layout.Index64(np.array([0, 2], dtype=np.int64)), + ak.layout.IndexedOptionArray64( + ak.layout.Index64(np.array([0, 1], dtype=np.int64)), + ak.layout.ListOffsetArray64( + ak.layout.Index64(np.array([0, 0, 1], dtype=np.int64)), + ak.layout.NumpyArray(np.array([0], dtype=np.int64)), + ), + ), + ), + check_valid=True, + ) + + assert to_list(array[index]) == [[[], [2]]] + + +def test_success_nonempty_list(): + array = ak.Array( + ak.layout.ListOffsetArray64( + ak.layout.Index64(np.array([1, 3], dtype=np.int64)), + ak.layout.ListOffsetArray64( + ak.layout.Index64(np.array([0, 1, 2, 3], dtype=np.int64)), + ak.layout.NumpyArray(np.array([0, 1, 2], dtype=np.int64)), + ), + ), + check_valid=True, + ) + + index = ak.Array( + ak.layout.ListOffsetArray64( + ak.layout.Index64(np.array([0, 2], dtype=np.int64)), + ak.layout.IndexedOptionArray64( + ak.layout.Index64(np.array([0, 1], dtype=np.int64)), + ak.layout.ListOffsetArray64( + ak.layout.Index64(np.array([0, 1, 2], dtype=np.int64)), + ak.layout.NumpyArray(np.array([0, 0], dtype=np.int64)), + ), + ), + ), + check_valid=True, + ) + + assert to_list(array[index]) == [[[1], [2]]] diff --git a/tests/v2/test_1405-slicing-untested-cases.py b/tests/v2/test_1405-slicing-untested-cases.py new file mode 100644 index 0000000000..313fb4dbe4 --- /dev/null +++ b/tests/v2/test_1405-slicing-untested-cases.py @@ -0,0 +1,97 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +import pytest # noqa: F401 +import awkward as ak # noqa: F401 +import numpy as np + + +def test_index_packed(): + """Base test case""" + content = ak._v2.contents.ListOffsetArray( + ak._v2.index.Index64(np.array([0, 2], dtype=np.int64)), + # Here we have a third sublist [2, 3) that isn't mapped + ak._v2.contents.ListOffsetArray( + ak._v2.index.Index64(np.array([0, 1, 2], dtype=np.int64)), + ak._v2.contents.NumpyArray(np.array([2, 2], dtype=np.int64)), + ), + ) + + index = ak._v2.contents.ListOffsetArray( + ak._v2.index.Index64(np.array([0, 2], dtype=np.int64)), + ak._v2.contents.ListOffsetArray( + ak._v2.index.Index64(np.array([0, 0, 1], dtype=np.int64)), + ak._v2.contents.NumpyArray(np.array([0], dtype=np.int64)), + ), + ) + + assert content[index].to_list() == [[[], [2]]] + + +def test_index_unmapped(): + """Check that contents with unmapped sublists still support jagged indexing""" + content = ak._v2.contents.ListOffsetArray( + ak._v2.index.Index64(np.array([0, 2], dtype=np.int64)), + # Here we have a third sublist [2, 3) that isn't mapped + ak._v2.contents.ListOffsetArray( + ak._v2.index.Index64(np.array([0, 1, 2, 3], dtype=np.int64)), + ak._v2.contents.NumpyArray(np.array([2, 2, 2], dtype=np.int64)), + ), + ) + + index = ak._v2.contents.ListOffsetArray( + ak._v2.index.Index64(np.array([0, 2], dtype=np.int64)), + ak._v2.contents.ListOffsetArray( + ak._v2.index.Index64(np.array([0, 0, 1], dtype=np.int64)), + ak._v2.contents.NumpyArray(np.array([0], dtype=np.int64)), + ), + ) + + assert content[index].to_list() == [[[], [2]]] + + +def test_list_option_list(): + """Check that non-offset list(option(list indexes correctly""" + content = ak._v2.contents.ListOffsetArray( + ak._v2.index.Index64(np.array([0, 2], dtype=np.int64)), + ak._v2.contents.ListOffsetArray( + ak._v2.index.Index64(np.array([2, 2, 3], dtype=np.int64)), + ak._v2.contents.NumpyArray(np.array([2, 2, 2], dtype=np.int64)), + ), + ) + + index = ak._v2.contents.ListOffsetArray( + ak._v2.index.Index64(np.array([0, 2], dtype=np.int64)), + ak._v2.contents.IndexedOptionArray( + ak._v2.index.Index64(np.array([0, 1], dtype=np.int64)), + ak._v2.contents.ListOffsetArray( + ak._v2.index.Index64(np.array([0, 0, 1], dtype=np.int64)), + ak._v2.contents.NumpyArray(np.array([0], dtype=np.int64)), + ), + ), + ) + + assert content[index].to_list() == [[[], [2]]] + + +def test_list_option_list_offset(): + """Check that offset list(option(list indexes correctly""" + content = ak._v2.contents.ListOffsetArray( + ak._v2.index.Index64(np.array([1, 3], dtype=np.int64)), + ak._v2.contents.ListOffsetArray( + ak._v2.index.Index64(np.array([0, 2, 2, 3], dtype=np.int64)), + ak._v2.contents.NumpyArray(np.array([2, 2, 2], dtype=np.int64)), + ), + ) + + index = ak._v2.contents.ListOffsetArray( + ak._v2.index.Index64(np.array([0, 2], dtype=np.int64)), + ak._v2.contents.IndexedOptionArray( + ak._v2.index.Index64(np.array([0, 1], dtype=np.int64)), + ak._v2.contents.ListOffsetArray( + ak._v2.index.Index64(np.array([0, 0, 1], dtype=np.int64)), + ak._v2.contents.NumpyArray(np.array([0], dtype=np.int64)), + ), + ), + ) + + assert content[index].to_list() == [[[], [2]]] diff --git a/tests/v2/test_1502-getitem-jagged-issue1406.py b/tests/v2/test_1502-getitem-jagged-issue1406.py index 03c6ae9366..cbcbce13bb 100644 --- a/tests/v2/test_1502-getitem-jagged-issue1406.py +++ b/tests/v2/test_1502-getitem-jagged-issue1406.py @@ -117,4 +117,4 @@ def test_success_nonempty_list(): check_valid=True, ) - assert to_list(array[index]) == [[[0], [1]]] + assert to_list(array[index]) == [[[1], [2]]]