Skip to content

Commit

Permalink
fix: jagged slicing for ListArray (#1408)
Browse files Browse the repository at this point in the history
* Test: add two v2 test cases

* Test: refactor tests to make it clear what we're testing

* Test: test for `list(option(list` and control

* Hack: fix test_list_option_list_offset

* Fix: convert starts to nplike array

* Fix: handle typetracer specifically

* fix: slice `next_content` in `ListArray._getitem_next_jagged`

* test: fix tests for v2, add test for v1

* refactor: use `toListOffsetArray64` directly

* fix: use `toListOffsetArray64` before jagged slicing

* style: use snake case

Co-authored-by: Jim Pivarski <jpivarski@gmail.com>
Co-authored-by: Jim Pivarski <jpivarski@users.noreply.github.com>
  • Loading branch information
3 people authored Sep 1, 2022
1 parent 311d798 commit e692946
Show file tree
Hide file tree
Showing 5 changed files with 233 additions and 7 deletions.
12 changes: 8 additions & 4 deletions src/awkward/_v2/contents/listarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,8 +349,10 @@ def _getitem_next_jagged(self, slicestarts, slicestops, slicecontent, tail):
slicer=ListArray(slicestarts, slicestops, slicecontent),
)

asListOffsetArray64 = self.toListOffsetArray64(True)
next_content = asListOffsetArray64._content
as_list_offset_array = self.toListOffsetArray64(False)
next_content = as_list_offset_array._content[
as_list_offset_array.offsets[0] : as_list_offset_array.offsets[-1]
]

sliceoffsets = ak._v2.index.Index64(slicecontent._offsets)

Expand Down Expand Up @@ -422,7 +424,6 @@ def _getitem_next_jagged(self, slicestarts, slicestops, slicecontent, tail):
),
slicer=ak._v2.contents.ListArray(slicestarts, slicestops, slicecontent),
)

nextcontent = self._content._carry(nextcarry, True)
nexthead, nexttail = ak._v2._slicing.headtail(tail)
outcontent = nextcontent._getitem_next(nexthead, nexttail, None)
Expand Down Expand Up @@ -511,7 +512,10 @@ def _getitem_next_jagged(self, slicestarts, slicestops, slicecontent, tail):
slicecontent._content,
ak._v2.contents.listoffsetarray.ListOffsetArray,
):
nextcontent = self._content._carry(nextcarry, True)

# Generate ranges between starts and stops
as_list_offset_array = self.toListOffsetArray64(True)
nextcontent = as_list_offset_array._content._carry(nextcarry, True)
next = ak._v2.contents.listoffsetarray.ListOffsetArray(
smalloffsets, nextcontent, None, self._parameters, self._nplike
)
Expand Down
9 changes: 7 additions & 2 deletions src/libawkward/array/ListArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1935,11 +1935,16 @@ namespace awkward {

ContentPtr out;
if (dynamic_cast<SliceJagged64*>(slicecontent.content().get())) {
ContentPtr nextcontent = content_.get()->carry(nextcarry, true);
ContentPtr asListOffsetArray64 = toListOffsetArray64(true);
ContentPtr next_content;
if (ListOffsetArrayOf<int64_t>* raw =
dynamic_cast<ListOffsetArrayOf<int64_t>*>(asListOffsetArray64.get())) {
next_content = raw->content()->carry(nextcarry, true);
}
ContentPtr next = std::make_shared<ListOffsetArray64>(Identities::none(),
util::Parameters(),
smalloffsets,
nextcontent);
next_content);
out = next.get()->getitem_next_jagged(util::make_starts(smalloffsets),
util::make_stops(smalloffsets),
slicecontent.content(),
Expand Down
120 changes: 120 additions & 0 deletions tests/test_1502-getitem-jagged-issue1406.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

import pytest # noqa: F401
import awkward as ak # noqa: F401
import numpy as np

# Module-level alias for ak.to_list; the assertions in the tests below call it
# on the sliced result before comparing against nested Python list literals.
to_list = ak.to_list


def test_1406issue():
    """Regression test for issue 1406: offset list data, list(option(list)) slicer."""
    layout = ak.layout

    # Data to be sliced: the outer offsets start at 1 rather than 0.
    data_inner = layout.ListOffsetArray64(
        layout.Index64(np.array([0, 2, 2, 3], dtype=np.int64)),
        layout.NumpyArray(np.array([0, 1, 2], dtype=np.int64)),
    )
    data_outer = layout.ListOffsetArray64(
        layout.Index64(np.array([1, 3], dtype=np.int64)),
        data_inner,
    )
    array = ak.Array(data_outer, check_valid=True)

    # Jagged slicer: the innermost lists are wrapped in an IndexedOptionArray64.
    slicer_lists = layout.ListOffsetArray64(
        layout.Index64(np.array([0, 0, 1], dtype=np.int64)),
        layout.NumpyArray(np.array([0], dtype=np.int64)),
    )
    slicer_option = layout.IndexedOptionArray64(
        layout.Index64(np.array([0, 1], dtype=np.int64)),
        slicer_lists,
    )
    slicer_outer = layout.ListOffsetArray64(
        layout.Index64(np.array([0, 2], dtype=np.int64)),
        slicer_option,
    )
    index = ak.Array(slicer_outer, check_valid=True)

    expected = [[[], [2]]]
    assert to_list(array[index]) == expected


def test_success_remove_option_type():
    """Control case: same offset data, but the slicer has no option-type layer."""
    layout = ak.layout

    # Data to be sliced: the outer offsets start at 1 rather than 0.
    data_inner = layout.ListOffsetArray64(
        layout.Index64(np.array([0, 2, 2, 3], dtype=np.int64)),
        layout.NumpyArray(np.array([0, 1, 2], dtype=np.int64)),
    )
    data_outer = layout.ListOffsetArray64(
        layout.Index64(np.array([1, 3], dtype=np.int64)),
        data_inner,
    )
    array = ak.Array(data_outer, check_valid=True)

    # Jagged slicer: plain list-of-lists, no IndexedOptionArray64 in between.
    slicer_lists = layout.ListOffsetArray64(
        layout.Index64(np.array([0, 0, 1], dtype=np.int64)),
        layout.NumpyArray(np.array([0], dtype=np.int64)),
    )
    slicer_outer = layout.ListOffsetArray64(
        layout.Index64(np.array([0, 2], dtype=np.int64)),
        slicer_lists,
    )
    index = ak.Array(slicer_outer, check_valid=True)

    expected = [[[], [2]]]
    assert to_list(array[index]) == expected


def test_success_start_offset0():
    """Control case: the outer offsets of the data start at 0."""
    layout = ak.layout

    # Data to be sliced: outer offsets are [0, 2]; the inner offsets begin at 2.
    data_inner = layout.ListOffsetArray64(
        layout.Index64(np.array([2, 2, 3], dtype=np.int64)),
        layout.NumpyArray(np.array([0, 1, 2], dtype=np.int64)),
    )
    data_outer = layout.ListOffsetArray64(
        layout.Index64(np.array([0, 2], dtype=np.int64)),
        data_inner,
    )
    array = ak.Array(data_outer, check_valid=True)

    # Jagged slicer: the innermost lists are wrapped in an IndexedOptionArray64.
    slicer_lists = layout.ListOffsetArray64(
        layout.Index64(np.array([0, 0, 1], dtype=np.int64)),
        layout.NumpyArray(np.array([0], dtype=np.int64)),
    )
    slicer_option = layout.IndexedOptionArray64(
        layout.Index64(np.array([0, 1], dtype=np.int64)),
        slicer_lists,
    )
    slicer_outer = layout.ListOffsetArray64(
        layout.Index64(np.array([0, 2], dtype=np.int64)),
        slicer_option,
    )
    index = ak.Array(slicer_outer, check_valid=True)

    expected = [[[], [2]]]
    assert to_list(array[index]) == expected


def test_success_nonempty_list():
    """Offset data with one-element inner lists, sliced by list(option(list))."""
    layout = ak.layout

    # Data to be sliced: the outer offsets start at 1; inner offsets are
    # [0, 1, 2, 3], so no inner sublist is empty.
    data_inner = layout.ListOffsetArray64(
        layout.Index64(np.array([0, 1, 2, 3], dtype=np.int64)),
        layout.NumpyArray(np.array([0, 1, 2], dtype=np.int64)),
    )
    data_outer = layout.ListOffsetArray64(
        layout.Index64(np.array([1, 3], dtype=np.int64)),
        data_inner,
    )
    array = ak.Array(data_outer, check_valid=True)

    # Jagged slicer: two one-element lists, each holding index 0, behind an
    # IndexedOptionArray64.
    slicer_lists = layout.ListOffsetArray64(
        layout.Index64(np.array([0, 1, 2], dtype=np.int64)),
        layout.NumpyArray(np.array([0, 0], dtype=np.int64)),
    )
    slicer_option = layout.IndexedOptionArray64(
        layout.Index64(np.array([0, 1], dtype=np.int64)),
        slicer_lists,
    )
    slicer_outer = layout.ListOffsetArray64(
        layout.Index64(np.array([0, 2], dtype=np.int64)),
        slicer_option,
    )
    index = ak.Array(slicer_outer, check_valid=True)

    expected = [[[1], [2]]]
    assert to_list(array[index]) == expected
97 changes: 97 additions & 0 deletions tests/v2/test_1405-slicing-untested-cases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

import pytest # noqa: F401
import awkward as ak # noqa: F401
import numpy as np


def test_index_packed():
    """Baseline: jagged indexing where the outer offsets cover every inner sublist."""
    contents = ak._v2.contents
    Index64 = ak._v2.index.Index64

    # Inner offsets [0, 1, 2] describe two sublists, and the outer offsets
    # [0, 2] reference both of them, so nothing is left unmapped here.
    data_inner = contents.ListOffsetArray(
        Index64(np.array([0, 1, 2], dtype=np.int64)),
        contents.NumpyArray(np.array([2, 2], dtype=np.int64)),
    )
    content = contents.ListOffsetArray(
        Index64(np.array([0, 2], dtype=np.int64)),
        data_inner,
    )

    # Jagged slicer: plain list-of-lists.
    slicer_lists = contents.ListOffsetArray(
        Index64(np.array([0, 0, 1], dtype=np.int64)),
        contents.NumpyArray(np.array([0], dtype=np.int64)),
    )
    index = contents.ListOffsetArray(
        Index64(np.array([0, 2], dtype=np.int64)),
        slicer_lists,
    )

    expected = [[[], [2]]]
    assert content[index].to_list() == expected


def test_index_unmapped():
    """Jagged indexing must still work when the data has an unmapped sublist."""
    contents = ak._v2.contents
    Index64 = ak._v2.index.Index64

    # Inner offsets [0, 1, 2, 3] describe three sublists, but the outer offsets
    # [0, 2] only reference the first two; the third, [2, 3), is not mapped.
    data_inner = contents.ListOffsetArray(
        Index64(np.array([0, 1, 2, 3], dtype=np.int64)),
        contents.NumpyArray(np.array([2, 2, 2], dtype=np.int64)),
    )
    content = contents.ListOffsetArray(
        Index64(np.array([0, 2], dtype=np.int64)),
        data_inner,
    )

    # Jagged slicer: plain list-of-lists.
    slicer_lists = contents.ListOffsetArray(
        Index64(np.array([0, 0, 1], dtype=np.int64)),
        contents.NumpyArray(np.array([0], dtype=np.int64)),
    )
    index = contents.ListOffsetArray(
        Index64(np.array([0, 2], dtype=np.int64)),
        slicer_lists,
    )

    expected = [[[], [2]]]
    assert content[index].to_list() == expected


def test_list_option_list():
    """list(option(list)) slicer applied to data whose outer offsets start at 0."""
    contents = ak._v2.contents
    Index64 = ak._v2.index.Index64

    # Data to be sliced: outer offsets are [0, 2]; the inner offsets begin at 2.
    data_inner = contents.ListOffsetArray(
        Index64(np.array([2, 2, 3], dtype=np.int64)),
        contents.NumpyArray(np.array([2, 2, 2], dtype=np.int64)),
    )
    content = contents.ListOffsetArray(
        Index64(np.array([0, 2], dtype=np.int64)),
        data_inner,
    )

    # Jagged slicer: the innermost lists are wrapped in an IndexedOptionArray.
    slicer_lists = contents.ListOffsetArray(
        Index64(np.array([0, 0, 1], dtype=np.int64)),
        contents.NumpyArray(np.array([0], dtype=np.int64)),
    )
    slicer_option = contents.IndexedOptionArray(
        Index64(np.array([0, 1], dtype=np.int64)),
        slicer_lists,
    )
    index = contents.ListOffsetArray(
        Index64(np.array([0, 2], dtype=np.int64)),
        slicer_option,
    )

    expected = [[[], [2]]]
    assert content[index].to_list() == expected


def test_list_option_list_offset():
    """list(option(list)) slicer applied to data whose outer offsets start at 1."""
    contents = ak._v2.contents
    Index64 = ak._v2.index.Index64

    # Data to be sliced: the outer offsets [1, 3] do not start at 0.
    data_inner = contents.ListOffsetArray(
        Index64(np.array([0, 2, 2, 3], dtype=np.int64)),
        contents.NumpyArray(np.array([2, 2, 2], dtype=np.int64)),
    )
    content = contents.ListOffsetArray(
        Index64(np.array([1, 3], dtype=np.int64)),
        data_inner,
    )

    # Jagged slicer: the innermost lists are wrapped in an IndexedOptionArray.
    slicer_lists = contents.ListOffsetArray(
        Index64(np.array([0, 0, 1], dtype=np.int64)),
        contents.NumpyArray(np.array([0], dtype=np.int64)),
    )
    slicer_option = contents.IndexedOptionArray(
        Index64(np.array([0, 1], dtype=np.int64)),
        slicer_lists,
    )
    index = contents.ListOffsetArray(
        Index64(np.array([0, 2], dtype=np.int64)),
        slicer_option,
    )

    expected = [[[], [2]]]
    assert content[index].to_list() == expected
2 changes: 1 addition & 1 deletion tests/v2/test_1502-getitem-jagged-issue1406.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,4 +117,4 @@ def test_success_nonempty_list():
check_valid=True,
)

assert to_list(array[index]) == [[[0], [1]]]
assert to_list(array[index]) == [[[1], [2]]]

0 comments on commit e692946

Please sign in to comment.