Skip to content

Commit

Permalink
fix: don't project categorical in ak._v2.packed (#1689)
Browse files Browse the repository at this point in the history
* test: test packing of categoricals

* fix: don't project categoricals

FIXME: in the future, we should be able to specify this at the behavior level.

* test: improve clarity
  • Loading branch information
agoose77 authored Sep 8, 2022
1 parent 31f3afb commit 3da5f32
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 14 deletions.
15 changes: 12 additions & 3 deletions src/awkward/_v2/contents/indexedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1244,7 +1244,16 @@ def continuation():
raise ak._v2._util.error(AssertionError(result))

def packed(self):
    """Return a packed version of this array.

    Arrays whose ``__array__`` parameter is ``"categorical"`` are not
    projected: only the content is packed, and it is re-wrapped in a new
    ``IndexedArray`` that reuses this array's index, identifier,
    parameters and nplike. Every other array is projected first and the
    projection is packed.

    Returns:
        For categorical arrays, a new ``IndexedArray``; otherwise whatever
        ``self.project().packed()`` returns.
    """
    # FIXME: in the future this special case should be specifiable at the
    # behavior level rather than hard-coded here.
    if self.parameter("__array__") == "categorical":
        # Keep the index so the result is still an IndexedArray carrying
        # the categorical parameters; only the content gets packed.
        return IndexedArray(
            self._index,
            self._content.packed(),
            identifier=self._identifier,
            parameters=self._parameters,
            nplike=self._nplike,
        )
    else:
        return self.project().packed()

def _to_list(self, behavior, json_conversions):
out = self._to_list_custom(behavior, json_conversions)
Expand All @@ -1261,8 +1270,8 @@ def _to_nplike(self, nplike):
return IndexedArray(
index,
content,
identifier=self.identifier,
parameters=self.parameters,
identifier=self._identifier,
parameters=self._parameters,
nplike=nplike,
)

Expand Down
23 changes: 12 additions & 11 deletions src/awkward/_v2/contents/indexedoptionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1684,10 +1684,20 @@ def continuation():

def packed(self):
original_index = self._index.raw(self._nplike)

is_none = original_index < 0
num_none = self._nplike.index_nplike.count_nonzero(is_none)
if self._content.length > len(original_index) - num_none:
if self.parameter("__array__") == "categorical" or self._content.length <= (
len(original_index) - num_none
):
return ak._v2.contents.IndexedOptionArray(
self._index,
self._content.packed(),
self._identifier,
self._parameters,
self._nplike,
)

else:
new_index = self._nplike.index_nplike.empty(
len(original_index), dtype=original_index.dtype
)
Expand All @@ -1704,15 +1714,6 @@ def packed(self):
self._nplike,
)

else:
return ak._v2.contents.IndexedOptionArray(
self._index,
self._content.packed(),
self._identifier,
self._parameters,
self._nplike,
)

def _to_list(self, behavior, json_conversions):
out = self._to_list_custom(behavior, json_conversions)
if out is not None:
Expand Down
24 changes: 24 additions & 0 deletions tests/v2/test_1688-pack-categorical.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

import pytest # noqa: F401
import awkward as ak # noqa: F401


# Module-level handle on awkward's NumPy nplike (obtained via Numpy.instance());
# the test in this file does not reference it.
numpy = ak.nplike.Numpy.instance()


def test():
    """Check that ``ak._v2.packed`` keeps categorical arrays categorical."""
    categorical = ak._v2.to_categorical(
        ["one", "two", "one", "three", "one", "four"]
    )
    assert ak._v2.is_categorical(categorical)

    # Packing the full array must leave its type untouched ...
    packed_full = ak._v2.packed(categorical)
    assert packed_full.type == categorical.type
    # ... and must not alter the categories.
    same_categories = ak._v2.categories(packed_full) == ak._v2.categories(
        categorical
    )
    assert ak._v2.all(same_categories)

    # A slice has a different length, so only the inner types are compared.
    packed_slice = ak._v2.packed(categorical[:-1])
    assert ak._v2.is_categorical(packed_slice)
    assert packed_slice.type.content == categorical.type.content
    # The categories must survive packing of the slice as well.
    same_categories = ak._v2.categories(packed_slice) == ak._v2.categories(
        categorical
    )
    assert ak._v2.all(same_categories)

0 comments on commit 3da5f32

Please sign in to comment.