From c12862070b604b02ab6cb77c081baee51ab5e9fd Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Fri, 3 Jan 2020 16:15:51 -0500 Subject: [PATCH] Fixing category indexing --- CHANGELOG.md | 7 +++-- boost_histogram/_internal/hist.py | 38 +++++++++++++++++++++++--- tests/test_histogram_indexing.py | 44 +++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31d09a12..7b74d525 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,8 +4,10 @@ Documentation update. Now using development branch of Boost.Histogram again. #### Bug fixes -* Allow slicing on flowless axes [#288][] -* Sum repr fixed [#293][] +* Fix sum over category axes in indexing [#298][] +* Allow single category item selection [#298][] +* Allow slicing on axes without flow bins [#288][] +* Sum repr no longer throws error [#293][] #### Developer changes @@ -14,6 +16,7 @@ Documentation update. Now using development branch of Boost.Histogram again. [#288]: https://github.com/scikit-hep/boost-histogram/pull/288 [#292]: https://github.com/scikit-hep/boost-histogram/pull/292 [#293]: https://github.com/scikit-hep/boost-histogram/pull/293 +[#298]: https://github.com/scikit-hep/boost-histogram/pull/298 ### Version 0.6.1 diff --git a/boost_histogram/_internal/hist.py b/boost_histogram/_internal/hist.py index d6aa6ad0..8a629cdd 100644 --- a/boost_histogram/_internal/hist.py +++ b/boost_histogram/_internal/hist.py @@ -484,6 +484,7 @@ def __getitem__(self, index): slices = [] zeroes_start = [] zeroes_stop = [] + pick = dict() # We could use python's sum here, but for now, a private sum is used class ext_sum: @@ -492,7 +493,8 @@ class ext_sum: # Compute needed slices and projections for i, ind in iterator: if hasattr(ind, "__index__"): - ind = slice(ind.__index__(), ind.__index__() + 1, ext_sum()) + pick[i] = ind.__index__() + ind = slice(None) elif not isinstance(ind, slice): raise IndexError( @@ -508,6 +510,8 @@ class ext_sum: 
zeroes_start.append(i) if ind.stop is not None: zeroes_stop.append(i) + if ind.stop is None and ind.start is None: + continue elif hasattr(ind.step, "factor"): merge = ind.step.factor else: @@ -530,11 +534,17 @@ class ext_sum: slices.append(_core.algorithm.slice_and_rebin(i, begin, end, merge)) reduced = self._reduce(*slices) + if not integrations: - return self.__class__(reduced) + result = self.__class__(reduced) else: projections = [i for i in range(self.rank) if i not in integrations] + # For each axis projected out, we need to lower "pick" since it + # operates after the projection + reduce_ax = lambda ax: sum(i in projections for i in range(ax)) + pick = {reduce_ax(ax): pick[ax] for ax in pick} + # Replacement for crop missing in BH for i in zeroes_start: if self.axes[i].options.underflow: @@ -543,12 +553,34 @@ class ext_sum: if self.axes[i].options.underflow: reduced._hist._reset_row(i, reduced.axes[i].size) - return ( + result = ( self.__class__(reduced.project(*projections)) if projections else reduced.sum(flow=True) ) + # Allow user to "pick" out values when mixed with slices + if pick: + # Sum out the axes. We will replace the contents, so "drop" would be + # just as good if there was one. + sresult = result[{ax: slice(None, None, ext_sum()) for ax in pick}] + + # Make an array of selections, starting with [:,:,...,:] + bins_indexes = [slice(None)] * result.rank + # Now make the axis selections, remembering we will be in flow view + for ax in pick: + bins_indexes[ax] = pick[ax] + result._axis(ax).options.underflow + + # If the result is a single value, we need to avoid [...] + if hasattr(sresult, "rank"): + sresult[...] 
= result.view(flow=True)[tuple(bins_indexes)] + else: + sresult = result.view(flow=True)[tuple(bins_indexes)] + + result = sresult + + return result + def __setitem__(self, index, value): """ There are several supported possibilities: diff --git a/tests/test_histogram_indexing.py b/tests/test_histogram_indexing.py index 47b8b342..6fa5a56b 100644 --- a/tests/test_histogram_indexing.py +++ b/tests/test_histogram_indexing.py @@ -218,3 +218,47 @@ def test_noflow_slicing(): assert_array_equal(h[:, :, True].view(), vals) assert_array_equal(h[:, :, False].view(), 0) + + +def test_pick_str_category(): + noflow = dict(underflow=False, overflow=False) + + h = bh.Histogram( + bh.axis.Regular(10, 0, 10), + bh.axis.Regular(10, 0, 10, **noflow), + bh.axis.StrCategory(["on", "off", "maybe"]), + ) + + vals = np.arange(100).reshape(10, 10) + h[:, :, bh.loc("on")] = vals + + assert h[0, 1, bh.loc("on")] == 1 + assert h[1, 0, bh.loc("on")] == 10 + assert h[1, 1, bh.loc("on")] == 11 + assert h[3, 4, bh.loc("maybe")] == 0 + + assert_array_equal(h[:, :, bh.loc("on")].view(), vals) + assert_array_equal(h[:, :, bh.loc("off")].view(), 0) + + +def test_pick_int_category(): + noflow = dict(underflow=False, overflow=False) + + h = bh.Histogram( + bh.axis.Regular(10, 0, 10), + bh.axis.Regular(10, 0, 10, **noflow), + bh.axis.IntCategory([3, 5, 7]), + ) + + vals = np.arange(100).reshape(10, 10) + h[:, :, bh.loc(3)] = vals + h[:, :, bh.loc(5)] = vals + 1 + + assert h[0, 1, bh.loc(3)] == 1 + assert h[1, 0, bh.loc(5)] == 10 + 1 + assert h[1, 1, bh.loc(5)] == 11 + 1 + assert h[3, 4, bh.loc(7)] == 0 + + assert_array_equal(h[:, :, bh.loc(3)].view(), vals) + assert_array_equal(h[:, :, bh.loc(5)].view(), vals + 1) + assert_array_equal(h[:, :, bh.loc(7)].view(), 0)