Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: histogram weights not handled correctly in hist / boost conversion #774

Merged
merged 31 commits into from
Nov 2, 2022
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
c58c9f7
remove unused dependency
lobis Oct 31, 2022
c6f671a
add missing asserts
lobis Oct 31, 2022
052de1a
add test for TH1 weights from root
lobis Oct 31, 2022
9f10718
add test for issue
lobis Oct 31, 2022
1971918
alternative way to detect weighted hist
lobis Oct 31, 2022
02a1ecb
made `fSumw2` None if storage is not weights
lobis Oct 31, 2022
bd90779
remove comments from test
lobis Oct 31, 2022
a160bff
add test for hist with weights
lobis Oct 31, 2022
0bd7de6
fix bad check for storage type
lobis Oct 31, 2022
af7d023
remove comment
lobis Nov 1, 2022
23a76ea
add test for hist with(out) weights and labels
lobis Nov 1, 2022
7ba24ce
updated TH1 `to_boost` to handle weights/labels better
lobis Nov 1, 2022
860b6b3
placed histogram `to_boost` in parent `Histogram` class to reduce cod…
lobis Nov 1, 2022
017cb35
updated `weighted` property
lobis Nov 1, 2022
fc80387
implemented histogram `weighted` property in parent `Histogram` class
lobis Nov 1, 2022
6f50836
using weighted property instead of copying check
lobis Nov 1, 2022
0fe0c1b
do not use mutable default arguments
lobis Nov 1, 2022
ea7fbdd
fix calling property
lobis Nov 1, 2022
6d43cc7
add missing asserts to test
lobis Nov 1, 2022
17459cb
add test for issue #722
lobis Nov 1, 2022
d3157bf
add weight test for 2D and 3D histograms
lobis Nov 1, 2022
99ea10d
add temporary skip to test until file is uploaded
lobis Nov 1, 2022
43d7c01
Merge branch 'main' into fix-hist-weights
lobis Nov 1, 2022
aa8c764
update issue test file
lobis Nov 1, 2022
383eb2c
Merge remote-tracking branch 'origin/fix-hist-weights' into fix-hist-…
lobis Nov 1, 2022
c36ccf3
add back temporary skip until file is available
lobis Nov 1, 2022
2aff474
Apply suggestions from code review
lobis Nov 1, 2022
11c14c1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 1, 2022
7d49e6d
add suggestion from https://github.com/scikit-hep/uproot5/pull/774#di…
lobis Nov 1, 2022
6591e5d
add check for length of `fSumw2` greater than 0 so empty histograms a…
lobis Nov 1, 2022
abc2cf5
remove unnecessary subclass method implementation
lobis Nov 1, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 73 additions & 40 deletions src/uproot/behaviors/TH1.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ def weighted(self):
"""
True if the histogram has weights (``fSumw2``); False otherwise.
"""
raise NotImplementedError(repr(self))
sumw2 = self.member("fSumw2")
return len(sumw2) > 0 and len(sumw2) == self.member("fNcells")
lobis marked this conversation as resolved.
Show resolved Hide resolved

@property
def kind(self):
Expand Down Expand Up @@ -180,7 +181,7 @@ def counts(self, flow=False):
"""
return self.values(flow=flow)

def to_boost(self, metadata=boost_metadata, axis_metadata=boost_axis_metadata):
def to_boost(self, metadata=None, axis_metadata=None):
"""
Args:
metadata (dict of str \u2192 str): Metadata to collect (keys) and
Expand All @@ -190,9 +191,73 @@ def to_boost(self, metadata=boost_metadata, axis_metadata=boost_axis_metadata):

Converts the histogram into a ``boost-histogram`` object.
"""
raise NotImplementedError(repr(self))
if axis_metadata is None:
axis_metadata = boost_axis_metadata
if metadata is None:
metadata = boost_metadata

boost_histogram = uproot.extras.boost_histogram()

values = self.values(flow=True)

# 'fSumw2' is never missing; when no weights are defined, it is an array of length 0
sumw2 = None
if self.weighted:
lobis marked this conversation as resolved.
Show resolved Hide resolved
sumw2 = self.member("fSumw2")
sumw2 = numpy.asarray(sumw2, dtype=sumw2.dtype.newbyteorder("="))
sumw2 = numpy.reshape(sumw2, values.shape)
storage = boost_histogram.storage.Weight()
else:
if issubclass(values.dtype.type, numpy.integer):
storage = boost_histogram.storage.Int64()
else:
storage = boost_histogram.storage.Double()

axes = [
_boost_axis(self.member(name), axis_metadata)
for name in ["fXaxis", "fYaxis", "fZaxis"][0 : len(self.axes)]
]
out = boost_histogram.Histogram(*axes, storage=storage)
for k, v in metadata.items():
setattr(out, k, self.member(v))

assert len(axes) <= 3, "Only 1D, 2D, and 3D histograms are supported"
assert len(values.shape) == len(
axes
), "Number of dimensions must match number of axes"
for i, axis in enumerate(axes):
if not isinstance(
axis,
(boost_histogram.axis.IntCategory, boost_histogram.axis.StrCategory),
):
continue
# TODO: simplify this code (save multidim slice into a variable?)
if i == 0:
values = values[1:]
if sumw2 is not None:
sumw2 = sumw2[1:]
elif i == 1:
values = values[:, 1:]
if sumw2 is not None:
sumw2 = sumw2[:, 1:]
elif i == 2:
values = values[:, :, 1:]
if sumw2 is not None:
sumw2 = sumw2[:, :, 1:]
lobis marked this conversation as resolved.
Show resolved Hide resolved
lobis marked this conversation as resolved.
Show resolved Hide resolved

view = out.view(flow=True)
if sumw2 is not None:
assert (
sumw2.shape == values.shape
), "weights (fSumw2) and values should have same shape"
view.value = values
view.variance = sumw2
else:
view[...] = values

return out

def to_hist(self, metadata=boost_metadata, axis_metadata=boost_axis_metadata):
def to_hist(self, metadata=None, axis_metadata=None):
"""
Args:
metadata (dict of str \u2192 str): Metadata to collect (keys) and
Expand All @@ -203,7 +268,7 @@ def to_hist(self, metadata=boost_metadata, axis_metadata=boost_axis_metadata):
Converts the histogram into a ``hist`` object.
"""
return uproot.extras.hist().Hist(
self.to_boost(metadata=boost_metadata, axis_metadata=boost_axis_metadata)
self.to_boost(metadata=metadata, axis_metadata=axis_metadata)
)

# Support direct conversion to histograms, such as bh.Histogram(self) or hist.Hist(self)
Expand All @@ -229,8 +294,7 @@ def axis(self, axis=0): # default axis for one-dimensional is intentional

@property
def weighted(self):
sumw2 = self.member("fSumw2", none_if_missing=True)
return sumw2 is not None and len(sumw2) == self.member("fNcells")
return super().weighted
lobis marked this conversation as resolved.
Show resolved Hide resolved

@property
def kind(self):
Expand Down Expand Up @@ -292,36 +356,5 @@ def to_numpy(self, flow=False, dd=False):
else:
return values, xedges

def to_boost(self, metadata=boost_metadata, axis_metadata=boost_axis_metadata):
boost_histogram = uproot.extras.boost_histogram()

values = self.values(flow=True)

sumw2 = self.member("fSumw2", none_if_missing=True)

if sumw2 is not None and len(sumw2) == self.member("fNcells"):
sumw2 = numpy.asarray(sumw2, dtype=sumw2.dtype.newbyteorder("="))
sumw2 = numpy.reshape(sumw2, values.shape)
storage = boost_histogram.storage.Weight()
else:
if issubclass(values.dtype.type, numpy.integer):
storage = boost_histogram.storage.Int64()
else:
storage = boost_histogram.storage.Double()

xaxis = _boost_axis(self.member("fXaxis"), axis_metadata)
out = boost_histogram.Histogram(xaxis, storage=storage)
for k, v in metadata.items():
setattr(out, k, self.member(v))

if isinstance(xaxis, boost_histogram.axis.StrCategory):
values = values[1:]

view = out.view(flow=True)
if sumw2 is not None and len(sumw2) == len(values):
view.value = values
view.variance = sumw2
else:
view[...] = values

return out
def to_boost(self, metadata=None, axis_metadata=None):
    """
    Converts the histogram into a ``boost-histogram`` object.

    Forwards ``metadata`` and ``axis_metadata`` unchanged to the parent
    class implementation of ``to_boost``.
    """
    return super().to_boost(metadata=metadata, axis_metadata=axis_metadata)
lobis marked this conversation as resolved.
Show resolved Hide resolved
42 changes: 3 additions & 39 deletions src/uproot/behaviors/TH2.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import numpy

import uproot
from uproot.behaviors.TH1 import boost_axis_metadata, boost_metadata


class TH2(uproot.behaviors.TH1.Histogram):
Expand All @@ -34,8 +33,7 @@ def axis(self, axis):

@property
def weighted(self):
sumw2 = self.member("fSumw2", none_if_missing=True)
return sumw2 is not None and len(sumw2) == self.member("fNcells")
return super().weighted
lobis marked this conversation as resolved.
Show resolved Hide resolved

@property
def kind(self):
Expand Down Expand Up @@ -101,39 +99,5 @@ def to_numpy(self, flow=False, dd=False):
else:
return values, xedges, yedges

def to_boost(self, metadata=boost_metadata, axis_metadata=boost_axis_metadata):
boost_histogram = uproot.extras.boost_histogram()

values = self.values(flow=True)

sumw2 = self.member("fSumw2", none_if_missing=True)

if sumw2 is not None and len(sumw2) == self.member("fNcells"):
sumw2 = numpy.asarray(sumw2, dtype=sumw2.dtype.newbyteorder("="))
sumw2 = numpy.transpose(numpy.reshape(sumw2, values.shape[::-1]))
storage = boost_histogram.storage.Weight()
else:
if issubclass(values.dtype.type, numpy.integer):
storage = boost_histogram.storage.Int64()
else:
storage = boost_histogram.storage.Double()

xaxis = uproot.behaviors.TH1._boost_axis(self.member("fXaxis"), axis_metadata)
yaxis = uproot.behaviors.TH1._boost_axis(self.member("fYaxis"), axis_metadata)
out = boost_histogram.Histogram(xaxis, yaxis, storage=storage)
for k, v in metadata.items():
setattr(out, k, self.member(v))

if isinstance(xaxis, boost_histogram.axis.StrCategory):
values = values[1:, :]
if isinstance(yaxis, boost_histogram.axis.StrCategory):
values = values[:, 1:]

view = out.view(flow=True)
if sumw2 is not None and len(sumw2) == len(values):
view.value = values
view.variance = sumw2
else:
view[...] = values

return out
def to_boost(self, metadata=None, axis_metadata=None):
    """
    Converts the histogram into a ``boost-histogram`` object.

    Forwards ``metadata`` and ``axis_metadata`` unchanged to the parent
    class implementation of ``to_boost``.
    """
    return super().to_boost(metadata=metadata, axis_metadata=axis_metadata)
lobis marked this conversation as resolved.
Show resolved Hide resolved
45 changes: 3 additions & 42 deletions src/uproot/behaviors/TH3.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import numpy

import uproot
from uproot.behaviors.TH1 import boost_axis_metadata, boost_metadata


class TH3(uproot.behaviors.TH1.Histogram):
Expand Down Expand Up @@ -38,8 +37,7 @@ def axis(self, axis):

@property
def weighted(self):
sumw2 = self.member("fSumw2", none_if_missing=True)
return sumw2 is not None and len(sumw2) == self.member("fNcells")
return super().weighted
lobis marked this conversation as resolved.
Show resolved Hide resolved

@property
def kind(self):
Expand Down Expand Up @@ -110,42 +108,5 @@ def to_numpy(self, flow=False, dd=False):
else:
return values, xedges, yedges, zedges

def to_boost(self, metadata=boost_metadata, axis_metadata=boost_axis_metadata):
boost_histogram = uproot.extras.boost_histogram()

values = self.values(flow=True)

sumw2 = self.member("fSumw2", none_if_missing=True)

if sumw2 is not None and len(sumw2) == self.member("fNcells"):
sumw2 = numpy.asarray(sumw2, dtype=sumw2.dtype.newbyteorder("="))
sumw2 = numpy.transpose(numpy.reshape(sumw2, values.shape[::-1]))
storage = boost_histogram.storage.Weight()
else:
if issubclass(values.dtype.type, numpy.integer):
storage = boost_histogram.storage.Int64()
else:
storage = boost_histogram.storage.Double()

xaxis = uproot.behaviors.TH1._boost_axis(self.member("fXaxis"), axis_metadata)
yaxis = uproot.behaviors.TH1._boost_axis(self.member("fYaxis"), axis_metadata)
zaxis = uproot.behaviors.TH1._boost_axis(self.member("fZaxis"), axis_metadata)
out = boost_histogram.Histogram(xaxis, yaxis, zaxis, storage=storage)
for k, v in metadata.items():
setattr(out, k, self.member(v))

if isinstance(xaxis, boost_histogram.axis.StrCategory):
values = values[1:, :, :]
if isinstance(yaxis, boost_histogram.axis.StrCategory):
values = values[:, 1:, :]
if isinstance(zaxis, boost_histogram.axis.StrCategory):
values = values[:, :, 1:]

view = out.view(flow=True)
if sumw2 is not None and len(sumw2) == len(values):
view.value = values
view.variance = sumw2
else:
view[...] = values

return out
def to_boost(self, metadata=None, axis_metadata=None):
    """
    Converts the histogram into a ``boost-histogram`` object.

    Forwards ``metadata`` and ``axis_metadata`` unchanged to the parent
    class implementation of ``to_boost``.
    """
    return super().to_boost(metadata=metadata, axis_metadata=axis_metadata)
lobis marked this conversation as resolved.
Show resolved Hide resolved
36 changes: 22 additions & 14 deletions src/uproot/writing/identify.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,11 @@ def to_writable(obj):
try:
# using flow=True if supported
data = obj.values(flow=True)
fSumw2 = obj.variances(flow=True)
fSumw2 = (
obj.variances(flow=True)
if obj.storage_type == boost_histogram.storage.Weight
else None
)

# and flow=True is different from flow=False (obj actually has flow bins)
data_noflow = obj.values(flow=False)
Expand All @@ -285,19 +289,23 @@ def to_writable(obj):
data = numpy.zeros((s[0] + 2, s[1] + 2, s[2] + 2), dtype=d)
data[1:-1, 1:-1, 1:-1] = tmp

tmp = obj.variances()
s = tmp.shape
if tmp is None:
fSumw2 = None
elif ndim == 1:
fSumw2 = numpy.zeros(s[0] + 2, dtype=">f8")
fSumw2[1:-1] = tmp
elif ndim == 2:
fSumw2 = numpy.zeros((s[0] + 2, s[1] + 2), dtype=">f8")
fSumw2[1:-1, 1:-1] = tmp
elif ndim == 3:
fSumw2 = numpy.zeros((s[0] + 2, s[1] + 2, s[2] + 2), dtype=">f8")
fSumw2[1:-1, 1:-1, 1:-1] = tmp
tmp = (
obj.variances()
if obj.storage_type == boost_histogram.storage.Weight
else None
)
fSumw2 = None
if tmp is not None:
s = tmp.shape
if ndim == 1:
fSumw2 = numpy.zeros(s[0] + 2, dtype=">f8")
fSumw2[1:-1] = tmp
elif ndim == 2:
fSumw2 = numpy.zeros((s[0] + 2, s[1] + 2), dtype=">f8")
fSumw2[1:-1, 1:-1] = tmp
elif ndim == 3:
fSumw2 = numpy.zeros((s[0] + 2, s[1] + 2, s[2] + 2), dtype=">f8")
fSumw2[1:-1, 1:-1, 1:-1] = tmp

else:
# continuing to use flow=True, because it is supported
Expand Down
17 changes: 15 additions & 2 deletions tests/test_0167-use-the-common-histogram-interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@

def test_axis():
with uproot.open(skhep_testdata.data_path("uproot-hepdata-example.root")) as f:
f["hpx"].axes[0] == f["hpx"].axis(0) == f["hpx"].axis(-1) == f["hpx"].axis("x")
assert (
f["hpx"].axes[0]
== f["hpx"].axis(0)
== f["hpx"].axis(-1)
== f["hpx"].axis("x")
)
axis = f["hpx"].axis()
assert len(axis) == 100
assert axis[0] == (-4.0, -3.92)
Expand Down Expand Up @@ -52,7 +57,7 @@ def test_axis():
)

with uproot.open(skhep_testdata.data_path("uproot-issue33.root")) as f:
f["cutflow"].axes[0] == f["cutflow"].axis(0) == f["cutflow"].axis("x")
assert f["cutflow"].axes[0] == f["cutflow"].axis(0) == f["cutflow"].axis("x")
axis = f["cutflow"].axis()
assert len(axis) == 7
assert axis[0] == "Dijet"
Expand Down Expand Up @@ -143,3 +148,11 @@ def test_boost_2():
# assert f["cutflow"].to_boost().title == "dijethad"
# assert f["cutflow"].to_boost().axes[0].name == "xaxis"
# assert f["cutflow"].to_boost().axes[0].title == ""


@pytest.mark.skip(reason="test file not yet available")
lobis marked this conversation as resolved.
Show resolved Hide resolved
lobis marked this conversation as resolved.
Show resolved Hide resolved
def test_issue_0722():
    """
    Regression test for issue #722: calling ``to_boost`` on the ``hist``
    object in the issue's test file must complete without raising.
    """
    # Skips the test when boost_histogram is not installed.
    pytest.importorskip("boost_histogram")

    issue_file = skhep_testdata.data_path("uproot-issue-722.root")
    with uproot.open(issue_file) as root_file:
        root_file["hist"].to_boost()
Loading