Skip to content

Commit

Permalink
fix: Keep staterror from affecting multiple samples (#1965)
Browse files Browse the repository at this point in the history
* Fix staterror bug introduced after v0.6.3 where staterror erroneously affected
  multiple samples.
   - Amends PR #1639 which introduced regression.
   - c.f. Issue #1720
* Remove staterror_combined.__staterror_uncrt as it is unused by the codebase.
* Add test to verify that staterror does not affect more samples than shapesys equivalently.

Co-authored-by: Giordon Stark <kratsg@gmail.com>
  • Loading branch information
lukasheinrich and kratsg authored Aug 28, 2022
1 parent d4e1fc2 commit afbc4f3
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 17 deletions.
26 changes: 10 additions & 16 deletions src/pyhf/modifiers/staterror.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,12 @@ def finalize(self):
],
axis=0,
)
# here relerrs still has all the bins, while the staterror are usually per-channel
# so we need to pick out the masks for this modifier to extract the
# modifier configuration (sigmas, etc..)
# so loop over samples and extract the first mask
# while making sure any subsequent mask is consistent
relerrs = default_backend.sqrt(relerrs)

masks = {}
for modifier_data in self.builder_data[modname].values():
mask_this_sample = default_backend.astensor(
Expand All @@ -113,12 +117,14 @@ def finalize(self):
else:
assert (mask_this_sample == masks[modname]).all()

for modifier_data in self.builder_data[modname].values():
modifier_data['data']['mask'] = masks[modname]
# extract sigmas using this modifiers mask
sigmas = relerrs[masks[modname]]

# list of bools, consistent with other modifiers (no numpy.bool_)
fixed = default_backend.tolist(sigmas == 0)
# ensures non-Nan constraint term, but in a future PR we need to remove constraints for these
# FIXME: sigmas that are zero will be fixed to 1.0 arbitrarily to ensure
# non-Nan constraint term, but in a future PR need to remove constraints
# for these
sigmas[fixed] = 1.0
self.required_parsets.setdefault(parname, [required_parset(sigmas, fixed)])
return self.builder_data
Expand All @@ -145,18 +151,6 @@ def __init__(self, modifiers, pdfconfig, builder_data, batch_size=None):
[[builder_data[m][s]['data']['mask']] for s in pdfconfig.samples]
for m in keys
]
self.__staterror_uncrt = default_backend.astensor(
[
[
[
builder_data[m][s]['data']['uncrt'],
builder_data[m][s]['data']['nom_data'],
]
for s in pdfconfig.samples
]
for m in keys
]
)
global_concatenated_bin_indices = [
[[j for c in pdfconfig.channels for j in range(pdfconfig.channel_nbins[c])]]
]
Expand Down
52 changes: 52 additions & 0 deletions tests/test_modifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,3 +193,55 @@ def test_invalid_bin_wise_modifier(datadir, patch_file):

with pytest.raises(pyhf.exceptions.InvalidModifier):
pyhf.Model(bad_spec)


def test_issue1720_staterror_builder_mask(datadir):
with open(datadir.joinpath("issue1720_greedy_staterror.json")) as spec_file:
spec = json.load(spec_file)

spec["channels"][0]["samples"][1]["modifiers"][0]["type"] = "staterror"
config = pyhf.pdf._ModelConfig(spec)
builder = pyhf.modifiers.staterror.staterror_builder(config)

channel = spec["channels"][0]
sig_sample = channel["samples"][0]
bkg_sample = channel["samples"][1]
modifier = bkg_sample["modifiers"][0]

assert channel["name"] == "channel"
assert sig_sample["name"] == "signal"
assert bkg_sample["name"] == "bkg"
assert modifier["type"] == "staterror"

builder.append("staterror/NP", "channel", "bkg", modifier, bkg_sample)
collected_bkg = builder.collect(modifier, bkg_sample["data"])
assert collected_bkg == {"mask": [True], "nom_data": [1], "uncrt": [1.5]}

builder.append("staterror/NP", "channel", "signal", None, sig_sample)
collected_sig = builder.collect(None, sig_sample["data"])
assert collected_sig == {"mask": [False], "nom_data": [5], "uncrt": [0.0]}

finalized = builder.finalize()
assert finalized["staterror/NP"]["bkg"]["data"]["mask"].tolist() == [True]
assert finalized["staterror/NP"]["signal"]["data"]["mask"].tolist() == [False]


@pytest.mark.parametrize(
"inits",
[[-2.0], [-1.0], [0.0], [1.0], [2.0]],
)
def test_issue1720_greedy_staterror(datadir, inits):
"""
Test that the staterror does not affect more samples than shapesys equivalently.
"""
with open(datadir.joinpath("issue1720_greedy_staterror.json")) as spec_file:
spec = json.load(spec_file)

model_shapesys = pyhf.Workspace(spec).model()
expected_shapesys = model_shapesys.expected_actualdata(inits)

spec["channels"][0]["samples"][1]["modifiers"][0]["type"] = "staterror"
model_staterror = pyhf.Workspace(spec).model()
expected_staterror = model_staterror.expected_actualdata(inits)

assert expected_staterror == expected_shapesys
49 changes: 49 additions & 0 deletions tests/test_modifiers/issue1720_greedy_staterror.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"channels": [
{
"name": "channel",
"samples": [
{
"data": [
5
],
"modifiers": [],
"name": "signal"
},
{
"data": [
1
],
"modifiers": [
{
"data": [
1.5
],
"name": "NP",
"type": "shapesys"
}
],
"name": "bkg"
}
]
}
],
"measurements": [
{
"config": {
"parameters": [],
"poi": "NP"
},
"name": ""
}
],
"observations": [
{
"data": [
0
],
"name": "channel"
}
],
"version": "1.0.0"
}
2 changes: 1 addition & 1 deletion tests/test_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ def test_testpoi(tmpdir, script_runner):
command = f'pyhf xml2json validation/xmlimport_input/config/example.xml --basedir validation/xmlimport_input/ --output-file {temp.strpath:s}'
ret = script_runner.run(*shlex.split(command))

pois = [1.0, 0.5, 0.0]
pois = [1.0, 0.5, 0.001]
results_exp = []
results_obs = []
for test_poi in pois:
Expand Down

0 comments on commit afbc4f3

Please sign in to comment.