From afbc4f3b79f3f441ec08745ec1e0a11de7b3c3ad Mon Sep 17 00:00:00 2001 From: Lukas Date: Sun, 28 Aug 2022 22:12:31 +0200 Subject: [PATCH] fix: Keep staterror from affecting multiple samples (#1965) * Fix staterror bug introduced after v0.6.3 where staterror erroneously affected multiple samples. - Amends PR #1639 which introduced regression. - c.f. Issue #1720 * Remove staterror_combined.__staterror_uncrt as it is unused by the codebase. * Add test to verify that staterror does not affect more samples than shapesys equivalently. Co-authored-by: Giordon Stark --- src/pyhf/modifiers/staterror.py | 26 ++++------ tests/test_modifiers.py | 52 +++++++++++++++++++ .../issue1720_greedy_staterror.json | 49 +++++++++++++++++ tests/test_scripts.py | 2 +- 4 files changed, 112 insertions(+), 17 deletions(-) create mode 100644 tests/test_modifiers/issue1720_greedy_staterror.json diff --git a/src/pyhf/modifiers/staterror.py b/src/pyhf/modifiers/staterror.py index 3cb1cd5c4e..1e7573d729 100644 --- a/src/pyhf/modifiers/staterror.py +++ b/src/pyhf/modifiers/staterror.py @@ -100,8 +100,12 @@ def finalize(self): ], axis=0, ) + # here relerrs still has all the bins, while the staterror are usually per-channel + # so we need to pick out the masks for this modifier to extract the + # modifier configuration (sigmas, etc..) + # so loop over samples and extract the first mask + # while making sure any subsequent mask is consistent relerrs = default_backend.sqrt(relerrs) - masks = {} for modifier_data in self.builder_data[modname].values(): mask_this_sample = default_backend.astensor( @@ -113,12 +117,14 @@ def finalize(self): else: assert (mask_this_sample == masks[modname]).all() - for modifier_data in self.builder_data[modname].values(): - modifier_data['data']['mask'] = masks[modname] + # extract sigmas using this modifiers mask sigmas = relerrs[masks[modname]] + # list of bools, consistent with other modifiers (no numpy.bool_) fixed = default_backend.tolist(sigmas == 0) - # ensures non-Nan constraint term, but in a future PR we need to remove constraints for these + # FIXME: sigmas that are zero will be fixed to 1.0 arbitrarily to ensure + # non-Nan constraint term, but in a future PR need to remove constraints + # for these sigmas[fixed] = 1.0 self.required_parsets.setdefault(parname, [required_parset(sigmas, fixed)]) return self.builder_data @@ -145,18 +151,6 @@ def __init__(self, modifiers, pdfconfig, builder_data, batch_size=None): [[builder_data[m][s]['data']['mask']] for s in pdfconfig.samples] for m in keys ] - self.__staterror_uncrt = default_backend.astensor( - [ - [ - [ - builder_data[m][s]['data']['uncrt'], - builder_data[m][s]['data']['nom_data'], - ] - for s in pdfconfig.samples - ] - for m in keys - ] - ) global_concatenated_bin_indices = [ [[j for c in pdfconfig.channels for j in range(pdfconfig.channel_nbins[c])]] ] diff --git a/tests/test_modifiers.py b/tests/test_modifiers.py index 82debbf421..493326e2c8 100644 --- a/tests/test_modifiers.py +++ b/tests/test_modifiers.py @@ -193,3 +193,55 @@ def test_invalid_bin_wise_modifier(datadir, patch_file): with pytest.raises(pyhf.exceptions.InvalidModifier): pyhf.Model(bad_spec) + + +def test_issue1720_staterror_builder_mask(datadir): + with open(datadir.joinpath("issue1720_greedy_staterror.json")) as spec_file: + spec = json.load(spec_file) + + spec["channels"][0]["samples"][1]["modifiers"][0]["type"] = "staterror" + config = pyhf.pdf._ModelConfig(spec) + builder = pyhf.modifiers.staterror.staterror_builder(config) + + channel = spec["channels"][0] + sig_sample = channel["samples"][0] + bkg_sample = channel["samples"][1] + modifier = bkg_sample["modifiers"][0] + + assert channel["name"] == "channel" + assert sig_sample["name"] == "signal" + assert bkg_sample["name"] == "bkg" + assert modifier["type"] == "staterror" + + builder.append("staterror/NP", "channel", "bkg", modifier, bkg_sample) + collected_bkg = builder.collect(modifier, bkg_sample["data"]) + assert collected_bkg == {"mask": [True], "nom_data": [1], "uncrt": [1.5]} + + builder.append("staterror/NP", "channel", "signal", None, sig_sample) + collected_sig = builder.collect(None, sig_sample["data"]) + assert collected_sig == {"mask": [False], "nom_data": [5], "uncrt": [0.0]} + + finalized = builder.finalize() + assert finalized["staterror/NP"]["bkg"]["data"]["mask"].tolist() == [True] + assert finalized["staterror/NP"]["signal"]["data"]["mask"].tolist() == [False] + + +@pytest.mark.parametrize( + "inits", + [[-2.0], [-1.0], [0.0], [1.0], [2.0]], +) +def test_issue1720_greedy_staterror(datadir, inits): + """ + Test that the staterror does not affect more samples than shapesys equivalently. + """ + with open(datadir.joinpath("issue1720_greedy_staterror.json")) as spec_file: + spec = json.load(spec_file) + + model_shapesys = pyhf.Workspace(spec).model() + expected_shapesys = model_shapesys.expected_actualdata(inits) + + spec["channels"][0]["samples"][1]["modifiers"][0]["type"] = "staterror" + model_staterror = pyhf.Workspace(spec).model() + expected_staterror = model_staterror.expected_actualdata(inits) + + assert expected_staterror == expected_shapesys diff --git a/tests/test_modifiers/issue1720_greedy_staterror.json b/tests/test_modifiers/issue1720_greedy_staterror.json new file mode 100644 index 0000000000..70354ac06a --- /dev/null +++ b/tests/test_modifiers/issue1720_greedy_staterror.json @@ -0,0 +1,49 @@ +{ + "channels": [ + { + "name": "channel", + "samples": [ + { + "data": [ + 5 + ], + "modifiers": [], + "name": "signal" + }, + { + "data": [ + 1 + ], + "modifiers": [ + { + "data": [ + 1.5 + ], + "name": "NP", + "type": "shapesys" + } + ], + "name": "bkg" + } + ] + } + ], + "measurements": [ + { + "config": { + "parameters": [], + "poi": "NP" + }, + "name": "" + } + ], + "observations": [ + { + "data": [ + 0 + ], + "name": "channel" + } + ], + "version": "1.0.0" +} diff --git a/tests/test_scripts.py b/tests/test_scripts.py index e67eb5a012..124dabafc6 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -297,7 +297,7 @@ def test_testpoi(tmpdir, script_runner): command = f'pyhf xml2json validation/xmlimport_input/config/example.xml --basedir validation/xmlimport_input/ --output-file {temp.strpath:s}' ret = script_runner.run(*shlex.split(command)) - pois = [1.0, 0.5, 0.0] + pois = [1.0, 0.5, 0.001] results_exp = [] results_obs = [] for test_poi in pois: