Skip to content

Commit

Permalink
Merge pull request #940 from kcormi/fewer_ordered_dicts_for_datacard
Browse files Browse the repository at this point in the history
Remove OrderedDicts etc except where empirically necessary
  • Loading branch information
kcormi committed Apr 26, 2024
2 parents 80e211b + 33fee63 commit f23b469
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 52 deletions.
33 changes: 16 additions & 17 deletions python/Datacard.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import print_function

import six
from collections import OrderedDict


class Datacard:
Expand All @@ -17,57 +16,57 @@ def __init__(self):
## list of [bins in datacard]
self.bins = []
## dict of {bin : number of observed events}
self.obs = OrderedDict()
self.obs = {}
## list of [processes]
self.processes = []
## list of [signal processes]
self.signals = []
## dict of {processes : boolean to indicate whether process is signal or not}
self.isSignal = OrderedDict()
self.isSignal = {}
## list of [(bin, process, boolean to indicate whether process is signal or not)]
self.keyline = []
## dict of {bin : {process : yield}}
self.exp = OrderedDict()
self.exp = {}
## list of [(name of uncert, boolean to indicate whether to float this nuisance or not, type, list of what additional arguments (e.g. for gmN), keyline element)]
self.systs = []
## dict of {bin : {process : [input file, path to shape, path to shape for uncertainty]}}
self.shapeMap = OrderedDict()
self.shapeMap = {}
## boolean that indicates whether the datacard contains shapes or not
self.hasShapes = False
## dict of {name of uncert : boolean to indicate whether it is a flat parametric uncertainty or not}
self.flatParamNuisances = OrderedDict()
self.flatParamNuisances = {}
## dict of rateParam, key is f"{bin}AND{process}", per bin/process they are a list
self.rateParams = OrderedDict()
self.rateParams = {}
## dict of extArgs
self.extArgs = OrderedDict()
self.extArgs = {}
## maintain the names of rate modifiers
self.rateParamsOrder = set()
## set of names of uncerts that are frozen (presumably the nuisances that are not floating; confirm against callers)
self.frozenNuisances = set()

# Allows for nuisance renaming of "shape" systematics
self.systematicsShapeMap = OrderedDict()
self.systematicsShapeMap = {}

# Allows for nuisance renaming of "param" systematics
self.systematicsParamMap = OrderedDict()
self.systematicsParamMap = {}

# Allow to pick out entry in self.systs.
self.systIDMap = OrderedDict()
self.systIDMap = {}

# Keep edits
self.nuisanceEditLines = []

# map of which bins should have automated Barlow-Beeston parameters
self.binParFlags = OrderedDict()
self.binParFlags = {}

self.groups = OrderedDict()
self.groups = {}
self.discretes = []

# list of parameters called _norm in user input workspace
self.pdfnorms = OrderedDict()
self.pdfnorms = {}

# collection of nuisances to auto-produce flat priors for
self.toCreateFlatParam = OrderedDict()
self.toCreateFlatParam = {}

def print_structure(self):
"""
Expand Down Expand Up @@ -171,7 +170,7 @@ def print_structure(self):
)

# map of which bins should have automated Barlow-Beeston parameters
self.binParFlags = OrderedDict()
self.binParFlags = {}

def list_of_bins(self):
"""
Expand Down Expand Up @@ -312,7 +311,7 @@ def renameNuisanceParameter(self, oldname, newname, process_list=[], channel_lis
for specific channels/processes, then you should specify a
process (list or leave empty for all) and channel (list or leave empty for all)
"""
existingclashes = OrderedDict()
existingclashes = {}
for lsyst, nofloat, pdf0, args0, errline0 in self.systs[:]:
if lsyst == newname: # found the nuisance exists
existingclashes[lsyst] = (nofloat, pdf0, args0, errline0)
Expand Down
16 changes: 7 additions & 9 deletions python/DatacardParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
from HiggsAnalysis.CombinedLimit.Datacard import Datacard
from HiggsAnalysis.CombinedLimit.NuisanceModifier import doEditNuisance

from collections import OrderedDict

globalNuisances = re.compile("(lumi|pdf_(qqbar|gg|qg)|QCDscale_(ggH|qqH|VH|ggH1in|ggH2in|VV)|UEPS|FakeRate|CMS_(eff|fake|trigger|scale|res)_([gemtjb]|met))")


Expand Down Expand Up @@ -352,7 +350,7 @@ def parseCard(file, options):

# resetting these here to defaults, parseCard will fill them up
ret.discretes = []
ret.groups = OrderedDict()
ret.groups = {}

#
nbins = -1
Expand Down Expand Up @@ -388,7 +386,7 @@ def parseCard(file, options):
if len(f) < 4:
raise RuntimeError("Malformed shapes line")
if f[2] not in ret.shapeMap:
ret.shapeMap[f[2]] = OrderedDict()
ret.shapeMap[f[2]] = {}
if f[1] in ret.shapeMap[f[2]]:
raise RuntimeError("Duplicate definition for process '%s', channel '%s'" % (f[1], f[2]))
ret.shapeMap[f[2]][f[1]] = f[3:]
Expand All @@ -406,7 +404,7 @@ def parseCard(file, options):
if len(binline) != len(ret.obs):
raise RuntimeError("Found %d bins (%s) but %d bins have been declared" % (len(ret.bins), ret.bins, nbins))
ret.bins = binline
ret.obs = OrderedDict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)])
ret.obs = dict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)])
binline = []
if f[0] == "bin":
binline = []
Expand Down Expand Up @@ -447,10 +445,10 @@ def parseCard(file, options):
raise RuntimeError("Found %d processes (%s), declared jmax = %d" % (len(ret.processes), ret.processes, nprocesses))
if nbins != len(ret.bins):
raise RuntimeError("Found %d bins (%s), declared imax = %d" % (len(ret.bins), ret.bins, nbins))
ret.exp = OrderedDict([(b, OrderedDict()) for b in ret.bins])
ret.isSignal = OrderedDict([(p, None) for p in ret.processes])
ret.exp = dict([(b, {}) for b in ret.bins])
ret.isSignal = dict([(p, None) for p in ret.processes])
if ret.obs != [] and type(ret.obs) == list: # still as list, must change into map with bin names
ret.obs = OrderedDict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)])
ret.obs = dict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)])
for b, p, s in ret.keyline:
if ret.isSignal[p] == None:
ret.isSignal[p] = s
Expand Down Expand Up @@ -633,7 +631,7 @@ def parseCard(file, options):
raise RuntimeError(
"Malformed systematics line %s of length %d: while bins and process lines have length %d" % (lsyst, len(numbers), len(ret.keyline))
)
errline = OrderedDict([(b, OrderedDict()) for b in ret.bins])
errline = dict([(b, {}) for b in ret.bins])
nonNullEntries = 0
for (b, p, s), r in zip(ret.keyline, numbers):
if "/" in r: # "number/number"
Expand Down
10 changes: 4 additions & 6 deletions python/ModelTools.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
import six
from six.moves import range

from collections import OrderedDict

import ROOT

ROOFIT_EXPR = "expr"
Expand Down Expand Up @@ -56,7 +54,7 @@ def __init__(self, options):
self.out = ROOT.RooWorkspace("w", "w")
# self.out.safe_import = getattr(self.out,"import") # workaround: import is a python keyword
self.out.safe_import = SafeWorkspaceImporter(self.out)
self.objstore = OrderedDict()
self.objstore = {}
self.out.dont_delete = []
if options.verbose == 0:
ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.ERROR)
Expand Down Expand Up @@ -223,7 +221,7 @@ def runPostProcesses(self):
self.out.arg(n).setConstant(True)

def doExtArgs(self):
open_files = OrderedDict()
open_files = {}
for rp in self.DC.extArgs.keys():
if self.out.arg(rp):
continue
Expand Down Expand Up @@ -279,7 +277,7 @@ def doExtArgs(self):
def doRateParams(self):
# First support external functions/parameters
# keep a map of open files/workspaces
open_files = OrderedDict()
open_files = {}

for rp in self.DC.rateParams.keys():
for rk in range(len(self.DC.rateParams[rp])):
Expand Down Expand Up @@ -848,7 +846,7 @@ def doAutoFlatNuisancePriors(self):

def doNuisancesGroups(self):
# Prepare a dictionary of which group a certain nuisance belongs to
groupsFor = OrderedDict()
groupsFor = {}
# existingNuisanceNames = tuple(set([syst[0] for syst in self.DC.systs]+self.DC.flatParamNuisances.keys()+self.DC.rateParams.keys()+self.DC.extArgs.keys()+self.DC.discretes))
existingNuisanceNames = self.DC.getAllVariables()
for groupName, nuisanceNames in six.iteritems(self.DC.groups):
Expand Down
6 changes: 2 additions & 4 deletions python/NuisanceModifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import sys
from math import exp, hypot, log

from collections import OrderedDict


def appendMap(tmap, k, thing):
if k in list(tmap.keys()):
Expand Down Expand Up @@ -79,7 +77,7 @@ def doAddNuisance(datacard, args):
cchannel = re.compile(channel.replace("+", r"\+"))
opts = args[5:]
found = False
errline = OrderedDict([(b, OrderedDict([(p, 0) for p in datacard.exp[b]])) for b in datacard.bins])
errline = dict([(b, dict([(p, 0) for p in datacard.exp[b]])) for b in datacard.bins])
for lsyst, nofloat, pdf0, args0, errline0 in datacard.systs:
if lsyst == name:
if pdf != pdf0:
Expand Down Expand Up @@ -228,7 +226,7 @@ def doRenameNuisance(datacard, args):
if pdf0 == "param":
continue
# for dcs in datacard.systs: print " --> ", dcs
errline2 = OrderedDict([(b, OrderedDict([(p, 0) for p in datacard.exp[b]])) for b in datacard.bins])
errline2 = dict([(b, dict([(p, 0) for p in datacard.exp[b]])) for b in datacard.bins])
found = False
if newname in list(datacard.systIDMap.keys()):
for id2 in datacard.systIDMap[newname]:
Expand Down
30 changes: 14 additions & 16 deletions python/ShapeTools.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
import six
from six.moves import range

from collections import OrderedDict

import ROOT
from HiggsAnalysis.CombinedLimit.ModelTools import ModelBuilder

Expand All @@ -33,7 +31,7 @@ class FileCache:
def __init__(self, basedir, maxsize=250):
self._basedir = basedir
self._maxsize = maxsize
self._files = OrderedDict()
self._files = {}
self._hits = defaultdict(int)
self._total = 0

Expand Down Expand Up @@ -73,10 +71,10 @@ def __init__(self, datacard, options):
if options.libs:
for lib in options.libs:
ROOT.gSystem.Load(lib)
self.wspnames = OrderedDict()
self.wspnames = {}
self.wsp = None
self.extraImports = []
self.norm_rename_map = OrderedDict()
self.norm_rename_map = {}
self._fileCache = FileCache(self.options.baseDir)

## ------------------------------------------
Expand Down Expand Up @@ -470,12 +468,12 @@ def RenameDupObjs(self, dupObjs, dupNames, newObj, postFix):
## --------------------------------------
def prepareAllShapes(self):
shapeTypes = []
shapeBins = OrderedDict()
shapeObs = OrderedDict()
self.pdfModes = OrderedDict()
shapeBins = {}
shapeObs = {}
self.pdfModes = {}
for ib, b in enumerate(self.DC.bins):
databins = OrderedDict()
bgbins = OrderedDict()
databins = {}
bgbins = {}
channelBinParFlag = b in list(self.DC.binParFlags.keys())
for p in [self.options.dataname] + list(self.DC.exp[b].keys()):
if len(self.DC.obs) == 0 and p == self.options.dataname:
Expand Down Expand Up @@ -563,7 +561,7 @@ def prepareAllShapes(self):
if i not in bgbins:
stderr.write("Channel %s has bin %d filled in data but empty in all backgrounds\n" % (b, i))
if shapeTypes.count("TH1"):
self.TH1Observables = OrderedDict()
self.TH1Observables = {}
self.out.binVars = ROOT.RooArgSet()
self.out.maxbins = max([shapeBins[k] for k in shapeBins.keys()])
if self.options.optimizeTemplateBins:
Expand Down Expand Up @@ -664,7 +662,7 @@ def doCombinedDataset(self):
## -------------------------------------
## -------- Low level helpers ----------
## -------------------------------------
def getShape(self, channel, process, syst="", _cache=OrderedDict(), allowNoSyst=False):
def getShape(self, channel, process, syst="", _cache={}, allowNoSyst=False):
if (channel, process, syst) in _cache:
if self.options.verbose > 2:
print(
Expand Down Expand Up @@ -853,10 +851,10 @@ def getShape(self, channel, process, syst="", _cache=OrderedDict(), allowNoSyst=
_cache[(channel, process, syst)] = ret
return ret

def getData(self, channel, process, syst="", _cache=OrderedDict()):
def getData(self, channel, process, syst="", _cache={}):
return self.shape2Data(self.getShape(channel, process, syst), channel, process)

def getPdf(self, channel, process, _cache=OrderedDict()):
def getPdf(self, channel, process, _cache={}):
postFix = "Sig" if (process in self.DC.isSignal and self.DC.isSignal[process]) else "Bkg"
if (channel, process) in _cache:
return _cache[(channel, process)]
Expand Down Expand Up @@ -1214,7 +1212,7 @@ def rebinH1(self, shape):
rebinh1._original_bins = shapeNbins
return rebinh1

def shape2Data(self, shape, channel, process, _cache=OrderedDict()):
def shape2Data(self, shape, channel, process, _cache={}):
postFix = "Sig" if (process in self.DC.isSignal and self.DC.isSignal[process]) else "Bkg"
if shape == None:
name = "shape%s_%s_%s" % (postFix, channel, process)
Expand Down Expand Up @@ -1250,7 +1248,7 @@ def shape2Data(self, shape, channel, process, _cache=OrderedDict()):
raise RuntimeError("shape2Data not implemented for %s" % shape.ClassName())
return _cache[shape.GetName()]

def shape2Pdf(self, shape, channel, process, _cache=OrderedDict()):
def shape2Pdf(self, shape, channel, process, _cache={}):
postFix = "Sig" if (process in self.DC.isSignal and self.DC.isSignal[process]) else "Bkg"
channelBinParFlag = channel in list(self.DC.binParFlags.keys())
if shape == None:
Expand Down

0 comments on commit f23b469

Please sign in to comment.