Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add sum of weights for BTA_ttbar workflow, fix typo in suball script and make basepath in BTA producers configurable #111

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
8a671bf
Fix small bug in --isArray mode
mondalspandan Jan 16, 2025
246a041
doc: add the documentation of CommFW
Ming-Yan Jan 6, 2025
c8b03bd
Linting
mondalspandan Jan 16, 2025
cb10ee0
Add latest 2022 veto maps (summer22 instead of winter22)
mondalspandan Jan 16, 2025
21c8694
feat: add JEC/jetveto with files in jsonpog-intergration
Ming-Yan Jan 17, 2025
c5884ec
fix typo in suball script
philippgadow Jan 20, 2025
8c87a24
change to nodefaults channels to avoid dependency on anacond for ente…
philippgadow Jan 20, 2025
936fbb3
add sum of weights for variations in BTA_ttbar, make basepath configu…
philippgadow Jan 20, 2025
167a689
add protection if lhe and ps scale weights are not present in BTA_ttb…
philippgadow Jan 20, 2025
d5dc253
Merge branch 'spandan_dev_2501' of github.com:mondalspandan/BTVNanoCo…
philippgadow Jan 20, 2025
af54ec9
also transfer files without events for correctly keeping track of sum…
philippgadow Jan 21, 2025
a2ad076
doc: improve plot description
Ming-Yan Jan 20, 2025
8ee0ee6
doc: add correction description
Ming-Yan Jan 21, 2025
90bd9b4
add default gen unc.
Ming-Yan Jan 21, 2025
35550d4
fix: JEC unc
Ming-Yan Jan 21, 2025
70684b8
fix: uncertainties
Ming-Yan Jan 22, 2025
e28a1e2
add white/black list to suball
philippgadow Jan 23, 2025
d24987f
merge
philippgadow Jan 23, 2025
f3b4234
pre merge master
philippgadow Jan 23, 2025
7dcf7e3
merge with master
philippgadow Jan 23, 2025
85d65b3
update BTA_ttbar producer
philippgadow Jan 28, 2025
af9395d
Merge branch 'master' of github.com:cms-btv-pog/BTVNanoCommissioning …
philippgadow Jan 29, 2025
546bcc2
Merge branch 'master' of github.com:cms-btv-pog/BTVNanoCommissioning …
philippgadow Jan 31, 2025
0213f28
update BTA ttbar producer
philippgadow Feb 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions scripts/suball.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ def get_lumi_from_web(year):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Mastering workflow submission")
parser = config_parser(parser)
paser = scaleout_parser(parser)
paser = debug_parser(parser)
parser = scaleout_parser(parser)
parser = debug_parser(parser)
parser.add_argument(
"-sc",
"--scheme",
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion src/BTVNanoCommissioning/helpers/func.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,5 +257,6 @@ def uproot_writeable(events, include=["events", "run", "luminosityBlock"]):
b_nest[n] = ak.fill_none(
ak.packed(ak.without_parameters(events[bname][n])), -99
)
ev[bname] = ak.zip(b_nest)
if bool(b_nest):
ev[bname] = ak.zip(b_nest)
return ev
4 changes: 2 additions & 2 deletions src/BTVNanoCommissioning/utils/AK4_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
"Run2022D": "calibeHistoWrite_Data2022D_NANO130X_v1.root",
"MC": "calibeHistoWrite_MC2022_NANO130X_v2.root",
},
"jetveto": {"Run2022CD jetvetomap": "Winter22Run3_RunCD_v1.histo.root"},
"jetveto": {"Run2022CD jetvetomap": "Summer22_23Sep2023_RunCD_v1.root"},
},
"Summer22EE": {
"lumiMask": "Cert_Collisions2022_355100_362760_Golden.json",
Expand All @@ -80,7 +80,7 @@
"ele_Reco_med 2022Re-recoE+PromptFG Electron-ID-SF": "Reco20to75",
"ele_Reco_high 2022Re-recoE+PromptFG Electron-ID-SF": "RecoAbove75",
},
"jetveto": {"Run2022E jetvetomap_eep": "Winter22Run3_RunE_v1.histo.root"},
"jetveto": {"Run2022E jetvetomap_eep": "Summer22EE_23Sep2023_RunEFG_v1.root"},
# use for BTA production, jet probablity
"JPCalib": {
"Run2022E": "calibeHistoWrite_Data2022F_NANO130X_v1.root",
Expand Down
6 changes: 5 additions & 1 deletion src/BTVNanoCommissioning/utils/correction.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,11 @@ def JME_shifts(
if k in events.metadata["dataset"]
]
if len(jecname) > 1:
raise ("Multiple uncertainties match to this era")
raise ValueError("Multiple uncertainties match to this era")
elif len(jecname) == 0:
raise ValueError(
"Available JEC variations in this era are not compatible with this file. Did you choose the correct dataset-era combination?"
)
else:
jecname = jecname[0] + "_DATA"
else:
Expand Down
6 changes: 4 additions & 2 deletions src/BTVNanoCommissioning/workflows/BTA_producer.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(
chunksize=75000,
addPFMuons=False, # BTA custom argument
addAllTracks=False, # BTA custom argument
base_file_path="root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee",
):
self._year = year
self._campaign = campaign
Expand All @@ -46,6 +47,7 @@ def __init__(
self.addPFMuons = addPFMuons
self.addAllTracks = addAllTracks
self.isSyst = isSyst
self.base_file_path = base_file_path

@property
def accumulator(self):
Expand All @@ -65,7 +67,7 @@ def process(self, events):
if self.isSyst:
fname = "systematic/" + fname
checkf = os.popen(
f"gfal-ls root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee/{dirname}/{self._campaign.replace('Run3','')}/{fname}"
f"gfal-ls {self.base_file_path}/{dirname}/{self._campaign.replace('Run3','')}/{fname}"
).read()
if len(checkf) > 0:
print("skip ", checkf)
Expand Down Expand Up @@ -1156,7 +1158,7 @@ def process_shift(self, events, shift_name):
output_root[bname] = ak.zip(b_nest)
fout["btagana/ttree"] = output_root
os.system(
f"xrdcp -p --silent {fname} root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee/{dirname}/{self._campaign.replace('Run3','')}/{fname}"
f"xrdcp -p --silent {fname} {self.base_file_path}/{dirname}/{self._campaign.replace('Run3','')}/{fname}"
)
os.system(f"rm {fname}")

Expand Down
118 changes: 91 additions & 27 deletions src/BTVNanoCommissioning/workflows/BTA_ttbar_producer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ def __init__(
isArray=True,
noHist=False,
chunksize=75000,
base_file_path="root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee",
output_dir="/tmp/{user}/phys_btag/btv_nano_commissioning",
):
self._year = year
self._campaign = campaign
Expand All @@ -37,6 +39,8 @@ def __init__(
# note: in BTA TTbarSelectionProducer it says "disable trigger selection in MC"
# for consistency, we will disable both trigger selection in MC and data here
self.do_trig_sel = False
self.base_file_path = base_file_path
self.output_dir = output_dir.format(user=os.environ["USER"]) if "{user}" in output_dir else output_dir

@property
def accumulator(self):
Expand Down Expand Up @@ -65,10 +69,10 @@ def process_shift(self, events, shift_name):
dataset = events.metadata["dataset"]

fname = f"{dataset}_{shift_name}/{events.metadata['filename'].split('/')[-1].replace('.root','')}_{int(events.metadata['entrystop']/self.chunksize)}.root"
outfile_path = f"{self.base_file_path}/BTA_ttbar/{self._campaign.replace('Run3','')}/{fname}"

checkf = os.popen(
f"gfal-ls root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee/BTA_ttbar/{self._campaign.replace('Run3','')}/{fname}"
).read()
gfal_ls_command = f"gfal-ls {outfile_path}"
checkf = os.popen(gfal_ls_command).read()
if len(checkf) > 0:
return {dataset: len(events)}

Expand Down Expand Up @@ -380,8 +384,13 @@ def process_shift(self, events, shift_name):
# store weights according to BTA code
# note: in original BTA code, all weights are appended in a same variable ttbar_w. Here we separate them according to the logic in NanoAOD
basic_vars["ttbar_w"] = events.genWeight
# LHE pdf variation weights (w_var / w_nominal) for LHA IDs 325300 - 325402
lhe_pdf_w_arrays = getattr(events, "LHEPdfWeight", None)
# [0] is renscfact=0.5d0 facscfact=0.5d0 ; [1] is renscfact=0.5d0 facscfact=1d0 ; [2] is renscfact=0.5d0 facscfact=2d0 ; [3] is renscfact=1d0 facscfact=0.5d0 ;
# [4] is renscfact=1d0 facscfact=1d0 ; [5] is renscfact=1d0 facscfact=2d0 ; [6] is renscfact=2d0 facscfact=0.5d0 ; [7] is renscfact=2d0 facscfact=1d0 ;
# [8] is renscfact=2d0 facscfact=2d0
lhe_scale_w_arrays = getattr(events, "LHEScaleWeight", None)
# [0] is ISR=2 FSR=1; [1] is ISR=1 FSR=2[2] is ISR=0.5 FSR=1; [3] is ISR=1 FSR=0.5;
ps_w_arrays = getattr(events, "PSWeight", None)

###############
Expand Down Expand Up @@ -515,7 +524,10 @@ def process_shift(self, events, shift_name):
passJetSel = (
((abs(chsel) == 11) | (abs(chsel) == 13)) & (ak.num(Jet_clean) >= 4)
) | ((abs(chsel) > 13) & (ak.num(Jet_clean) >= 1))
passMetSel = events.PuppiMET.pt > 0
if "Run3" in self._campaign:
passMetSel = events.PuppiMET.pt > 0
else:
passMetSel = events.MET.pt > 0
Comment on lines +549 to +552
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apologies, did not find out the implementation was using the JME_shifts
Indeed in the functions we already implement the JEC with puppiMET and replace it as MET branch.

If you use the puppiMET branch, this might not propagate the JEC correction correctly

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @philippgadow could you please fix this and I think we are good to merge :) thanks!


# and the channel selection, configured in TTbarSelectionFilter
# https://github.com/cms-btv-pog/RecoBTag-PerformanceMeasurements/blob/10_6_X/python/TTbarSelectionFilter_cfi.py#L4
Expand All @@ -527,25 +539,55 @@ def process_shift(self, events, shift_name):
###############
# Write root #
###############
fname = f"{dataset}_{shift_name}/{events.metadata['filename'].split('/')[-1].replace('.root','')}_{int(events.metadata['entrystop']/self.chunksize)}.root"
os.system(f"mkdir -p {dataset}_{shift_name}")
local_outdir = f"{self.output_dir}/BTA_ttbar/{self._campaign.replace('Run3','')}"
# fname is composed of {dataset}_{shift_name}/ and filename
os.system(f"mkdir -p {local_outdir}/{dataset}_{shift_name}")
local_outfile_path = f"{local_outdir}/{fname}"
# no events pass selection, just write bookkeeping metadata to file
if ak.any(passEvent) == False:
with uproot.recreate(fname) as fout:
fout["sumw"] = {
"total_events": ak.Array([len(events)]),
}
if not isRealData:
fout["total_pos_events"] = ak.Array([ak.sum(events.genWeight > 0)])
fout["total_neg_events"] = ak.Array(
[-1.0 * ak.sum(events.genWeight < 0)]
)
fout["total_wei_events"] = ak.Array([ak.sum(events.genWeight)])
fout["total_poswei_events"] = ak.Array(
[ak.sum(events.genWeight[events.genWeight > 0.0])]
)
fout["total_negwei_events"] = ak.Array(
[ak.sum(events.genWeight[events.genWeight < 0.0])]
)
with uproot.recreate(local_outfile_path) as fout:
if isRealData:
fout["sumw"] = {"total_events": ak.Array([len(events)])}
else:
# add counters for systematic variations of lhe and parton shower weights
# (this is not pretty but uproot wants fixed structure of branches)
try:
number_lhe_scaleweights = len(lhe_scale_w_arrays) / len(lhe_scale_w_arrays[:,0])
except TypeError:
number_lhe_scaleweights = 0
try:
number_of_psweights = len(ps_w_arrays) / len(ps_w_arrays[:,0])
except TypeError:
number_of_psweights = 0
fout["sumw"] = {
"total_events": ak.Array([len(events)]),
"total_pos_events": ak.Array([ak.sum(events.genWeight > 0)]),
"total_neg_events": ak.Array([-1.0 * ak.sum(events.genWeight < 0)]),
"total_wei_events": ak.Array([ak.sum(events.genWeight)]),
"total_poswei_events": ak.Array(
[ak.sum(events.genWeight[events.genWeight > 0.0])]
),
"total_negwei_events": ak.Array(
[ak.sum(events.genWeight[events.genWeight < 0.0])]
),
"total_lhe_scaleweights_0": ak.Array([ak.sum(lhe_scale_w_arrays[:, 0] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 0 else ak.Array([0.]),
"total_lhe_scaleweights_1": ak.Array([ak.sum(lhe_scale_w_arrays[:, 1] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 1 else ak.Array([0.]),
"total_lhe_scaleweights_2": ak.Array([ak.sum(lhe_scale_w_arrays[:, 2] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 2 else ak.Array([0.]),
"total_lhe_scaleweights_3": ak.Array([ak.sum(lhe_scale_w_arrays[:, 3] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 3 else ak.Array([0.]),
"total_lhe_scaleweights_4": ak.Array([ak.sum(lhe_scale_w_arrays[:, 4] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 4 else ak.Array([0.]),
"total_lhe_scaleweights_5": ak.Array([ak.sum(lhe_scale_w_arrays[:, 5] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 5 else ak.Array([0.]),
"total_lhe_scaleweights_6": ak.Array([ak.sum(lhe_scale_w_arrays[:, 6] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 6 else ak.Array([0.]),
"total_lhe_scaleweights_7": ak.Array([ak.sum(lhe_scale_w_arrays[:, 7] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 7 else ak.Array([0.]),
"total_lhe_scaleweights_8": ak.Array([ak.sum(lhe_scale_w_arrays[:, 8] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 8 else ak.Array([0.]),
"total_psweights_0": ak.Array([ak.sum(ps_w_arrays[:, 0] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 0 else ak.Array([0.]),
"total_psweights_1": ak.Array([ak.sum(ps_w_arrays[:, 1] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 1 else ak.Array([0.]),
"total_psweights_2": ak.Array([ak.sum(ps_w_arrays[:, 2] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 2 else ak.Array([0.]),
"total_psweights_3": ak.Array([ak.sum(ps_w_arrays[:, 3] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 3 else ak.Array([0.]),
}
# transfer file for bookkeeping
transfer_command = f"xrdcp -p --silent {local_outfile_path} {outfile_path}"
os.system(transfer_command)
os.system(f"rm {local_outfile_path}")
return {dataset: 0}

output = {
Expand All @@ -563,7 +605,7 @@ def process_shift(self, events, shift_name):
output["ttbar_ps_w"] = ps_w_arrays[passEvent]

# customize output file name: <dataset>_<nanoAOD file name>_<chunk index>.root
with uproot.recreate(fname) as fout:
with uproot.recreate(local_outfile_path) as fout:
output_root = {}
for bname in output.keys():
if not output[bname].fields:
Expand All @@ -577,6 +619,16 @@ def process_shift(self, events, shift_name):
if isRealData:
fout["sumw"] = {"total_events": ak.Array([len(events)])}
else:
# add counters for systematic variations of lhe and parton shower weights
# (this is not pretty but uproot wants fixed structure of branches)
try:
number_lhe_scaleweights = len(lhe_scale_w_arrays) / len(lhe_scale_w_arrays[:,0])
except TypeError:
number_lhe_scaleweights = 0
try:
number_of_psweights = len(ps_w_arrays) / len(ps_w_arrays[:,0])
except TypeError:
number_of_psweights = 0
fout["sumw"] = {
"total_events": ak.Array([len(events)]),
"total_pos_events": ak.Array([ak.sum(events.genWeight > 0)]),
Expand All @@ -588,11 +640,23 @@ def process_shift(self, events, shift_name):
"total_negwei_events": ak.Array(
[ak.sum(events.genWeight[events.genWeight < 0.0])]
),
"total_lhe_scaleweights_0": ak.Array([ak.sum(lhe_scale_w_arrays[:, 0] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 0 else ak.Array([0.]),
"total_lhe_scaleweights_1": ak.Array([ak.sum(lhe_scale_w_arrays[:, 1] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 1 else ak.Array([0.]),
"total_lhe_scaleweights_2": ak.Array([ak.sum(lhe_scale_w_arrays[:, 2] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 2 else ak.Array([0.]),
"total_lhe_scaleweights_3": ak.Array([ak.sum(lhe_scale_w_arrays[:, 3] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 3 else ak.Array([0.]),
"total_lhe_scaleweights_4": ak.Array([ak.sum(lhe_scale_w_arrays[:, 4] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 4 else ak.Array([0.]),
"total_lhe_scaleweights_5": ak.Array([ak.sum(lhe_scale_w_arrays[:, 5] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 5 else ak.Array([0.]),
"total_lhe_scaleweights_6": ak.Array([ak.sum(lhe_scale_w_arrays[:, 6] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 6 else ak.Array([0.]),
"total_lhe_scaleweights_7": ak.Array([ak.sum(lhe_scale_w_arrays[:, 7] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 7 else ak.Array([0.]),
"total_lhe_scaleweights_8": ak.Array([ak.sum(lhe_scale_w_arrays[:, 8] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 8 else ak.Array([0.]),
"total_psweights_0": ak.Array([ak.sum(ps_w_arrays[:, 0] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 0 else ak.Array([0.]),
"total_psweights_1": ak.Array([ak.sum(ps_w_arrays[:, 1] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 1 else ak.Array([0.]),
"total_psweights_2": ak.Array([ak.sum(ps_w_arrays[:, 2] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 2 else ak.Array([0.]),
"total_psweights_3": ak.Array([ak.sum(ps_w_arrays[:, 3] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 3 else ak.Array([0.]),
}
os.system(
f"xrdcp -p --silent {fname} root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee/BTA_ttbar/{self._campaign.replace('Run3','')}/{fname}"
)
os.system(f"rm {fname}")
transfer_command = f"xrdcp -p --silent {local_outfile_path} {outfile_path}"
os.system(transfer_command)
os.system(f"rm {local_outfile_path}")

return {dataset: len(events)}

Expand Down
3 changes: 1 addition & 2 deletions test_env.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: btv_coffea
channels:
- conda-forge
- defaults
- nodefaults
dependencies:
- python[version='<=3.10']
- voms
Expand All @@ -19,4 +19,3 @@ dependencies:
- arrow
- dask-jobqueue
- xgboost

Loading