cms-btv-pog · philippgadow · Jan 16, 2025 · Jan 6, 2025 · Jan 16, 2025 · Jan 16, 2025
diff --git a/scripts/suball.py b/scripts/suball.py
@@ -58,8 +58,8 @@ def get_lumi_from_web(year):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Mastering workflow submission")
     parser = config_parser(parser)
-    paser = scaleout_parser(parser)
-    paser = debug_parser(parser)
+    parser = scaleout_parser(parser)
+    parser = debug_parser(parser)
     parser.add_argument(
         "-sc",
         "--scheme",

diff --git a/src/BTVNanoCommissioning/data/JME/2022_Summer22/Summer22_23Sep2023_RunCD_v1.root b/src/BTVNanoCommissioning/data/JME/2022_Summer22/Summer22_23Sep2023_RunCD_v1.root
diff --git a/src/BTVNanoCommissioning/data/JME/2022_Summer22EE/Summer22EE_23Sep2023_RunEFG_v1.root b/src/BTVNanoCommissioning/data/JME/2022_Summer22EE/Summer22EE_23Sep2023_RunEFG_v1.root
diff --git a/src/BTVNanoCommissioning/helpers/func.py b/src/BTVNanoCommissioning/helpers/func.py
@@ -257,5 +257,6 @@ def uproot_writeable(events, include=["events", "run", "luminosityBlock"]):
                 b_nest[n] = ak.fill_none(
                     ak.packed(ak.without_parameters(events[bname][n])), -99
                 )
-            ev[bname] = ak.zip(b_nest)
+            if bool(b_nest):
+                ev[bname] = ak.zip(b_nest)
     return ev
diff --git a/src/BTVNanoCommissioning/utils/AK4_parameters.py b/src/BTVNanoCommissioning/utils/AK4_parameters.py
@@ -61,7 +61,7 @@
             "Run2022D": "calibeHistoWrite_Data2022D_NANO130X_v1.root",
             "MC": "calibeHistoWrite_MC2022_NANO130X_v2.root",
         },
-        "jetveto": {"Run2022CD jetvetomap": "Winter22Run3_RunCD_v1.histo.root"},
+        "jetveto": {"Run2022CD jetvetomap": "Summer22_23Sep2023_RunCD_v1.root"},
     },
     "Summer22EE": {
         "lumiMask": "Cert_Collisions2022_355100_362760_Golden.json",
@@ -80,7 +80,7 @@
             "ele_Reco_med 2022Re-recoE+PromptFG Electron-ID-SF": "Reco20to75",
             "ele_Reco_high 2022Re-recoE+PromptFG Electron-ID-SF": "RecoAbove75",
         },
-        "jetveto": {"Run2022E jetvetomap_eep": "Winter22Run3_RunE_v1.histo.root"},
+        "jetveto": {"Run2022E jetvetomap_eep": "Summer22EE_23Sep2023_RunEFG_v1.root"},
         # use for BTA production, jet probablity
         "JPCalib": {
             "Run2022E": "calibeHistoWrite_Data2022F_NANO130X_v1.root",

diff --git a/src/BTVNanoCommissioning/utils/correction.py b/src/BTVNanoCommissioning/utils/correction.py
@@ -422,7 +422,11 @@ def JME_shifts(
                     if k in events.metadata["dataset"]
                 ]
                 if len(jecname) > 1:
-                    raise ("Multiple uncertainties match to this era")
+                    raise ValueError("Multiple uncertainties match to this era")
+                elif len(jecname) == 0:
+                    raise ValueError(
+                        "Available JEC variations in this era are not compatible with this file. Did you choose the correct dataset-era combination?"
+                    )
                 else:
                     jecname = jecname[0] + "_DATA"
             else:

diff --git a/src/BTVNanoCommissioning/workflows/BTA_producer.py b/src/BTVNanoCommissioning/workflows/BTA_producer.py
@@ -34,6 +34,7 @@ def __init__(
         chunksize=75000,
         addPFMuons=False,  # BTA custom argument
         addAllTracks=False,  # BTA custom argument
+        base_file_path="root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee",      
     ):
         self._year = year
         self._campaign = campaign
@@ -46,6 +47,7 @@ def __init__(
         self.addPFMuons = addPFMuons
         self.addAllTracks = addAllTracks
         self.isSyst = isSyst
+        self.base_file_path = base_file_path
 
     @property
     def accumulator(self):
@@ -65,7 +67,7 @@ def process(self, events):
         if self.isSyst:
             fname = "systematic/" + fname
         checkf = os.popen(
-            f"gfal-ls root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee/{dirname}/{self._campaign.replace('Run3','')}/{fname}"
+            f"gfal-ls {self.base_file_path}/{dirname}/{self._campaign.replace('Run3','')}/{fname}"
         ).read()
         if len(checkf) > 0:
             print("skip ", checkf)
@@ -1156,7 +1158,7 @@ def process_shift(self, events, shift_name):
                     output_root[bname] = ak.zip(b_nest)
             fout["btagana/ttree"] = output_root
         os.system(
-            f"xrdcp -p --silent {fname} root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee/{dirname}/{self._campaign.replace('Run3','')}/{fname}"
+            f"xrdcp -p --silent {fname} {self.base_file_path}/{dirname}/{self._campaign.replace('Run3','')}/{fname}"
         )
         os.system(f"rm {fname}")
 

diff --git a/src/BTVNanoCommissioning/workflows/BTA_ttbar_producer.py b/src/BTVNanoCommissioning/workflows/BTA_ttbar_producer.py
@@ -24,6 +24,8 @@ def __init__(
         isArray=True,
         noHist=False,
         chunksize=75000,
+        base_file_path="root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee",
+        output_dir="/tmp/{user}/phys_btag/btv_nano_commissioning",
     ):
         self._year = year
         self._campaign = campaign
@@ -37,6 +39,8 @@ def __init__(
         # note: in BTA TTbarSelectionProducer it says "disable trigger selection in MC"
         # for consistency, we will disable both trigger selection in MC and data here
         self.do_trig_sel = False
+        self.base_file_path = base_file_path
+        self.output_dir = output_dir.format(user=os.environ["USER"]) if "{user}" in output_dir else output_dir
 
     @property
     def accumulator(self):
@@ -65,10 +69,10 @@ def process_shift(self, events, shift_name):
         dataset = events.metadata["dataset"]
 
         fname = f"{dataset}_{shift_name}/{events.metadata['filename'].split('/')[-1].replace('.root','')}_{int(events.metadata['entrystop']/self.chunksize)}.root"
+        outfile_path = f"{self.base_file_path}/BTA_ttbar/{self._campaign.replace('Run3','')}/{fname}"
 
-        checkf = os.popen(
-            f"gfal-ls root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee/BTA_ttbar/{self._campaign.replace('Run3','')}/{fname}"
-        ).read()
+        gfal_ls_command = f"gfal-ls {outfile_path}"
+        checkf = os.popen(gfal_ls_command).read()
         if len(checkf) > 0:
             return {dataset: len(events)}
 
@@ -380,8 +384,13 @@ def process_shift(self, events, shift_name):
             # store weights according to BTA code
             # note: in original BTA code, all weights are appended in a same variable ttbar_w. Here we separate them according to the logic in NanoAOD
             basic_vars["ttbar_w"] = events.genWeight
+            # LHE pdf variation weights (w_var / w_nominal) for LHA IDs 325300 - 325402
             lhe_pdf_w_arrays = getattr(events, "LHEPdfWeight", None)
+            # [0] is renscfact=0.5d0 facscfact=0.5d0 ; [1] is renscfact=0.5d0 facscfact=1d0 ; [2] is renscfact=0.5d0 facscfact=2d0 ; [3] is renscfact=1d0 facscfact=0.5d0 ;
+            # [4] is renscfact=1d0 facscfact=1d0 ; [5] is renscfact=1d0 facscfact=2d0 ; [6] is renscfact=2d0 facscfact=0.5d0 ; [7] is renscfact=2d0 facscfact=1d0 ;
+            # [8] is renscfact=2d0 facscfact=2d0
             lhe_scale_w_arrays = getattr(events, "LHEScaleWeight", None)
+            # [0] is ISR=2 FSR=1; [1] is ISR=1 FSR=2[2] is ISR=0.5 FSR=1; [3] is ISR=1 FSR=0.5;
             ps_w_arrays = getattr(events, "PSWeight", None)
 
         ###############
@@ -515,7 +524,10 @@ def process_shift(self, events, shift_name):
         passJetSel = (
             ((abs(chsel) == 11) | (abs(chsel) == 13)) & (ak.num(Jet_clean) >= 4)
         ) | ((abs(chsel) > 13) & (ak.num(Jet_clean) >= 1))
-        passMetSel = events.PuppiMET.pt > 0
+        if "Run3" in self._campaign:
+            passMetSel = events.PuppiMET.pt > 0
+        else:
+            passMetSel = events.MET.pt > 0
 
         # and the channel selection, configured in TTbarSelectionFilter
         # https://github.com/cms-btv-pog/RecoBTag-PerformanceMeasurements/blob/10_6_X/python/TTbarSelectionFilter_cfi.py#L4
@@ -527,25 +539,55 @@ def process_shift(self, events, shift_name):
         ###############
         #  Write root #
         ###############
-        fname = f"{dataset}_{shift_name}/{events.metadata['filename'].split('/')[-1].replace('.root','')}_{int(events.metadata['entrystop']/self.chunksize)}.root"
-        os.system(f"mkdir -p {dataset}_{shift_name}")
+        local_outdir = f"{self.output_dir}/BTA_ttbar/{self._campaign.replace('Run3','')}"
+        # fname is composed of {dataset}_{shift_name}/ and filename
+        os.system(f"mkdir -p {local_outdir}/{dataset}_{shift_name}")
+        local_outfile_path = f"{local_outdir}/{fname}"
+        # no events pass selection, just write bookkeeping metadata to file
         if ak.any(passEvent) == False:
-            with uproot.recreate(fname) as fout:
-                fout["sumw"] = {
-                    "total_events": ak.Array([len(events)]),
-                }
-                if not isRealData:
-                    fout["total_pos_events"] = ak.Array([ak.sum(events.genWeight > 0)])
-                    fout["total_neg_events"] = ak.Array(
-                        [-1.0 * ak.sum(events.genWeight < 0)]
-                    )
-                    fout["total_wei_events"] = ak.Array([ak.sum(events.genWeight)])
-                    fout["total_poswei_events"] = ak.Array(
-                        [ak.sum(events.genWeight[events.genWeight > 0.0])]
-                    )
-                    fout["total_negwei_events"] = ak.Array(
-                        [ak.sum(events.genWeight[events.genWeight < 0.0])]
-                    )
+            with uproot.recreate(local_outfile_path) as fout:
+                if isRealData:
+                    fout["sumw"] = {"total_events": ak.Array([len(events)])}
+                else:
+                    # add counters for systematic variations of lhe and parton shower weights
+                    # (this is not pretty but uproot wants fixed structure of branches)
+                    try:
+                        number_lhe_scaleweights = len(lhe_scale_w_arrays) / len(lhe_scale_w_arrays[:,0])
+                    except TypeError:
+                        number_lhe_scaleweights = 0
+                    try:
+                        number_of_psweights = len(ps_w_arrays) / len(ps_w_arrays[:,0])
+                    except TypeError:
+                        number_of_psweights = 0
+                    fout["sumw"] = {
+                        "total_events": ak.Array([len(events)]),
+                        "total_pos_events": ak.Array([ak.sum(events.genWeight > 0)]),
+                        "total_neg_events": ak.Array([-1.0 * ak.sum(events.genWeight < 0)]),
+                        "total_wei_events": ak.Array([ak.sum(events.genWeight)]),
+                        "total_poswei_events": ak.Array(
+                            [ak.sum(events.genWeight[events.genWeight > 0.0])]
+                        ),
+                        "total_negwei_events": ak.Array(
+                            [ak.sum(events.genWeight[events.genWeight < 0.0])]
+                        ),
+                        "total_lhe_scaleweights_0": ak.Array([ak.sum(lhe_scale_w_arrays[:, 0] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 0 else ak.Array([0.]),
+                        "total_lhe_scaleweights_1": ak.Array([ak.sum(lhe_scale_w_arrays[:, 1] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 1 else ak.Array([0.]),
+                        "total_lhe_scaleweights_2": ak.Array([ak.sum(lhe_scale_w_arrays[:, 2] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 2 else ak.Array([0.]),
+                        "total_lhe_scaleweights_3": ak.Array([ak.sum(lhe_scale_w_arrays[:, 3] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 3 else ak.Array([0.]),
+                        "total_lhe_scaleweights_4": ak.Array([ak.sum(lhe_scale_w_arrays[:, 4] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 4 else ak.Array([0.]),
+                        "total_lhe_scaleweights_5": ak.Array([ak.sum(lhe_scale_w_arrays[:, 5] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 5 else ak.Array([0.]),
+                        "total_lhe_scaleweights_6": ak.Array([ak.sum(lhe_scale_w_arrays[:, 6] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 6 else ak.Array([0.]),
+                        "total_lhe_scaleweights_7": ak.Array([ak.sum(lhe_scale_w_arrays[:, 7] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 7 else ak.Array([0.]),
+                        "total_lhe_scaleweights_8": ak.Array([ak.sum(lhe_scale_w_arrays[:, 8] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 8 else ak.Array([0.]),
+                        "total_psweights_0": ak.Array([ak.sum(ps_w_arrays[:, 0] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 0 else ak.Array([0.]),
+                        "total_psweights_1": ak.Array([ak.sum(ps_w_arrays[:, 1] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 1 else ak.Array([0.]),
+                        "total_psweights_2": ak.Array([ak.sum(ps_w_arrays[:, 2] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 2 else ak.Array([0.]),
+                        "total_psweights_3": ak.Array([ak.sum(ps_w_arrays[:, 3] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 3 else ak.Array([0.]),
+                    }
+            # transfer file for bookkeeping
+            transfer_command = f"xrdcp -p --silent {local_outfile_path} {outfile_path}"
+            os.system(transfer_command)
+            os.system(f"rm {local_outfile_path}")
             return {dataset: 0}
 
         output = {
@@ -563,7 +605,7 @@ def process_shift(self, events, shift_name):
                 output["ttbar_ps_w"] = ps_w_arrays[passEvent]
 
         # customize output file name: <dataset>_<nanoAOD file name>_<chunk index>.root
-        with uproot.recreate(fname) as fout:
+        with uproot.recreate(local_outfile_path) as fout:
             output_root = {}
             for bname in output.keys():
                 if not output[bname].fields:
@@ -577,6 +619,16 @@ def process_shift(self, events, shift_name):
             if isRealData:
                 fout["sumw"] = {"total_events": ak.Array([len(events)])}
             else:
+                # add counters for systematic variations of lhe and parton shower weights
+                # (this is not pretty but uproot wants fixed structure of branches)
+                try:
+                    number_lhe_scaleweights = len(lhe_scale_w_arrays) / len(lhe_scale_w_arrays[:,0])
+                except TypeError:
+                    number_lhe_scaleweights = 0
+                try:
+                    number_of_psweights = len(ps_w_arrays) / len(ps_w_arrays[:,0])
+                except TypeError:
+                    number_of_psweights = 0
                 fout["sumw"] = {
                     "total_events": ak.Array([len(events)]),
                     "total_pos_events": ak.Array([ak.sum(events.genWeight > 0)]),
@@ -588,11 +640,23 @@ def process_shift(self, events, shift_name):
                     "total_negwei_events": ak.Array(
                         [ak.sum(events.genWeight[events.genWeight < 0.0])]
                     ),
+                    "total_lhe_scaleweights_0": ak.Array([ak.sum(lhe_scale_w_arrays[:, 0] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 0 else ak.Array([0.]),
+                    "total_lhe_scaleweights_1": ak.Array([ak.sum(lhe_scale_w_arrays[:, 1] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 1 else ak.Array([0.]),
+                    "total_lhe_scaleweights_2": ak.Array([ak.sum(lhe_scale_w_arrays[:, 2] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 2 else ak.Array([0.]),
+                    "total_lhe_scaleweights_3": ak.Array([ak.sum(lhe_scale_w_arrays[:, 3] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 3 else ak.Array([0.]),
+                    "total_lhe_scaleweights_4": ak.Array([ak.sum(lhe_scale_w_arrays[:, 4] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 4 else ak.Array([0.]),
+                    "total_lhe_scaleweights_5": ak.Array([ak.sum(lhe_scale_w_arrays[:, 5] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 5 else ak.Array([0.]),
+                    "total_lhe_scaleweights_6": ak.Array([ak.sum(lhe_scale_w_arrays[:, 6] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 6 else ak.Array([0.]),
+                    "total_lhe_scaleweights_7": ak.Array([ak.sum(lhe_scale_w_arrays[:, 7] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 7 else ak.Array([0.]),
+                    "total_lhe_scaleweights_8": ak.Array([ak.sum(lhe_scale_w_arrays[:, 8] * events.genWeight)]) if lhe_pdf_w_arrays is not None and number_lhe_scaleweights > 8 else ak.Array([0.]),
+                    "total_psweights_0": ak.Array([ak.sum(ps_w_arrays[:, 0] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 0 else ak.Array([0.]),
+                    "total_psweights_1": ak.Array([ak.sum(ps_w_arrays[:, 1] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 1 else ak.Array([0.]),
+                    "total_psweights_2": ak.Array([ak.sum(ps_w_arrays[:, 2] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 2 else ak.Array([0.]),
+                    "total_psweights_3": ak.Array([ak.sum(ps_w_arrays[:, 3] * events.genWeight)]) if ps_w_arrays is not None and number_of_psweights > 3 else ak.Array([0.]),
                 }
-        os.system(
-            f"xrdcp -p --silent {fname} root://eoscms.cern.ch//eos/cms/store/group/phys_btag/milee/BTA_ttbar/{self._campaign.replace('Run3','')}/{fname}"
-        )
-        os.system(f"rm {fname}")
+        transfer_command = f"xrdcp -p --silent {local_outfile_path} {outfile_path}"
+        os.system(transfer_command)
+        os.system(f"rm {local_outfile_path}")
 
         return {dataset: len(events)}
 

diff --git a/test_env.yml b/test_env.yml
@@ -1,7 +1,7 @@
 name: btv_coffea
 channels:
   - conda-forge
-  - defaults
+  - nodefaults
 dependencies:
   - python[version='<=3.10']
   - voms
@@ -19,4 +19,3 @@ dependencies:
   - arrow
   - dask-jobqueue
   - xgboost
-