Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2024 Data RelVals and InputInfo Events Skimming (actually) Working #45055

Merged
merged 3 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions Configuration/PyReleaseValidation/python/MatrixReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ def reset(self, what='all'):
'relval_identity':'id-',
'relval_machine': 'mach-',
'relval_premix': 'premix-',
'relval_nano':'nano-'
'relval_nano':'nano-',
'relval_data_highstats':'data-'
}

self.files = ['relval_standard' ,
Expand All @@ -73,7 +74,8 @@ def reset(self, what='all'):
'relval_identity',
'relval_machine',
'relval_premix',
'relval_nano'
'relval_nano',
'relval_data_highstats'
]
self.filesDefault = {'relval_standard':True ,
'relval_highstats':True ,
Expand All @@ -90,7 +92,8 @@ def reset(self, what='all'):
'relval_identity':False,
'relval_machine':True,
'relval_premix':True,
'relval_nano':True
'relval_nano':True,
'relval_data_highstats':False
}

self.relvalModule = None
Expand Down
32 changes: 21 additions & 11 deletions Configuration/PyReleaseValidation/python/MatrixUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def selectedLS(list_runs=[],maxNum=-1,l_json=data_json2015):

InputInfoNDefault=2000000
class InputInfo(object):
def __init__(self,dataSet,dataSetParent='',label='',run=[],ls={},files=1000,events=InputInfoNDefault,split=10,location='CAF',ib_blacklist=None,ib_block=None) :
def __init__(self,dataSet,dataSetParent='',label='',run=[],ls={},files=1000,events=InputInfoNDefault,split=10,location='CAF',ib_blacklist=None,ib_block=None,skimEvents=False) :
self.run = run
self.ls = ls
self.files = files
Expand All @@ -115,37 +115,47 @@ def __init__(self,dataSet,dataSetParent='',label='',run=[],ls={},files=1000,even
self.ib_blacklist = ib_blacklist
self.ib_block = ib_block
self.dataSetParent = dataSetParent

self.skimEvents = skimEvents

def das(self, das_options, dataset):
if len(self.run) != 0 or self.ls:
if not self.skimEvents and (len(self.run) != 0 or self.ls):
queries = self.queries(dataset)
if len(self.run) != 0:
command = ";".join(["dasgoclient %s --query '%s'" % (das_options, query) for query in queries])
command = ";".join(["dasgoclient %s --query '%s'" % (das_options, query) for query in queries])
else:
lumis = self.lumis()
commands = []
while queries:
commands.append("dasgoclient %s --query 'lumi,%s' --format json | das-selected-lumis.py %s " % (das_options, queries.pop(), lumis.pop()))
commands.append("dasgoclient %s --query 'lumi,%s' --format json | das-selected-lumis.py %s " % (das_options, queries.pop(), lumis.pop()))
command = ";".join(commands)
command = "({0})".format(command)
else:
elif not self.skimEvents:
command = "dasgoclient %s --query '%s'" % (das_options, self.queries(dataset)[0])

elif self.skimEvents:
from os import getenv
if getenv("CMSSW_USE_IBEOS","false")=="true":
# to be assured that whatever happens the files are only those at CERN
command = "das-up-to-nevents.py -d %s -e %d -s T2_CH_CERN"%(dataset,self.events)
else:
command = "das-up-to-nevents.py -d %s -e %d"%(dataset,self.events)
# Run filter on DAS output
if self.ib_blacklist:
command += " | grep -E -v "
command += " ".join(["-e '{0}'".format(pattern) for pattern in self.ib_blacklist])
from os import getenv
if getenv("CMSSW_USE_IBEOS","false")=="true": return command + " | ibeos-lfn-sort"
return command + " | sort -u"
if not self.skimEvents: ## keep run-lumi sorting
from os import getenv
if getenv("CMSSW_USE_IBEOS","false")=="true": return command + " | ibeos-lfn-sort"
return command + " | sort -u"
else:
return command

def lumiRanges(self):
if len(self.run) != 0:
return "echo '{\n"+",".join(('"%d":[[1,268435455]]\n'%(x,) for x in self.run))+"}'"
if self.ls :
return "echo '{\n"+",".join(('"%d" : %s\n'%( int(x),self.ls[x]) for x in self.ls.keys()))+"}'"
return None

def lumis(self):
query_lumis = []
if self.ls:
Expand Down
30 changes: 30 additions & 0 deletions Configuration/PyReleaseValidation/python/relval_data_highstats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# import the definition of the steps and input files:
from Configuration.PyReleaseValidation.relval_steps import *

# here only define the workflows as a combination of the steps defined above:
workflows = Matrix()

## Higher-statistics (>50k events) data workflows, NOT run by default.
## Each one processes 150k, 250k, 500k or 1M events.

## 2024
base_wf_number_2024 = 2024.0
offset_era = 0.1       # less than 10 eras
offset_pd = 0.001      # less than 100 pds
offset_events = 0.0001 # less than 10 event setups (50k,150k,250k,500k)

for e_n, era in enumerate(eras_2024):
    era_tag = era.split("Run")[1]
    for p_n, pd in enumerate(pds_2024):
        for e_key, evs in event_steps_dict.items():
            # the 50k setup is already defined in relval_standard
            if e_key == "50k":
                continue
            # left-to-right addition keeps the float result (and hence the
            # rounded workflow number) identical to the step-by-step form
            wf_number = round(base_wf_number_2024
                              + offset_era * e_n
                              + offset_pd * p_n
                              + offset_events * evs, 6)
            step_name = "Run" + pd + era_tag + "_" + e_key
            workflows[wf_number] = ['',[step_name,'HLTDR3_2024','AODNANORUN3_reHLT_2024','HARVESTRUN3_2024']]



23 changes: 23 additions & 0 deletions Configuration/PyReleaseValidation/python/relval_highstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,26 @@



## 2024 Data Higher Stats Workflows
## with 150k, 250k, 500k or 1M events each

base_wf_number_2024 = 2024.0
offset_era = 0.1 # less than 10 eras
offset_pd = 0.001 # less than 100 pds
offset_events = 0.0001 # less than 10 event setups (50k,150k,250k,500k)

for e_n,era in enumerate(eras_2024):
    for p_n,pd in enumerate(pds_2024):
        for e_key,evs in event_steps_dict.items():
            # Skip ONLY the 50k setup: it is already defined in relval_standard.
            # NOTE: a substring test ("50k" in e_key) would also match "150k"
            # and "250k" and silently drop those workflows, so compare equal.
            if "50k" == e_key:
                continue
            # workflow number = 2024.0 + 0.1*era + 0.001*pd + 0.0001*Mevents
            wf_number = base_wf_number_2024
            wf_number = wf_number + offset_era * e_n
            wf_number = wf_number + offset_pd * p_n
            wf_number = wf_number + offset_events * evs
            wf_number = round(wf_number,6)
            step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key
            workflows[wf_number] = ['',[step_name,'HLTDR3_2024','AODNANORUN3_reHLT_2024','HARVESTRUN3_2024']]



16 changes: 16 additions & 0 deletions Configuration/PyReleaseValidation/python/relval_standard.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,7 @@
workflows[136.903] = ['', ['RunDoubleMuon2017B', 'TauEmbedding_Selection_2017', 'TauEmbedding_Cleaning_2017', 'TauEmbedding_GenPreHLT_2017', 'TauEmbedding_GenHLT_2017', 'TauEmbedding_GenPostHLT_2017', 'TauEmbedding_Merging_2017']]
workflows[136.904] = ['', ['RunDoubleMuon2018C', 'TauEmbedding_Selection_2018', 'TauEmbedding_Cleaning_2018', 'TauEmbedding_GenPreHLT_2018', 'TauEmbedding_GenHLT_2018', 'TauEmbedding_GenPostHLT_2018', 'TauEmbedding_Merging_2018']]


### run 2021 collisions ###
workflows[139.001] = ['RunMinimumBias2021',['RunMinimumBias2021','HLTDR3_2022','RECODR3_reHLT_MinBiasOffline','HARVESTD2021MB_reHLT']]
workflows[139.002] = ['',['RunZeroBias2021','HLTDR3_2022','RECODR3_reHLT_ZBOffline','HARVESTD2021ZB_reHLT']]
Expand Down Expand Up @@ -558,6 +559,21 @@
workflows[142.901] = ['',['RunUPC2023','RECODR3_2024_UPC','HARVESTDPROMPTR3']]
workflows[142.902] = ['',['RunUPC2023','RECODR3_2024_HIN','HARVESTDPROMPTR3']]

## 2024 Data Workflows
## Default data workflows (50k events each): one per (era, primary dataset) pair.
base_wf_number_2024 = 2024.0
offset_era = 0.1 # less than 10 eras
offset_pd = 0.001 # less than 100 pds

for e_n,era in enumerate(eras_2024):
    for p_n,pd in enumerate(pds_2024):
        wf_number = base_wf_number_2024
        wf_number = wf_number + offset_era * e_n
        wf_number = wf_number + offset_pd * p_n
        # per-event-setup offset (0.0001) times 0.05 Mevents, i.e. the 50k setup
        wf_number = wf_number + 0.0001 * 0.05
        wf_number = round(wf_number,6)
        step_name = "Run" + pd + era.split("Run")[1] + "_50k"
        workflows[wf_number] = ['',[step_name,'HLTDR3_2024','AODNANORUN3_reHLT_2024','HARVESTRUN3_2024']]

### fastsim ###
workflows[5.1] = ['TTbarFS', ['TTbarFS','HARVESTFS']]
workflows[5.2] = ['SingleMuPt10FS', ['SingleMuPt10FS','HARVESTFS']]
Expand Down
42 changes: 37 additions & 5 deletions Configuration/PyReleaseValidation/python/relval_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@

steps = Steps()

#### Event-count setups, in millions of events, keyed by a readable label
event_steps_dict = {
    "50k": 0.05,
    "150k": 0.15,
    "250k": 0.25,
    "500k": 0.5,
    "1M": 1,
}
event_steps_k = list(event_steps_dict)          # ["50k","150k","250k","500k","1M"]
event_steps = list(event_steps_dict.values())   # in millions
#### Production test section ####
steps['ProdMinBias']=merge([{'cfg':'MinBias_8TeV_pythia8_TuneCUETP8M1_cff','--relval':'9000,300'},step1Defaults])
steps['ProdTTbar']=merge([{'cfg':'TTbar_8TeV_TuneCUETP8M1_cfi','--relval':'9000,100'},step1Defaults])
Expand Down Expand Up @@ -478,7 +482,13 @@
# UL AOD
steps['RunJetHT2018D_reminiaodUL']={'INPUT':InputInfo(dataSet='/JetHT/Run2018D-12Nov2019_UL2018-v4/AOD',label='2018DrmaodUL',events=100000,location='STD', ls=Run2018D)}

#### run3 ####
####################################
#### Run3 ##########################
####################################

###2022

## Collisions at 900 GeV and ramp-up to 13.6 TeV
Run2022A={353015: [[1, 100]]}
steps['RunMinimumBias2022A']={'INPUT':InputInfo(dataSet='/MinimumBias/Run2022A-v1/RAW',label='2022A',events=100000,location='STD', ls=Run2022A)}
steps['RunSingleMuon2022A']={'INPUT':InputInfo(dataSet='/SingleMuon/Run2022A-v1/RAW',label='2022A',events=100000,location='STD', ls=Run2022A)}
Expand All @@ -497,7 +507,7 @@
steps['RunDoubleMuon2022A']={'INPUT':InputInfo(dataSet='/DoubleMuon/Run2022A-v1/RAW',label='2022A',events=100000,location='STD', ls=Run2022A)}
steps['RunMuonEG2022A']={'INPUT':InputInfo(dataSet='/MuonEG/Run2022A-v1/RAW',label='2022A',events=100000,location='STD', ls=Run2022A)}

Run2022B={355769: [[1, 106]]}
Run2022B={355769: [[1, 106]]} ## this could be raised to "355769": [[1, 541]]
steps['RunMinimumBias2022B']={'INPUT':InputInfo(dataSet='/MinimumBias/Run2022B-v1/RAW',label='2022B',events=100000,location='STD', ls=Run2022B)}
steps['RunSingleMuon2022B']={'INPUT':InputInfo(dataSet='/SingleMuon/Run2022B-v1/RAW',label='2022B',events=100000,location='STD', ls=Run2022B)}
steps['RunZeroBias2022B']={'INPUT':InputInfo(dataSet='/ZeroBias/Run2022B-v1/RAW',label='2022B',events=100000,location='STD', ls=Run2022B)}
Expand All @@ -514,7 +524,6 @@
steps['RunTau2022B']={'INPUT':InputInfo(dataSet='/Tau/Run2022B-v1/RAW',label='2022B',events=100000,location='STD', ls=Run2022B)}
steps['RunDoubleMuon2022B']={'INPUT':InputInfo(dataSet='/DoubleMuon/Run2022B-v1/RAW',label='2022B',events=100000,location='STD', ls=Run2022B)}
steps['RunMuonEG2022B']={'INPUT':InputInfo(dataSet='/MuonEG/Run2022B-v1/RAW',label='2022B',events=100000,location='STD', ls=Run2022B)}
#steps['RunParkingBPH2022B']={'INPUT':InputInfo(dataSet='/ParkingBPH/Run2022B-v1/RAW',label='2022B',events=100000,location='STD', ls=Run2022B)}

Run2022C={356381: [[1, 1193]]}
Run2022C_LS40={356381: [[1, 40]]}
Expand Down Expand Up @@ -576,7 +585,7 @@
# reMINIAOD for 2022
steps['RunJetMET2022D_reMINI']={'INPUT':InputInfo(dataSet='/JetMET/Run2022D-16Jun2023-v1/AOD',label='rmaod',events=100000,location='STD', ls=Run2022D_LS25)}

#### run3 ####
###2023
Run2023B={366727: [[1, 244]]}
steps['RunMuon2023B']={'INPUT':InputInfo(dataSet='/Muon0/Run2023B-v1/RAW',label='2023B',events=100000,location='STD', ls=Run2023B)}
steps['RunZeroBias2023B']={'INPUT':InputInfo(dataSet='/ZeroBias/Run2023B-v1/RAW',label='2023B',events=100000,location='STD', ls=Run2023B)}
Expand Down Expand Up @@ -625,6 +634,23 @@
RunHI2023={375491: [[100, 100]]}
steps['RunHIPhysicsRawPrime2023A']={'INPUT':InputInfo(dataSet='/HIPhysicsRawPrime0/HIRun2023A-v1/RAW',label='HI2023A',events=100000,location='STD', ls=RunHI2023)}

### Golden Data Wfs
# reading good runs directly from the latest golden json
# in https://cms-service-dqmdc.web.cern.ch/CAF/certification/
# or, if one is available, from a locally supplied certification json

###2024
# number of events limits the files used as input

# 2024 primary datasets and eras used to auto-generate the data input steps below
pds_2024 = ['BTagMu', 'DisplacedJet', 'EGamma0', 'HcalNZS', 'JetMET0', 'Muon0', 'MuonEG', 'NoBPTX', 'ParkingDoubleMuonLowMass0', 'ParkingHH', 'ParkingLLP', 'ParkingSingleMuon0', 'ParkingVBF0', 'Tau', 'ZeroBias']
eras_2024 = ['Run2024B', 'Run2024C', 'Run2024D', 'Run2024E', 'Run2024F']
# One input step per (era, PD, event-count) combination, e.g. "RunBTagMu2024B_150k".
# skimEvents=True makes InputInfo.das() select files via das-up-to-nevents.py
# (capped at `events`) instead of issuing run/lumi dasgoclient queries.
for era in eras_2024:
    for pd in pds_2024:
        dataset = "/" + pd + "/" + era + "-v1/RAW"
        for e_key,evs in event_steps_dict.items():
            step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key
            steps[step_name] = {'INPUT':InputInfo(dataSet=dataset,label=era.split("Run")[1],events=int(evs*1e6), skimEvents=True, location='STD')}

# Highstat HLTPhysics
Run2015DHS=selectedLS([258712,258713,258714,258741,258742,258745,258749,258750,259626,259637,259683,259685,259686,259721,259809,259810,259818,259820,259821,259822,259862,259890,259891])
steps['RunHLTPhy2015DHS']={'INPUT':InputInfo(dataSet='/HLTPhysics/Run2015D-v1/RAW',label='2015DHS',events=100000,location='STD', ls=Run2015DHS)}
Expand Down Expand Up @@ -2166,6 +2192,8 @@ def lhegensim2018ml(fragment,howMuch):

steps['HLTDR3_2023B']=merge( [ {'-s':'L1REPACK:Full,HLT:@%s'%hltKey2024,},{'--conditions':'auto:run3_hlt_relval'},{'--era' : 'Run3'},steps['HLTD'] ] )

steps['HLTDR3_2024']=merge( [ {'-s':'L1REPACK:Full,HLT:@%s'%hltKey2024,},{'--conditions':'auto:run3_hlt_relval'},{'--era' : 'Run3_2024'},steps['HLTD'] ] )

steps['HLTDR3_HI2023ARawprime']=merge([{'-s':'L1REPACK:Full,HLT:HIon'},
{'--conditions':'auto:run3_hlt_HIon'},
{'--era' : 'Run3_pp_on_PbPb_approxSiStripClusters_2023'},
Expand Down Expand Up @@ -2696,10 +2724,11 @@ def lhegensim2018ml(fragment,howMuch):

steps['RECODR3_2023']=merge([{'--era':'Run3_2023'},steps['RECODR3']])
steps['RECODR3_2024']=merge([{'--era':'Run3_2024'},steps['RECODR3']])

steps['RECODR3_reHLT_2022']=merge([{'--conditions':'auto:run3_data_relval', '--hltProcess':'reHLT'},steps['RECODR3']])
steps['RECODR3_reHLT_2023']=merge([{'--conditions':'auto:run3_data_prompt_relval', '--hltProcess':'reHLT'},steps['RECODR3_2023']])
steps['RECODR3_reHLT_2023B']=merge([{'--conditions':'auto:run3_data_prompt_relval', '--hltProcess':'reHLT'},steps['RECODR3']])
steps['RECODR3_reHLT_2024']=merge([{'--conditions':'auto:run3_data_prompt_relval', '--hltProcess':'reHLT'},steps['RECODR3']])

steps['RECODR3_2023_HIN']=merge([{'--conditions':'auto:run3_data_prompt', '-s':'RAW2DIGI,L1Reco,RECO,DQM:@commonFakeHLT+@standardDQMFakeHLT', '--repacked':'', '-n':1000},steps['RECODR3_2023']])
steps['RECODR3_2023_UPC']=merge([{'--era':'Run3_2023_UPC'},steps['RECODR3_2023_HIN']])
Expand Down Expand Up @@ -3058,6 +3087,8 @@ def gen2023HiMix(fragment,howMuch):

steps['RECOHIRUN3_reHLT_2023']=merge([{'-s':'RAW2DIGI,L1Reco,RECO,PAT,DQM:@standardDQM','--datatier':'RECO,MINIAOD,DQMIO','--eventcontent':'RECO,MINIAOD,DQM','--era':'Run3_pp_on_PbPb_approxSiStripClusters_2023','--conditions':'auto:run3_data_HIon'},steps['RECODR3_reHLT_2023']])

steps['AODNANORUN3_reHLT_2024']=merge([{'-s':'RAW2DIGI,L1Reco,RECO,PAT,NANO,DQM:@standardDQM+@miniAODDQM+@nanoAODDQM','--datatier':'AOD,MINIAOD,NANOAOD,DQMIO','--eventcontent':'AOD,MINIAOD,NANOEDMAOD,DQM'},steps['RECODR3_reHLT_2024']])

# patatrack validation in data
steps['RecoData_Patatrack_AllGPU_Validation_2023'] = merge([{'-s':'RAW2DIGI:RawToDigi_pixelOnly+RawToDigi_ecalOnly+RawToDigi_hcalOnly,RECO:reconstruction_pixelTrackingOnly+reconstruction_ecalOnly+reconstruction_hcalOnly,DQM:@pixelTrackingOnlyDQM+@ecalOnly+@hcalOnly+@hcal2Only',
'--conditions':'auto:run3_data_prompt',
Expand Down Expand Up @@ -3788,6 +3819,7 @@ def gen2023HiMix(fragment,howMuch):
steps['HARVESTRUN3_COS_2022']=merge([{'--data':'', '--scenario':'cosmics', '--era':'Run3', '-s':'HARVESTING:dqmHarvesting'},steps['HARVESTDRUN3']])
steps['HARVESTRUN3_2023']=merge([{'--era':'Run3_2023', '-s':'HARVESTING:@standardDQM+@miniAODDQM+@nanoAODDQM'},steps['HARVESTRUN3_2022']])
steps['HARVESTRUN3_2023B']=merge([{'--era':'Run3', '-s':'HARVESTING:@standardDQM+@miniAODDQM+@nanoAODDQM'},steps['HARVESTRUN3_2022']])
steps['HARVESTRUN3_2024']=merge([{'--era':'Run3', '-s':'HARVESTING:@standardDQM+@miniAODDQM+@nanoAODDQM'},steps['HARVESTDRUN3']])

steps['HARVESTRUN3_HI2023A']=merge([{'--era':'Run3_pp_on_PbPb_approxSiStripClusters_2023', '-s':'HARVESTING:@standardDQM+@miniAODDQM'},steps['HARVESTRUN3_2022']])

Expand Down
Loading