Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2024 Data RelVals and InputInfo Events Skimming (actually) Working #45055

Merged
merged 3 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions Configuration/PyReleaseValidation/python/MatrixReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ def reset(self, what='all'):
'relval_identity':'id-',
'relval_machine': 'mach-',
'relval_premix': 'premix-',
'relval_nano':'nano-'
'relval_nano':'nano-',
'relval_data_highstats':'data-'
}

self.files = ['relval_standard' ,
Expand All @@ -73,7 +74,8 @@ def reset(self, what='all'):
'relval_identity',
'relval_machine',
'relval_premix',
'relval_nano'
'relval_nano',
'relval_data_highstats'
]
self.filesDefault = {'relval_standard':True ,
'relval_highstats':True ,
Expand All @@ -90,7 +92,8 @@ def reset(self, what='all'):
'relval_identity':False,
'relval_machine':True,
'relval_premix':True,
'relval_nano':True
'relval_nano':True,
'relval_data_highstats':False
}

self.relvalModule = None
Expand Down
32 changes: 21 additions & 11 deletions Configuration/PyReleaseValidation/python/MatrixUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def selectedLS(list_runs=[],maxNum=-1,l_json=data_json2015):

InputInfoNDefault=2000000
class InputInfo(object):
def __init__(self,dataSet,dataSetParent='',label='',run=[],ls={},files=1000,events=InputInfoNDefault,split=10,location='CAF',ib_blacklist=None,ib_block=None) :
def __init__(self,dataSet,dataSetParent='',label='',run=[],ls={},files=1000,events=InputInfoNDefault,split=10,location='CAF',ib_blacklist=None,ib_block=None,skimEvents=False) :
self.run = run
self.ls = ls
self.files = files
Expand All @@ -115,37 +115,47 @@ def __init__(self,dataSet,dataSetParent='',label='',run=[],ls={},files=1000,even
self.ib_blacklist = ib_blacklist
self.ib_block = ib_block
self.dataSetParent = dataSetParent

self.skimEvents = skimEvents

def das(self, das_options, dataset):
if len(self.run) != 0 or self.ls:
if not self.skimEvents and (len(self.run) != 0 or self.ls):
queries = self.queries(dataset)
if len(self.run) != 0:
command = ";".join(["dasgoclient %s --query '%s'" % (das_options, query) for query in queries])
command = ";".join(["dasgoclient %s --query '%s'" % (das_options, query) for query in queries])
else:
lumis = self.lumis()
commands = []
while queries:
commands.append("dasgoclient %s --query 'lumi,%s' --format json | das-selected-lumis.py %s " % (das_options, queries.pop(), lumis.pop()))
commands.append("dasgoclient %s --query 'lumi,%s' --format json | das-selected-lumis.py %s " % (das_options, queries.pop(), lumis.pop()))
command = ";".join(commands)
command = "({0})".format(command)
else:
elif not self.skimEvents:
command = "dasgoclient %s --query '%s'" % (das_options, self.queries(dataset)[0])

elif self.skimEvents:
from os import getenv
if getenv("CMSSW_USE_IBEOS","false")=="true":
# to be assured that whatever happens the files are only those at CERN
command = "das-up-to-nevents.py -d %s -e %d -s T2_CH_CERN"%(dataset,self.events)
else:
command = "das-up-to-nevents.py -d %s -e %d"%(dataset,self.events)
# Run filter on DAS output
if self.ib_blacklist:
command += " | grep -E -v "
command += " ".join(["-e '{0}'".format(pattern) for pattern in self.ib_blacklist])
from os import getenv
if getenv("CMSSW_USE_IBEOS","false")=="true": return command + " | ibeos-lfn-sort"
return command + " | sort -u"
if not self.skimEvents: ## keep run-lumi sorting
from os import getenv
if getenv("CMSSW_USE_IBEOS","false")=="true": return command + " | ibeos-lfn-sort"
return command + " | sort -u"
else:
return command

def lumiRanges(self):
if len(self.run) != 0:
return "echo '{\n"+",".join(('"%d":[[1,268435455]]\n'%(x,) for x in self.run))+"}'"
if self.ls :
return "echo '{\n"+",".join(('"%d" : %s\n'%( int(x),self.ls[x]) for x in self.ls.keys()))+"}'"
return None

def lumis(self):
query_lumis = []
if self.ls:
Expand Down
30 changes: 30 additions & 0 deletions Configuration/PyReleaseValidation/python/relval_data_highstats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# import the definition of the steps and input files:
from Configuration.PyReleaseValidation.relval_steps import *

# here only define the workflows as a combination of the steps defined above:
workflows = Matrix()

## Higher-statistics (>50k events) data workflows, NOT run by default.
## Each one processes 150k, 250k, 500k or 1M events.

## 2024
base_wf_number_2024 = 2024.0
offset_era = 0.1       # less than 10 eras
offset_pd = 0.001      # less than 100 pds
offset_events = 0.0001 # less than 10 event setups (50k,150k,250k,500k)

for e_n, era in enumerate(eras_2024):
    era_tag = era.split("Run")[1]
    for p_n, pd in enumerate(pds_2024):
        for e_key, evs in event_steps_dict.items():
            # the 50k setup is already defined in relval_standard
            if e_key == "50k":
                continue
            # left-to-right addition keeps the float result (and hence the
            # rounded workflow number) identical to the step-by-step form
            wf_number = round(base_wf_number_2024
                              + offset_era * e_n
                              + offset_pd * p_n
                              + offset_events * evs, 6)
            step_name = "Run" + pd + era_tag + "_" + e_key
            workflows[wf_number] = ['',[step_name,'HLTDR3_2024','AODNANORUN3_reHLT_2024','HARVESTRUN3_2024']]



23 changes: 23 additions & 0 deletions Configuration/PyReleaseValidation/python/relval_highstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,26 @@



## 2024 Data Higher Stats Workflows
## with 150k, 250k, 500k or 1M events each

base_wf_number_2024 = 2024.0
offset_era = 0.1 # less than 10 eras
offset_pd = 0.001 # less than 100 pds
offset_events = 0.0001 # less than 10 event setups (50k,150k,250k,500k)

for e_n,era in enumerate(eras_2024):
    for p_n,pd in enumerate(pds_2024):
        for e_key,evs in event_steps_dict.items():
            # Skip ONLY the 50k setup: it is already defined in relval_standard.
            # NOTE: a substring test ("50k" in e_key) would also match "150k"
            # and "250k" and silently drop those workflows, so compare equal.
            if "50k" == e_key:
                continue
            # workflow number = 2024.0 + 0.1*era + 0.001*pd + 0.0001*Mevents
            wf_number = base_wf_number_2024
            wf_number = wf_number + offset_era * e_n
            wf_number = wf_number + offset_pd * p_n
            wf_number = wf_number + offset_events * evs
            wf_number = round(wf_number,6)
            step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key
            workflows[wf_number] = ['',[step_name,'HLTDR3_2024','AODNANORUN3_reHLT_2024','HARVESTRUN3_2024']]



16 changes: 16 additions & 0 deletions Configuration/PyReleaseValidation/python/relval_standard.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,7 @@
workflows[136.903] = ['', ['RunDoubleMuon2017B', 'TauEmbedding_Selection_2017', 'TauEmbedding_Cleaning_2017', 'TauEmbedding_GenPreHLT_2017', 'TauEmbedding_GenHLT_2017', 'TauEmbedding_GenPostHLT_2017', 'TauEmbedding_Merging_2017']]
workflows[136.904] = ['', ['RunDoubleMuon2018C', 'TauEmbedding_Selection_2018', 'TauEmbedding_Cleaning_2018', 'TauEmbedding_GenPreHLT_2018', 'TauEmbedding_GenHLT_2018', 'TauEmbedding_GenPostHLT_2018', 'TauEmbedding_Merging_2018']]


### run 2021 collisions ###
workflows[139.001] = ['RunMinimumBias2021',['RunMinimumBias2021','HLTDR3_2022','RECODR3_reHLT_MinBiasOffline','HARVESTD2021MB_reHLT']]
workflows[139.002] = ['',['RunZeroBias2021','HLTDR3_2022','RECODR3_reHLT_ZBOffline','HARVESTD2021ZB_reHLT']]
Expand Down Expand Up @@ -558,6 +559,21 @@
workflows[142.901] = ['',['RunUPC2023','RECODR3_2024_UPC','HARVESTDPROMPTR3']]
workflows[142.902] = ['',['RunUPC2023','RECODR3_2024_HIN','HARVESTDPROMPTR3']]

## 2024 Data Workflows
## Default data workflows (50k events each): one per (era, primary dataset) pair.
base_wf_number_2024 = 2024.0
offset_era = 0.1 # less than 10 eras
offset_pd = 0.001 # less than 100 pds

for e_n,era in enumerate(eras_2024):
    for p_n,pd in enumerate(pds_2024):
        wf_number = base_wf_number_2024
        wf_number = wf_number + offset_era * e_n
        wf_number = wf_number + offset_pd * p_n
        # per-event-setup offset (0.0001) times 0.05 Mevents, i.e. the 50k setup
        wf_number = wf_number + 0.0001 * 0.05
        wf_number = round(wf_number,6)
        step_name = "Run" + pd + era.split("Run")[1] + "_50k"
        workflows[wf_number] = ['',[step_name,'HLTDR3_2024','AODNANORUN3_reHLT_2024','HARVESTRUN3_2024']]

### fastsim ###
workflows[5.1] = ['TTbarFS', ['TTbarFS','HARVESTFS']]
workflows[5.2] = ['SingleMuPt10FS', ['SingleMuPt10FS','HARVESTFS']]
Expand Down
42 changes: 37 additions & 5 deletions Configuration/PyReleaseValidation/python/relval_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@

steps = Steps()

#### Event-count setups, in millions of events, keyed by a readable label
event_steps_dict = {
    "50k": 0.05,
    "150k": 0.15,
    "250k": 0.25,
    "500k": 0.5,
    "1M": 1,
}
event_steps_k = list(event_steps_dict)          # ["50k","150k","250k","500k","1M"]
event_steps = list(event_steps_dict.values())   # in millions
#### Production test section ####
steps['ProdMinBias']=merge([{'cfg':'MinBias_8TeV_pythia8_TuneCUETP8M1_cff','--relval':'9000,300'},step1Defaults])
steps['ProdTTbar']=merge([{'cfg':'TTbar_8TeV_TuneCUETP8M1_cfi','--relval':'9000,100'},step1Defaults])
Expand Down Expand Up @@ -478,7 +482,13 @@
# UL AOD
steps['RunJetHT2018D_reminiaodUL']={'INPUT':InputInfo(dataSet='/JetHT/Run2018D-12Nov2019_UL2018-v4/AOD',label='2018DrmaodUL',events=100000,location='STD', ls=Run2018D)}

#### run3 ####
####################################
#### Run3 ##########################
####################################

###2022

## Collisions at 900 GeV and ramp-up to 13.6 TeV
Run2022A={353015: [[1, 100]]}
steps['RunMinimumBias2022A']={'INPUT':InputInfo(dataSet='/MinimumBias/Run2022A-v1/RAW',label='2022A',events=100000,location='STD', ls=Run2022A)}
steps['RunSingleMuon2022A']={'INPUT':InputInfo(dataSet='/SingleMuon/Run2022A-v1/RAW',label='2022A',events=100000,location='STD', ls=Run2022A)}
Expand All @@ -497,7 +507,7 @@
steps['RunDoubleMuon2022A']={'INPUT':InputInfo(dataSet='/DoubleMuon/Run2022A-v1/RAW',label='2022A',events=100000,location='STD', ls=Run2022A)}
steps['RunMuonEG2022A']={'INPUT':InputInfo(dataSet='/MuonEG/Run2022A-v1/RAW',label='2022A',events=100000,location='STD', ls=Run2022A)}

Run2022B={355769: [[1, 106]]}
Run2022B={355769: [[1, 106]]} ## this could be raised to "355769": [[1, 541]]
steps['RunMinimumBias2022B']={'INPUT':InputInfo(dataSet='/MinimumBias/Run2022B-v1/RAW',label='2022B',events=100000,location='STD', ls=Run2022B)}
steps['RunSingleMuon2022B']={'INPUT':InputInfo(dataSet='/SingleMuon/Run2022B-v1/RAW',label='2022B',events=100000,location='STD', ls=Run2022B)}
steps['RunZeroBias2022B']={'INPUT':InputInfo(dataSet='/ZeroBias/Run2022B-v1/RAW',label='2022B',events=100000,location='STD', ls=Run2022B)}
Expand All @@ -514,7 +524,6 @@
steps['RunTau2022B']={'INPUT':InputInfo(dataSet='/Tau/Run2022B-v1/RAW',label='2022B',events=100000,location='STD', ls=Run2022B)}
steps['RunDoubleMuon2022B']={'INPUT':InputInfo(dataSet='/DoubleMuon/Run2022B-v1/RAW',label='2022B',events=100000,location='STD', ls=Run2022B)}
steps['RunMuonEG2022B']={'INPUT':InputInfo(dataSet='/MuonEG/Run2022B-v1/RAW',label='2022B',events=100000,location='STD', ls=Run2022B)}
#steps['RunParkingBPH2022B']={'INPUT':InputInfo(dataSet='/ParkingBPH/Run2022B-v1/RAW',label='2022B',events=100000,location='STD', ls=Run2022B)}

Run2022C={356381: [[1, 1193]]}
Run2022C_LS40={356381: [[1, 40]]}
Expand Down Expand Up @@ -576,7 +585,7 @@
# reMINIAOD for 2022
steps['RunJetMET2022D_reMINI']={'INPUT':InputInfo(dataSet='/JetMET/Run2022D-16Jun2023-v1/AOD',label='rmaod',events=100000,location='STD', ls=Run2022D_LS25)}

#### run3 ####
###2023
Run2023B={366727: [[1, 244]]}
steps['RunMuon2023B']={'INPUT':InputInfo(dataSet='/Muon0/Run2023B-v1/RAW',label='2023B',events=100000,location='STD', ls=Run2023B)}
steps['RunZeroBias2023B']={'INPUT':InputInfo(dataSet='/ZeroBias/Run2023B-v1/RAW',label='2023B',events=100000,location='STD', ls=Run2023B)}
Expand Down Expand Up @@ -625,6 +634,23 @@
RunHI2023={375491: [[100, 100]]}
steps['RunHIPhysicsRawPrime2023A']={'INPUT':InputInfo(dataSet='/HIPhysicsRawPrime0/HIRun2023A-v1/RAW',label='HI2023A',events=100000,location='STD', ls=RunHI2023)}

### Golden Data Wfs
# reading good runs directly from the latest golden json
# in https://cms-service-dqmdc.web.cern.ch/CAF/certification/
# or, if one is available, from a locally supplied certification json

###2024
# number of events limits the files used as input

# 2024 primary datasets and eras used to auto-generate the data input steps below
pds_2024 = ['BTagMu', 'DisplacedJet', 'EGamma0', 'HcalNZS', 'JetMET0', 'Muon0', 'MuonEG', 'NoBPTX', 'ParkingDoubleMuonLowMass0', 'ParkingHH', 'ParkingLLP', 'ParkingSingleMuon0', 'ParkingVBF0', 'Tau', 'ZeroBias']
eras_2024 = ['Run2024B', 'Run2024C', 'Run2024D', 'Run2024E', 'Run2024F']
# One input step per (era, PD, event-count) combination, e.g. "RunBTagMu2024B_150k".
# skimEvents=True makes InputInfo.das() select files via das-up-to-nevents.py
# (capped at `events`) instead of issuing run/lumi dasgoclient queries.
for era in eras_2024:
    for pd in pds_2024:
        dataset = "/" + pd + "/" + era + "-v1/RAW"
        for e_key,evs in event_steps_dict.items():
            step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key
            steps[step_name] = {'INPUT':InputInfo(dataSet=dataset,label=era.split("Run")[1],events=int(evs*1e6), skimEvents=True, location='STD')}

# Highstat HLTPhysics
Run2015DHS=selectedLS([258712,258713,258714,258741,258742,258745,258749,258750,259626,259637,259683,259685,259686,259721,259809,259810,259818,259820,259821,259822,259862,259890,259891])
steps['RunHLTPhy2015DHS']={'INPUT':InputInfo(dataSet='/HLTPhysics/Run2015D-v1/RAW',label='2015DHS',events=100000,location='STD', ls=Run2015DHS)}
Expand Down Expand Up @@ -2166,6 +2192,8 @@ def lhegensim2018ml(fragment,howMuch):

steps['HLTDR3_2023B']=merge( [ {'-s':'L1REPACK:Full,HLT:@%s'%hltKey2024,},{'--conditions':'auto:run3_hlt_relval'},{'--era' : 'Run3'},steps['HLTD'] ] )

steps['HLTDR3_2024']=merge( [ {'-s':'L1REPACK:Full,HLT:@%s'%hltKey2024,},{'--conditions':'auto:run3_hlt_relval'},{'--era' : 'Run3_2024'},steps['HLTD'] ] )

steps['HLTDR3_HI2023ARawprime']=merge([{'-s':'L1REPACK:Full,HLT:HIon'},
{'--conditions':'auto:run3_hlt_HIon'},
{'--era' : 'Run3_pp_on_PbPb_approxSiStripClusters_2023'},
Expand Down Expand Up @@ -2696,10 +2724,11 @@ def lhegensim2018ml(fragment,howMuch):

steps['RECODR3_2023']=merge([{'--era':'Run3_2023'},steps['RECODR3']])
steps['RECODR3_2024']=merge([{'--era':'Run3_2024'},steps['RECODR3']])

steps['RECODR3_reHLT_2022']=merge([{'--conditions':'auto:run3_data_relval', '--hltProcess':'reHLT'},steps['RECODR3']])
steps['RECODR3_reHLT_2023']=merge([{'--conditions':'auto:run3_data_prompt_relval', '--hltProcess':'reHLT'},steps['RECODR3_2023']])
steps['RECODR3_reHLT_2023B']=merge([{'--conditions':'auto:run3_data_prompt_relval', '--hltProcess':'reHLT'},steps['RECODR3']])
steps['RECODR3_reHLT_2024']=merge([{'--conditions':'auto:run3_data_prompt_relval', '--hltProcess':'reHLT'},steps['RECODR3']])

steps['RECODR3_2023_HIN']=merge([{'--conditions':'auto:run3_data_prompt', '-s':'RAW2DIGI,L1Reco,RECO,DQM:@commonFakeHLT+@standardDQMFakeHLT', '--repacked':'', '-n':1000},steps['RECODR3_2023']])
steps['RECODR3_2023_UPC']=merge([{'--era':'Run3_2023_UPC'},steps['RECODR3_2023_HIN']])
Expand Down Expand Up @@ -3058,6 +3087,8 @@ def gen2023HiMix(fragment,howMuch):

steps['RECOHIRUN3_reHLT_2023']=merge([{'-s':'RAW2DIGI,L1Reco,RECO,PAT,DQM:@standardDQM','--datatier':'RECO,MINIAOD,DQMIO','--eventcontent':'RECO,MINIAOD,DQM','--era':'Run3_pp_on_PbPb_approxSiStripClusters_2023','--conditions':'auto:run3_data_HIon'},steps['RECODR3_reHLT_2023']])

steps['AODNANORUN3_reHLT_2024']=merge([{'-s':'RAW2DIGI,L1Reco,RECO,PAT,NANO,DQM:@standardDQM+@miniAODDQM+@nanoAODDQM','--datatier':'AOD,MINIAOD,NANOAOD,DQMIO','--eventcontent':'AOD,MINIAOD,NANOEDMAOD,DQM'},steps['RECODR3_reHLT_2024']])

# patatrack validation in data
steps['RecoData_Patatrack_AllGPU_Validation_2023'] = merge([{'-s':'RAW2DIGI:RawToDigi_pixelOnly+RawToDigi_ecalOnly+RawToDigi_hcalOnly,RECO:reconstruction_pixelTrackingOnly+reconstruction_ecalOnly+reconstruction_hcalOnly,DQM:@pixelTrackingOnlyDQM+@ecalOnly+@hcalOnly+@hcal2Only',
'--conditions':'auto:run3_data_prompt',
Expand Down Expand Up @@ -3788,6 +3819,7 @@ def gen2023HiMix(fragment,howMuch):
steps['HARVESTRUN3_COS_2022']=merge([{'--data':'', '--scenario':'cosmics', '--era':'Run3', '-s':'HARVESTING:dqmHarvesting'},steps['HARVESTDRUN3']])
steps['HARVESTRUN3_2023']=merge([{'--era':'Run3_2023', '-s':'HARVESTING:@standardDQM+@miniAODDQM+@nanoAODDQM'},steps['HARVESTRUN3_2022']])
steps['HARVESTRUN3_2023B']=merge([{'--era':'Run3', '-s':'HARVESTING:@standardDQM+@miniAODDQM+@nanoAODDQM'},steps['HARVESTRUN3_2022']])
steps['HARVESTRUN3_2024']=merge([{'--era':'Run3', '-s':'HARVESTING:@standardDQM+@miniAODDQM+@nanoAODDQM'},steps['HARVESTDRUN3']])

steps['HARVESTRUN3_HI2023A']=merge([{'--era':'Run3_pp_on_PbPb_approxSiStripClusters_2023', '-s':'HARVESTING:@standardDQM+@miniAODDQM'},steps['HARVESTRUN3_2022']])

Expand Down
Loading