Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add boosted taus to NanoAOD #150

Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 140 additions & 0 deletions PhysicsTools/NanoAOD/python/boostedTaus_cff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import FWCore.ParameterSet.Config as cms
from PhysicsTools.NanoAOD.common_cff import *
from PhysicsTools.JetMCAlgos.TauGenJets_cfi import tauGenJets
from PhysicsTools.JetMCAlgos.TauGenJetsDecayModeSelectorAllHadrons_cfi import tauGenJetsSelectorAllHadrons

##################### Updated tau collection with MVA-based tau-Ids rerun #######
# Used only in some eras
from PhysicsTools.NanoAOD.taus_updatedMVAIds_cff import *

##################### User floats producers, selectors ##########################


finalBoostedTaus = cms.EDFilter("PATTauRefSelector",
src = cms.InputTag("slimmedTausBoostedNewID"),
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my test sequence this crashes because there is only slimmedTausBoosted. Is this maybe a leftover from the miniAOD fix? Please check.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What era are you performing tests under? This sounds like the IDs are not being added and therefore the new slimmedTausBoostedNewID collection does not get made.

My command for reference:

cmsDriver.py step1 --filein "/store/relval/CMSSW_11_3_0_pre1/RelValTTbar_14TeV/MINIAODSIM/PU_113X_mcRun3_2021_realistic_v1-v1/10000/e41c73d6-dc6b-405a-8aa4-2f79d974a1ab.root" --mc --eventcontent NANOAODSIM --datatier NANOAODSIM --conditions auto:run2_mc --step NANO --nThreads 8 --era Run2_2016,run2_nanoAOD_106Xv2 -n 1000 --fileout file:testRelval.root

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ran a 2016 sample but probably missed the run2_nanoAOD_106Xv2 modifier. Will try again. Thanks for the reference!

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, with this modifier, it runs successfully. I think what is currently problematic is that the boosted taus are always added but the IDs only rerun with the given modifiers. But after rearranging the modifiers as stated earlier, all cases should be runnable (independently from the question whether boosted taus are produced in the respective cases)

cut = cms.string("pt > 40 && tauID('decayModeFindingNewDMs') && (tauID('byVVLooseIsolationMVArun2017v2DBoldDMwLT2017') || tauID('byVVLooseIsolationMVArun2017v2DBoldDMdR0p3wLT2017') || tauID('byVVLooseIsolationMVArun2017v2DBnewDMwLT2017'))")
)


##################### Tables for final output and docs ##########################
def _tauIdWPMask(pattern, choices, doc=""):
    """Build a "uint8" Var that packs several tau-ID working points into a bitmask.

    pattern -- %-format string with one placeholder; it is filled with each
               entry of `choices` to obtain the tauID name to query.
    choices -- ordered working-point labels; the i-th label is given weight 2**i.
    doc     -- text placed in front of the generated ": bitmask ..." legend.
    """
    terms = []
    legend = []
    for bit, wp in enumerate(choices):
        weight = pow(2, bit)
        # expression term: weight times the discriminator value of this working point
        terms.append("%d * tauID('%s')" % (weight, pattern % wp))
        # matching entry of the human-readable legend
        legend.append("%d = %s" % (weight, wp))
    expression = " + ".join(terms)
    description = doc + ": bitmask " + ", ".join(legend)
    return Var(expression, "uint8", doc=description)
def _tauId2WPMask(pattern, doc):
    """Bitmask Var over two working points: Loose, Tight (lowest bit first)."""
    workingPoints = ("Loose", "Tight")
    return _tauIdWPMask(pattern, choices=workingPoints, doc=doc)
def _tauId3WPMask(pattern, doc):
    """Bitmask Var over three working points: Loose, Medium, Tight (lowest bit first)."""
    workingPoints = ("Loose", "Medium", "Tight")
    return _tauIdWPMask(pattern, choices=workingPoints, doc=doc)
def _tauId4WPMask(pattern, doc):
    """Bitmask Var over four working points: VLoose ... Tight (lowest bit first)."""
    workingPoints = ("VLoose", "Loose", "Medium", "Tight")
    return _tauIdWPMask(pattern, choices=workingPoints, doc=doc)
def _tauId5WPMask(pattern, doc):
    """Bitmask Var over five working points: VLoose ... VTight (lowest bit first)."""
    workingPoints = ("VLoose", "Loose", "Medium", "Tight", "VTight")
    return _tauIdWPMask(pattern, choices=workingPoints, doc=doc)
def _tauId6WPMask(pattern, doc):
    """Bitmask Var over six working points: VLoose ... VVTight (lowest bit first)."""
    workingPoints = ("VLoose", "Loose", "Medium", "Tight", "VTight", "VVTight")
    return _tauIdWPMask(pattern, choices=workingPoints, doc=doc)
def _tauId7WPMask(pattern, doc):
    """Bitmask Var over seven working points: VVLoose ... VVTight (lowest bit first)."""
    workingPoints = ("VVLoose", "VLoose", "Loose", "Medium", "Tight", "VTight", "VVTight")
    return _tauIdWPMask(pattern, choices=workingPoints, doc=doc)
def _tauId8WPMask(pattern, doc):
    """Bitmask Var over eight working points: VVVLoose ... VVTight (lowest bit first)."""
    workingPoints = ("VVVLoose", "VVLoose", "VLoose", "Loose", "Medium", "Tight", "VTight", "VVTight")
    return _tauIdWPMask(pattern, choices=workingPoints, doc=doc)

# Flat table built from the finalBoostedTaus collection; branches are prefixed "boostedTau".
# The table doc embeds the selection string of finalBoostedTaus so the cut is recorded in the output.
boostedTauTable = cms.EDProducer("SimpleCandidateFlatTableProducer",
    src = cms.InputTag("finalBoostedTaus"),
    cut = cms.string(""), # we should not filter on cross linked collections
    name = cms.string("boostedTau"),
    # the underlying MiniAOD collection is called slimmedTausBoosted
    doc = cms.string("slimmedTausBoosted after basic selection (" + finalBoostedTaus.cut.value() + ")"),
    singleton = cms.bool(False), # the number of entries is variable
    extension = cms.bool(False), # this is the main table for the boosted taus
    variables = cms.PSet() # placeholder; the variable PSet is assigned after its definition below
)
# Variables stored for each boosted tau: four-momentum (P4Vars), charge, jet link,
# decay mode, leading-track quantities, isolation sums and tau-ID discriminators.
_boostedTauVarsBase = cms.PSet(P4Vars,
    charge = Var("charge", int, doc="electric charge"),
    jetIdx = Var("?hasUserCand('jet')?userCand('jet').key():-1", int, doc="index of the associated jet (-1 if none)"),
    decayMode = Var("decayMode()", int),
    leadTkPtOverTauPt = Var("leadChargedHadrCand.pt/pt ", float, doc="pt of the leading track divided by tau pt", precision=10),
    leadTkDeltaEta = Var("leadChargedHadrCand.eta - eta ", float, doc="eta of the leading track, minus tau eta", precision=8),
    leadTkDeltaPhi = Var("deltaPhi(leadChargedHadrCand.phi, phi) ", float, doc="phi of the leading track, minus tau phi", precision=8),

    # isolation sums
    rawIso = Var("tauID('byCombinedIsolationDeltaBetaCorrRaw3Hits')", float, doc="combined isolation (deltaBeta corrections)", precision=10),
    rawIsodR03 = Var("(tauID('chargedIsoPtSumdR03')+max(0.,tauID('neutralIsoPtSumdR03')-0.072*tauID('puCorrPtSum')))", float, doc="combined isolation (deltaBeta corrections, dR=0.3)", precision=10),
    chargedIso = Var("tauID('chargedIsoPtSum')", float, doc="charged isolation", precision=10),
    neutralIso = Var("tauID('neutralIsoPtSum')", float, doc="neutral (photon) isolation", precision=10),
    puCorr = Var("tauID('puCorrPtSum')", float, doc="pileup correction", precision=10),
    photonsOutsideSignalCone = Var("tauID('photonPtSumOutsideSignalCone')", float, doc="sum of photons outside signal cone", precision=10),
    # _tauIdWPMask appends ": bitmask ..." itself, so the doc must not end with ": "
    # (otherwise the stored description reads "V3: : bitmask ...")
    idAntiMu = _tauId2WPMask("againstMuon%s3", doc="Anti-muon discriminator V3"),
    # MVA 2017 v2 variables
    rawMVAoldDM2017v2 = Var("tauID('byIsolationMVArun2017v2DBoldDMwLTraw2017')", float, doc="byIsolationMVArun2017v2DBoldDMwLT raw output discriminator (2017v2)", precision=10),
    rawMVAnewDM2017v2 = Var("tauID('byIsolationMVArun2017v2DBnewDMwLTraw2017')", float, doc='byIsolationMVArun2017v2DBnewDMwLT raw output discriminator (2017v2)', precision=10),
    # NOTE(review): no precision is given here, unlike the two raw MVA outputs above - confirm this is intended
    rawMVAoldDMdR032017v2 = Var("tauID('byIsolationMVArun2017v2DBoldDMdR0p3wLTraw2017')", float, doc='byIsolationMVArun2017v2DBoldDMdR0p3wLT raw output discriminator (2017v2)'),
    idMVAnewDM2017v2 = _tauId7WPMask("by%sIsolationMVArun2017v2DBnewDMwLT2017", doc="IsolationMVArun2017v2DBnewDMwLT ID working point (2017v2)"),
    idMVAoldDM2017v2 = _tauId7WPMask("by%sIsolationMVArun2017v2DBoldDMwLT2017", doc="IsolationMVArun2017v2DBoldDMwLT ID working point (2017v2)"),
    idMVAoldDMdR032017v2 = _tauId7WPMask("by%sIsolationMVArun2017v2DBoldDMdR0p3wLT2017", doc="IsolationMVArun2017v2DBoldDMdR0p3wLT ID working point (2017v2)"),
    # anti-electron MVA6 discriminator
    rawAntiEle2018 = Var("tauID('againstElectronMVA6Raw')", float, doc="Anti-electron MVA discriminator V6 raw output discriminator (2018)", precision=10),
    rawAntiEleCat2018 = Var("tauID('againstElectronMVA6category')", int, doc="Anti-electron MVA discriminator V6 category (2018)"),
    idAntiEle2018 = _tauId5WPMask("againstElectron%sMVA6", doc="Anti-electron MVA discriminator V6 (2018)"),
)

boostedTauTable.variables = _boostedTauVarsBase

tauGenJets.GenParticles = cms.InputTag("prunedGenParticles")
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This redefines the tauGenJets producer, which is also used in the configuration for standard taus. This cannot be done like this, even if the tauGenJets configurations are identical in both cases. So this should be removed from here or, if necessary (i.e. the collection from taus_cff cannot be used), a clone should be created.
Then, is new, boosted specific, table of genVisTaus needed or one created for standard taus can be reused (see further comments)?
Finally: I think prunedGenParticles should be replaced by finalGenParticles here for correct mother indexing (it will happen also for standard taus)

tauGenJets.includeNeutrinos = cms.bool(False)

genVisBoostedTaus = cms.EDProducer("GenVisTauProducer",
src = cms.InputTag("tauGenJetsSelectorAllHadrons"),
srcGenParticles = cms.InputTag("prunedGenParticles")
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably not needed as one can use similar guys for standard taus (genVisTaus).
If kept, I think prunedGenParticles should be replaced by finalGenParticles here for correct mother indexing (it will happen also for standard taus), consistently with L76.

)

genVisBoostedTauTable = cms.EDProducer("SimpleCandidateFlatTableProducer",
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this table needed? It basically does not differ from such table for standard taus called genVisTauTable

src = cms.InputTag("genVisBoostedTaus"),
cut = cms.string("pt > 20."),
name = cms.string("GenVisBoostedTau"),
doc = cms.string("gen hadronic taus "),
singleton = cms.bool(False), # the number of entries is variable
extension = cms.bool(False), # this is the main table for generator level hadronic tau decays
variables = cms.PSet(
pt = Var("pt", float,precision=8),
phi = Var("phi", float,precision=8),
eta = Var("eta", float,precision=8),
mass = Var("mass", float,precision=8),
charge = Var("charge", int),
status = Var("status", int, doc="Hadronic tau decay mode. 0=OneProng0PiZero, 1=OneProng1PiZero, 2=OneProng2PiZero, 10=ThreeProng0PiZero, 11=ThreeProng1PiZero, 15=Other"),
genPartIdxMother = Var("?numberOfMothers>0?motherRef(0).key():-1", int, doc="index of the mother particle"),
)
)

# Matches the boosted-tau table's source collection to generator electrons and muons.
boostedTausMCMatchLepTauForTable = cms.EDProducer("MCMatcher",  # cut on deltaR, deltaPt/Pt; pick best by deltaR
    src = boostedTauTable.src,                   # final reco collection (same input as the boosted-tau table)
    matched = cms.InputTag("finalGenParticles"), # final mc-truth particle collection
    mcPdgId = cms.vint32(11,13),                 # PDG IDs to match, absolute values: 11 = electron, 13 = muon
    checkCharge = cms.bool(False),               # True would require RECO and MC objects to have the same charge
    mcStatus = cms.vint32(),                     # PYTHIA status codes (1 = stable, 2 = shower, 3 = hard scattering); left empty here
    maxDeltaR = cms.double(0.3),                 # Maximum deltaR for the match
    maxDPtRel = cms.double(0.5),                 # Maximum deltaPt/Pt for the match
    resolveAmbiguities = cms.bool(True),         # Forbid two RECO objects to match to the same GEN object
    resolveByMatchQuality = cms.bool(True),      # False = just match input in order; True = pick lowest deltaR pair first
)

boostedTausMCMatchHadTauForTable = cms.EDProducer("MCMatcher", # cut on deltaR, deltaPt/Pt; pick best by deltaR
src = boostedTauTable.src, # final reco collection
matched = cms.InputTag("genVisBoostedTaus"), # generator level hadronic tau decays
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See comment above - probably genVisTaus for standard taus can be used instead.

mcPdgId = cms.vint32(15), # one or more PDG ID (15 = tau); absolute values (see below)
checkCharge = cms.bool(False), # True = require RECO and MC objects to have the same charge
mcStatus = cms.vint32(), # CV: no *not* require certain status code for matching (status code corresponds to decay mode for hadronic tau decays)
maxDeltaR = cms.double(0.3), # Maximum deltaR for the match
maxDPtRel = cms.double(1.), # Maximum deltaPt/Pt for the match
resolveAmbiguities = cms.bool(True), # Forbid two RECO objects to match to the same GEN object
resolveByMatchQuality = cms.bool(True), # False = just match input in order; True = pick lowest deltaR pair first
)

# MC-matching table for the boosted taus; it takes its source collection and
# object name from boostedTauTable and consumes the two match maps defined above.
boostedTauMCTable = cms.EDProducer("CandMCMatchTableProducer",
    src = boostedTauTable.src,
    mcMap = cms.InputTag("boostedTausMCMatchLepTauForTable"),       # electron/muon match map
    mcMapVisTau = cms.InputTag("boostedTausMCMatchHadTauForTable"), # generator-level hadronic tau match map
    objName = boostedTauTable.name,
    objType = cms.string("Tau"),
    branchName = cms.string("genPart"),
    docString = cms.string("MC matching to status==2 taus"),
)


# Selection of the boosted taus
boostedTauSequence = cms.Sequence(finalBoostedTaus)
# Flat table for the selected boosted taus
boostedTauTables = cms.Sequence(boostedTauTable)
# Generator-level inputs, gen table, MC matchers and the MC-match table
boostedTauMC = cms.Sequence(
    tauGenJets
    + tauGenJetsSelectorAllHadrons
    + genVisBoostedTaus
    + genVisBoostedTauTable
    + boostedTausMCMatchLepTauForTable
    + boostedTausMCMatchHadTauForTable
    + boostedTauMCTable
)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adjust MC sequence to removals proposed in earlier comments.


31 changes: 28 additions & 3 deletions PhysicsTools/NanoAOD/python/nano_cff.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from PhysicsTools.NanoAOD.jets_cff import *
from PhysicsTools.NanoAOD.muons_cff import *
from PhysicsTools.NanoAOD.taus_cff import *
from PhysicsTools.NanoAOD.boostedTaus_cff import *
from PhysicsTools.NanoAOD.electrons_cff import *
from PhysicsTools.NanoAOD.photons_cff import *
from PhysicsTools.NanoAOD.globals_cff import *
Expand Down Expand Up @@ -105,22 +106,28 @@
(run2_miniAOD_80XLegacy | run2_nanoAOD_94X2016 | run2_nanoAOD_94XMiniAODv1 | run2_nanoAOD_94XMiniAODv2 | run2_nanoAOD_102Xv1).toModify(l1bits, storeUnprefireableBit=False)

nanoSequenceCommon = cms.Sequence(
nanoMetadata + jetSequence + muonSequence + tauSequence + electronSequence+photonSequence+vertexSequence+
nanoMetadata + jetSequence + muonSequence + tauSequence + boostedTauSequence + electronSequence+photonSequence+vertexSequence+
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will need to be excluded for old eras

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which era should it be enabled/excluded for? The most recent era I know is 106Xv2. Should I default to including it only for this?

isoTrackSequence + jetLepSequence + # must be after all the leptons
linkedObjects +
jetTables + muonTables + tauTables + electronTables + photonTables + globalTables +vertexTables+ metTables+simpleCleanerTable + isoTrackTables
jetTables + muonTables + tauTables + boostedTauTables + electronTables + photonTables + globalTables +vertexTables+ metTables+simpleCleanerTable + isoTrackTables
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here

)
#remove boosted tau from previous eras
(run2_miniAOD_80XLegacy | run2_nanoAOD_92X | run2_nanoAOD_94XMiniAODv1 | run2_nanoAOD_94X2016 | run2_nanoAOD_94XMiniAODv2 | run2_nanoAOD_102Xv1 | run2_nanoAOD_106Xv1).toReplaceWith(nanoSequenceCommon, nanoSequenceCommon.copyAndExclude([boostedTauSequence, boostedTauTables]))

nanoSequenceOnlyFullSim = cms.Sequence(triggerObjectTables + l1bits)
nanoSequenceOnlyData = cms.Sequence(protonTables + lhcInfoTable)

nanoSequence = cms.Sequence(nanoSequenceCommon + nanoSequenceOnlyData + nanoSequenceOnlyFullSim)

nanoSequenceFS = cms.Sequence(genParticleSequence + genVertexTables + particleLevelSequence + nanoSequenceCommon + jetMC + muonMC + electronMC + photonMC + tauMC + metMC + ttbarCatMCProducers + globalTablesMC + btagWeightTable + genWeightsTable + genVertexTable + genParticleTables + particleLevelTables + lheInfoTable + ttbarCategoryTable )
nanoSequenceFS = cms.Sequence(genParticleSequence + genVertexTables + particleLevelSequence + nanoSequenceCommon + jetMC + muonMC + electronMC + photonMC + tauMC + boostedTauMC + metMC + ttbarCatMCProducers + globalTablesMC + btagWeightTable + genWeightsTable + genVertexTable + genParticleTables + particleLevelTables + lheInfoTable + ttbarCategoryTable )
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here. In the lines below, this is done for something else and you can do it the same way.


# For the listed eras, replace nanoSequenceFS by a copy without the gen-vertex tables.
# (run2_nanoAOD_94X2016 used to be listed twice in this OR chain; the duplicate is dropped.)
(run2_nanoAOD_92X | run2_miniAOD_80XLegacy | run2_nanoAOD_94X2016 |
 run2_nanoAOD_94XMiniAODv1 | run2_nanoAOD_94XMiniAODv2 |
 run2_nanoAOD_102Xv1).toReplaceWith(nanoSequenceFS, nanoSequenceFS.copyAndExclude([genVertexTable, genVertexT0Table]))

#remove boosted tau from previous eras
(run2_miniAOD_80XLegacy | run2_nanoAOD_92X | run2_nanoAOD_94XMiniAODv1 | run2_nanoAOD_94X2016 | run2_nanoAOD_94XMiniAODv2 | run2_nanoAOD_102Xv1 | run2_nanoAOD_106Xv1).toReplaceWith(nanoSequenceFS, nanoSequenceFS.copyAndExclude([boostedTauMC]))

# GenVertex only stored in newer MiniAOD
nanoSequenceMC = nanoSequenceFS.copy()
nanoSequenceMC.insert(nanoSequenceFS.index(nanoSequenceCommon)+1,nanoSequenceOnlyFullSim)
Expand All @@ -141,6 +148,23 @@ def nanoAOD_addTauIds(process):
process.rerunMvaIsolationSequence)
return process

def nanoAOD_addBoostedTauIds(process):
    """Rerun tau IDs on slimmedTausBoosted and hook the result into boostedTauSequence.

    A TauIDEmbedder is configured with postfix "Boosted" and run; afterwards
    process.rerunMvaIsolationSequenceBoosted and the updated collection
    "slimmedTausBoostedNewID" are inserted, in that order, directly in front of
    finalBoostedTaus. Returns the modified process.
    """
    updatedBoostedTauName = "slimmedTausBoostedNewID"
    boostedTauIdEmbedder = tauIdConfig.TauIDEmbedder(
        process,
        debug=False,
        originalTauName="slimmedTausBoosted",
        updatedTauName=updatedBoostedTauName,
        postfix="Boosted",
        toKeep=["2017v2", "dR0p32017v2", "newDM2017v2", "againstEle2018"],
    )
    boostedTauIdEmbedder.runTauID()

    sequence = process.boostedTauSequence
    # Each insert goes right before finalBoostedTaus, so the final order is:
    # ID rerun sequence, updated tau collection, finalBoostedTaus.
    for step in (process.rerunMvaIsolationSequenceBoosted,
                 getattr(process, updatedBoostedTauName)):
        sequence.insert(sequence.index(process.finalBoostedTaus), step)

    return process


from PhysicsTools.PatAlgos.tools.jetTools import updateJetCollection
def nanoAOD_addDeepInfo(process,addDeepBTag,addDeepFlavour):
_btagDiscriminators=[]
Expand Down Expand Up @@ -371,6 +395,7 @@ def nanoAOD_customizeCommon(process):
addParticleNet=nanoAOD_addDeepInfoAK8_switch.nanoAOD_addParticleNet_switch,
jecPayload=nanoAOD_addDeepInfoAK8_switch.jecPayload)
(run2_nanoAOD_94XMiniAODv1 | run2_nanoAOD_94X2016 | run2_nanoAOD_94XMiniAODv2 | run2_nanoAOD_102Xv1 | run2_nanoAOD_106Xv1).toModify(process, lambda p : nanoAOD_addTauIds(p))
(~(run2_nanoAOD_94XMiniAODv1 | run2_nanoAOD_94X2016 | run2_nanoAOD_94XMiniAODv2 | run2_nanoAOD_102Xv1 | run2_nanoAOD_106Xv1)).toModify(process, lambda p : nanoAOD_addBoostedTauIds(p))
return process

def nanoAOD_customizeData(process):
Expand Down