Add boosted taus to NanoAOD (#150)

* Merge pull request cms-sw#33150 from cms-tau-pog/CMSSW_11_3_X_tau-pog_tauIDtoolsDev Updates to tauID python tool * Initial working commit of boosted taus in CMSSW_11_3 NanoAOD * Remove 2015 anti-E for 10_6v2 era compatibility in boosted taus * Initial working commit of boosted taus in CMSSW_11_3 NanoAOD * Remove 2015 anti-E for 10_6v2 era compatibility in boosted taus * Remove commented boosted tau nanoAOD code * Remove main nanoAOD config comments * Remove leading charged hadronic candidate dxy and dz * Update boosted tau configuration to remove excess ID variables * Fix removal of boosted tau vars base * Remove boosted tau sequences from previous eras * Remove redundant decay mode information * some polishing * change Gen information for boosted taus * Update nanoDQM for boosted taus Co-authored-by: cmsbuild <cmsbuild@cern.ch> Co-authored-by: Andrew Loeliger <aloelige@cern.ch> Co-authored-by: Andrew David Loeliger <andrew.loeliger@cern.ch>
cms-tau-pog · Apr 26, 2021 · 96962a9 · 96962a9
1 parent 1d76fdb
commit 96962a9
Show file tree

Hide file tree

Showing 3 changed files with 176 additions and 3 deletions.
diff --git a/PhysicsTools/NanoAOD/python/boostedTaus_cff.py b/PhysicsTools/NanoAOD/python/boostedTaus_cff.py
@@ -0,0 +1,114 @@
+import FWCore.ParameterSet.Config as cms
+from PhysicsTools.NanoAOD.common_cff import *
+
+##################### Updated tau collection with MVA-based tau-Ids rerun #######
+# Used only in some eras
+from PhysicsTools.NanoAOD.taus_updatedMVAIds_cff import *
+
+##################### User floats producers, selectors ##########################
+
+
+finalBoostedTaus = cms.EDFilter("PATTauRefSelector",  # basic boosted-tau selection; the cut string is also quoted in boostedTauTable.doc
+    src = cms.InputTag("slimmedTausBoostedNewID"),  # same name as updatedBoostedTauName in nanoAOD_addBoostedTauIds (nano_cff.py)
+    cut = cms.string("pt > 40 && tauID('decayModeFindingNewDMs') && (tauID('byVVLooseIsolationMVArun2017v2DBoldDMwLT2017') || tauID('byVVLooseIsolationMVArun2017v2DBoldDMdR0p3wLT2017') || tauID('byVVLooseIsolationMVArun2017v2DBnewDMwLT2017'))")  # pt > 40, new-DM decay mode finding, and VVLoose in at least one of the three 2017v2 MVA isolations
+)
+
+
+##################### Tables for final output and docs ##########################
+def _tauIdWPMask(pattern, choices, doc=""):  # Var summing 2**i * tauID(pattern % choices[i]); a bitmask provided each tauID(...) evaluates to 0 or 1
+    return Var(" + ".join(["%d * tauID('%s')" % (pow(2,i), pattern % c) for (i,c) in enumerate(choices)]), "uint8",  # uint8 leaves room for at most 8 working-point bits
+               doc=doc+": bitmask "+", ".join(["%d = %s" % (pow(2,i),c) for (i,c) in enumerate(choices)]))  # appends ": bitmask 1 = <first choice>, 2 = <second choice>, ..." to doc
+def _tauId2WPMask(pattern,doc):  # _tauIdNWPMask wrappers: N is the number of working points; the first listed choice maps to the lowest bit
+    return _tauIdWPMask(pattern,choices=("Loose","Tight"),doc=doc)
+def _tauId3WPMask(pattern,doc):
+    return _tauIdWPMask(pattern,choices=("Loose","Medium","Tight"),doc=doc)
+def _tauId4WPMask(pattern,doc):
+    return _tauIdWPMask(pattern, choices=("VLoose", "Loose", "Medium", "Tight"), doc=doc)
+def _tauId5WPMask(pattern,doc):
+    return _tauIdWPMask(pattern,choices=("VLoose","Loose","Medium","Tight","VTight"),doc=doc)
+def _tauId6WPMask(pattern,doc):
+    return _tauIdWPMask(pattern,choices=("VLoose","Loose","Medium","Tight","VTight","VVTight"),doc=doc)
+def _tauId7WPMask(pattern,doc):
+    return _tauIdWPMask(pattern,choices=("VVLoose","VLoose","Loose","Medium","Tight","VTight","VVTight"),doc=doc)
+def _tauId8WPMask(pattern,doc):  # 8 choices: the largest mask that still fits the uint8 used by _tauIdWPMask
+    return _tauIdWPMask(pattern,choices=("VVVLoose","VVLoose","VLoose","Loose","Medium","Tight","VTight","VVTight"),doc=doc)
+
+boostedTauTable = cms.EDProducer("SimpleCandidateFlatTableProducer",  # flat table built from the selected boosted taus
+    src = cms.InputTag("finalBoostedTaus"),  # output of the finalBoostedTaus selector
+    cut = cms.string(""), # we should not filter on cross-linked collections
+    name= cms.string("boostedTau"),  # reused as objName by boostedTauMCTable; same key as the boostedTau PSet in nanoDQM_cfi.py
+    doc = cms.string("slimmedBoostedTaus after basic selection (" + finalBoostedTaus.cut.value()+")"),  # embeds the selector's cut string in the description
+    singleton = cms.bool(False), # the number of entries is variable
+    extension = cms.bool(False), # this is the main table for the boosted taus
+    variables = cms.PSet() # placeholder; replaced by _boostedTauVarsBase right after that PSet is defined
+)
+_boostedTauVarsBase = cms.PSet(P4Vars,
+       charge = Var("charge", int, doc="electric charge"),
+       jetIdx = Var("?hasUserCand('jet')?userCand('jet').key():-1", int, doc="index of the associated jet (-1 if none)"),
+       decayMode = Var("decayMode()",int),
+       leadTkPtOverTauPt = Var("leadChargedHadrCand.pt/pt ",float, doc="pt of the leading track divided by tau pt",precision=10),
+       leadTkDeltaEta = Var("leadChargedHadrCand.eta - eta ",float, doc="eta of the leading track, minus tau eta",precision=8),
+       leadTkDeltaPhi = Var("deltaPhi(leadChargedHadrCand.phi, phi) ",float, doc="phi of the leading track, minus tau phi",precision=8),
+
+       rawIso = Var( "tauID('byCombinedIsolationDeltaBetaCorrRaw3Hits')", float, doc = "combined isolation (deltaBeta corrections)", precision=10),
+       rawIsodR03 = Var( "(tauID('chargedIsoPtSumdR03')+max(0.,tauID('neutralIsoPtSumdR03')-0.072*tauID('puCorrPtSum')))", float, doc = "combined isolation (deltaBeta corrections, dR=0.3)", precision=10),
+       chargedIso = Var( "tauID('chargedIsoPtSum')", float, doc = "charged isolation", precision=10),
+       neutralIso = Var( "tauID('neutralIsoPtSum')", float, doc = "neutral (photon) isolation", precision=10),
+       puCorr = Var( "tauID('puCorrPtSum')", float, doc = "pileup correction", precision=10),
+       photonsOutsideSignalCone = Var( "tauID('photonPtSumOutsideSignalCone')", float, doc = "sum of photons outside signal cone", precision=10),
+       idAntiMu = _tauId2WPMask("againstMuon%s3", doc= "Anti-muon discriminator V3: "),
+       #MVA 2017 v2 variables
+       rawMVAoldDM2017v2=Var("tauID('byIsolationMVArun2017v2DBoldDMwLTraw2017')",float, doc="byIsolationMVArun2017v2DBoldDMwLT raw output discriminator (2017v2)",precision=10),
+       rawMVAnewDM2017v2 = Var("tauID('byIsolationMVArun2017v2DBnewDMwLTraw2017')",float,doc='byIsolationMVArun2017v2DBnewDMwLT raw output discriminator (2017v2)',precision=10),
+       rawMVAoldDMdR032017v2 = Var("tauID('byIsolationMVArun2017v2DBoldDMdR0p3wLTraw2017')",float,doc='byIsolationMVArun2017v2DBoldDMdR0p3wLT raw output discriminator (2017v2)'),    
+       idMVAnewDM2017v2 = _tauId7WPMask("by%sIsolationMVArun2017v2DBnewDMwLT2017", doc="IsolationMVArun2017v2DBnewDMwLT ID working point (2017v2)"),
+       idMVAoldDM2017v2=_tauId7WPMask("by%sIsolationMVArun2017v2DBoldDMwLT2017",doc="IsolationMVArun2017v2DBoldDMwLT ID working point (2017v2)"),
+       idMVAoldDMdR032017v2 = _tauId7WPMask("by%sIsolationMVArun2017v2DBoldDMdR0p3wLT2017",doc="IsolationMVArun2017v2DBoldDMdR0p3wLT ID working point (2017v2)"),
+       rawAntiEle2018 = Var("tauID('againstElectronMVA6Raw')", float, doc= "Anti-electron MVA discriminator V6 raw output discriminator (2018)", precision=10),
+       rawAntiEleCat2018 = Var("tauID('againstElectronMVA6category')", int, doc="Anti-electron MVA discriminator V6 category (2018)"),
+       idAntiEle2018 = _tauId5WPMask("againstElectron%sMVA6", doc= "Anti-electron MVA discriminator V6 (2018)"),
+)
+
+boostedTauTable.variables = _boostedTauVarsBase
+
+
+boostedTausMCMatchLepTauForTable = cms.EDProducer("MCMatcher",  # cut on deltaR, deltaPt/Pt; pick best by deltaR
+    src         = boostedTauTable.src,                 # final reco collection (same input as the boostedTau table)
+    matched     = cms.InputTag("finalGenParticles"), # final mc-truth particle collection
+    mcPdgId     = cms.vint32(11,13),            # one or more PDG ID (11 = electron, 13 = muon); absolute values
+    checkCharge = cms.bool(False),              # True = require RECO and MC objects to have the same charge
+    mcStatus    = cms.vint32(),                 # PYTHIA status code (1 = stable, 2 = shower, 3 = hard scattering); left empty here
+    maxDeltaR   = cms.double(0.3),              # Maximum deltaR for the match
+    maxDPtRel   = cms.double(0.5),              # Maximum deltaPt/Pt for the match
+    resolveAmbiguities    = cms.bool(True),     # Forbid two RECO objects to match to the same GEN object
+    resolveByMatchQuality = cms.bool(True),     # False = just match input in order; True = pick lowest deltaR pair first
+)
+
+# This requires genVisTaus in taus_cff.py
+boostedTausMCMatchHadTauForTable = cms.EDProducer("MCMatcher",  # cut on deltaR, deltaPt/Pt; pick best by deltaR
+    src         = boostedTauTable.src,                 # final reco collection (same input as the boostedTau table)
+    matched     = cms.InputTag("genVisTaus"),   # generator level hadronic tau decays
+    mcPdgId     = cms.vint32(15),               # one or more PDG ID (15 = tau); absolute values
+    checkCharge = cms.bool(False),              # True = require RECO and MC objects to have the same charge
+    mcStatus    = cms.vint32(),                 # CV: do *not* require certain status code for matching (status code corresponds to decay mode for hadronic tau decays)
+    maxDeltaR   = cms.double(0.3),              # Maximum deltaR for the match
+    maxDPtRel   = cms.double(1.),               # Maximum deltaPt/Pt for the match
+    resolveAmbiguities    = cms.bool(True),     # Forbid two RECO objects to match to the same GEN object
+    resolveByMatchQuality = cms.bool(True),     # False = just match input in order; True = pick lowest deltaR pair first
+)
+
+boostedTauMCTable = cms.EDProducer("CandMCMatchTableProducer",  # MC-matching table for the selected boosted taus
+    src = boostedTauTable.src,  # same input collection as the boostedTau table
+    mcMap = cms.InputTag("boostedTausMCMatchLepTauForTable"),  # match map to gen electrons/muons (mcPdgId 11, 13)
+    mcMapVisTau = cms.InputTag("boostedTausMCMatchHadTauForTable"),  # match map to genVisTaus (hadronic tau decays)
+    objName = boostedTauTable.name,  # "boostedTau", taken from the main table
+    objType = cms.string("Tau"),
+    branchName = cms.string("genPart"),  # nanoDQM_cfi.py lists genPartFlav and genPartIdx for boostedTau
+    docString = cms.string("MC matching to status==2 taus"),
+)
+
+
+boostedTauSequence = cms.Sequence(finalBoostedTaus)  # nanoAOD_addBoostedTauIds (nano_cff.py) inserts the tau-ID modules before finalBoostedTaus
+boostedTauTables = cms.Sequence(boostedTauTable)  # added to nanoSequenceCommon in nano_cff.py
+boostedTauMC = cms.Sequence(boostedTausMCMatchLepTauForTable + boostedTausMCMatchHadTauForTable + boostedTauMCTable)  # MC matching; added to nanoSequenceFS in nano_cff.py
+
diff --git a/PhysicsTools/NanoAOD/python/nanoDQM_cfi.py b/PhysicsTools/NanoAOD/python/nanoDQM_cfi.py
@@ -801,5 +801,39 @@
                 Plot1D('pt', 'pt', 40, 0, 400, 'pt'),
             )
         ),
+        boostedTau = cms.PSet(
+            sels = cms.PSet(),
+            plots = cms.VPSet(
+                Count1D('_size', 7, -0.5, 6.5, "slimmedBoostedTaus after basic selection (pt > 40 && tauID('decayModeFindingNewDMs') && (tauID('byVVLooseIsolationMVArun2017v2DBoldDMwLT2017') || tauID('byVVLooseIsolationMVArun2017v2DBoldDMdR0p3wLT2017') || tauID('byVVLooseIsolationMVArun2017v2DBnewDMwLT2017')))"),
+                Plot1D('charge', 'charge', 3, -1.5, 1.5, 'electric charge'),
+                Plot1D('chargedIso', 'chargedIso', 20, 0, 200, 'charged isolation'),
+                Plot1D('decayMode', 'decayMode', 12, -0.5, 11.5, 'decayMode()'),
+                Plot1D('eta', 'eta', 20, -3, 3, 'eta'),
+                Plot1D('genPartFlav', 'genPartFlav', 6, -0.5, 5.5, 'Flavour of genParticle for MC matching to status==2 taus: 1 = prompt electron, 2 = prompt muon, 3 = tau->e decay, 4 = tau->mu decay, 5 = hadronic tau decay, 0 = unknown or unmatched'),
+                NoPlot('genPartIdx'),
+                Plot1D('idAntiEle2018', 'idAntiEle2018', 32, -0.5, 31.5, 'Anti-electron MVA discriminator V6 (2018): bitmask 1 = VLoose, 2 = Loose, 4 = Medium, 8 = Tight, 16 = VTight'),
+                Plot1D('idAntiMu', 'idAntiMu', 4, -0.5, 3.5, 'Anti-muon discriminator V3: : bitmask 1 = Loose, 2 = Tight'),
+                Plot1D('idMVAnewDM2017v2', 'idMVAnewDM2017v2', 128, -0.5, 127.5, 'IsolationMVArun2017v2DBnewDMwLT ID working point (2017v2): bitmask 1 = VVLoose, 2 = VLoose, 4 = Loose, 8 = Medium, 16 = Tight, 32 = VTight, 64 = VVTight'),
+                Plot1D('idMVAoldDM2017v2', 'idMVAoldDM2017v2', 128, -0.5, 127.5, 'IsolationMVArun2017v2DBoldDMwLT ID working point (2017v2): bitmask 1 = VVLoose, 2 = VLoose, 4 = Loose, 8 = Medium, 16 = Tight, 32 = VTight, 64 = VVTight'),
+                Plot1D('idMVAoldDMdR032017v2', 'idMVAoldDMdR032017v2', 128, -0.5, 127.5, 'IsolationMVArun2017v2DBoldDMdR0p3wLT ID working point (2017v2): bitmask 1 = VVLoose, 2 = VLoose, 4 = Loose, 8 = Medium, 16 = Tight, 32 = VTight, 64 = VVTight'),
+                NoPlot('jetIdx'),
+                Plot1D('leadTkDeltaEta', 'leadTkDeltaEta', 20, -0.1, 0.1, 'eta of the leading track, minus tau eta'),
+                Plot1D('leadTkDeltaPhi', 'leadTkDeltaPhi', 20, -0.1, 0.1, 'phi of the leading track, minus tau phi'),
+                Plot1D('leadTkPtOverTauPt', 'leadTkPtOverTauPt', 20, 0, 2, 'pt of the leading track divided by tau pt'),
+                Plot1D('mass', 'mass', 20, 0, 5, 'mass'),
+                Plot1D('neutralIso', 'neutralIso', 20, 0, 200, 'neutral (photon) isolation'),
+                Plot1D('phi', 'phi', 20, -3.14159, 3.14159, 'phi'),
+                Plot1D('photonsOutsideSignalCone', 'photonsOutsideSignalCone', 20, 0, 30, 'sum of photons outside signal cone'),
+                Plot1D('pt', 'pt', 20, 0, 200, 'pt'),
+                Plot1D('puCorr', 'puCorr', 20, 0, 90, 'pileup correction'),
+                Plot1D('rawAntiEle2018', 'rawAntiEle2018', 20, -100, 100, 'Anti-electron MVA discriminator V6 raw output discriminator (2018)'),
+                Plot1D('rawAntiEleCat2018', 'rawAntiEleCat2018', 20, -100, 100, 'Anti-electron MVA discriminator V6 category (2018)'),
+                Plot1D('rawIso', 'rawIso', 20, 0, 200, 'combined isolation (deltaBeta corrections)'),
+                Plot1D('rawIsodR03', 'rawIsodR03', 20, 0, 200, 'combined isolation (deltaBeta corrections, dR=0.3)'),
+                Plot1D('rawMVAnewDM2017v2', 'rawMVAnewDM2017v2', 20, -1, 1, 'byIsolationMVArun2017v2DBnewDMwLT raw output discriminator (2017v2)'),
+                Plot1D('rawMVAoldDM2017v2', 'rawMVAoldDM2017v2', 20, -1, 1, 'byIsolationMVArun2017v2DBoldDMwLT raw output discriminator (2017v2)'),
+                Plot1D('rawMVAoldDMdR032017v2', 'rawMVAoldDMdR032017v2', 20, -1, 1, 'byIsolationMVArun2017v2DBoldDMdR0p3wLT raw output discriminator (2017v2)'),
+            )
+        ),
     )
 )
diff --git a/PhysicsTools/NanoAOD/python/nano_cff.py b/PhysicsTools/NanoAOD/python/nano_cff.py
@@ -5,6 +5,7 @@
 from PhysicsTools.NanoAOD.jets_cff import *
 from PhysicsTools.NanoAOD.muons_cff import *
 from PhysicsTools.NanoAOD.taus_cff import *
+from PhysicsTools.NanoAOD.boostedTaus_cff import *
 from PhysicsTools.NanoAOD.electrons_cff import *
 from PhysicsTools.NanoAOD.photons_cff import *
 from PhysicsTools.NanoAOD.globals_cff import *
@@ -105,22 +106,28 @@
 (run2_miniAOD_80XLegacy | run2_nanoAOD_94X2016 | run2_nanoAOD_94XMiniAODv1 | run2_nanoAOD_94XMiniAODv2 | run2_nanoAOD_102Xv1).toModify(l1bits, storeUnprefireableBit=False)
 
 nanoSequenceCommon = cms.Sequence(
-        nanoMetadata + jetSequence + muonSequence + tauSequence + electronSequence+photonSequence+vertexSequence+
+        nanoMetadata + jetSequence + muonSequence + tauSequence + boostedTauSequence + electronSequence+photonSequence+vertexSequence+
         isoTrackSequence + jetLepSequence + # must be after all the leptons
         linkedObjects  +
-        jetTables + muonTables + tauTables + electronTables + photonTables +  globalTables +vertexTables+ metTables+simpleCleanerTable + isoTrackTables
+        jetTables + muonTables + tauTables + boostedTauTables + electronTables + photonTables +  globalTables +vertexTables+ metTables+simpleCleanerTable + isoTrackTables
         )
+#remove boosted tau from previous eras
+(run2_miniAOD_80XLegacy | run2_nanoAOD_92X | run2_nanoAOD_94XMiniAODv1 | run2_nanoAOD_94X2016 | run2_nanoAOD_94XMiniAODv2 | run2_nanoAOD_102Xv1 | run2_nanoAOD_106Xv1).toReplaceWith(nanoSequenceCommon, nanoSequenceCommon.copyAndExclude([boostedTauSequence, boostedTauTables]))
+
 nanoSequenceOnlyFullSim = cms.Sequence(triggerObjectTables + l1bits)
 nanoSequenceOnlyData = cms.Sequence(protonTables + lhcInfoTable)
 
 nanoSequence = cms.Sequence(nanoSequenceCommon + nanoSequenceOnlyData + nanoSequenceOnlyFullSim)
 
-nanoSequenceFS = cms.Sequence(genParticleSequence + genVertexTables + particleLevelSequence + nanoSequenceCommon + jetMC + muonMC + electronMC + photonMC + tauMC + metMC + ttbarCatMCProducers +  globalTablesMC + btagWeightTable + genWeightsTable + genVertexTable + genParticleTables + particleLevelTables + lheInfoTable  + ttbarCategoryTable )
+nanoSequenceFS = cms.Sequence(genParticleSequence + genVertexTables + particleLevelSequence + nanoSequenceCommon + jetMC + muonMC + electronMC + photonMC + tauMC + boostedTauMC + metMC + ttbarCatMCProducers +  globalTablesMC + btagWeightTable + genWeightsTable + genVertexTable + genParticleTables + particleLevelTables + lheInfoTable  + ttbarCategoryTable )
 
 (run2_nanoAOD_92X | run2_miniAOD_80XLegacy | run2_nanoAOD_94X2016 | run2_nanoAOD_94X2016 | \
     run2_nanoAOD_94XMiniAODv1 | run2_nanoAOD_94XMiniAODv2 | \
     run2_nanoAOD_102Xv1).toReplaceWith(nanoSequenceFS, nanoSequenceFS.copyAndExclude([genVertexTable, genVertexT0Table]))
 
+#remove boosted tau from previous eras
+(run2_miniAOD_80XLegacy | run2_nanoAOD_92X | run2_nanoAOD_94XMiniAODv1 | run2_nanoAOD_94X2016 | run2_nanoAOD_94XMiniAODv2 | run2_nanoAOD_102Xv1 | run2_nanoAOD_106Xv1).toReplaceWith(nanoSequenceFS, nanoSequenceFS.copyAndExclude([boostedTauMC]))
+
 # GenVertex only stored in newer MiniAOD
 nanoSequenceMC = nanoSequenceFS.copy()
 nanoSequenceMC.insert(nanoSequenceFS.index(nanoSequenceCommon)+1,nanoSequenceOnlyFullSim)
@@ -141,6 +148,23 @@ def nanoAOD_addTauIds(process):
                                    process.rerunMvaIsolationSequence)
     return process
 
+def nanoAOD_addBoostedTauIds(process):  # runs the tau-ID embedder on slimmedTausBoosted, puts the new modules in front of finalBoostedTaus, returns process
+    updatedBoostedTauName = "slimmedTausBoostedNewID"  # must match finalBoostedTaus.src in boostedTaus_cff.py
+    boostedTauIdEmbedder = tauIdConfig.TauIDEmbedder(process, debug=False, 
+                                                     originalTauName = "slimmedTausBoosted",
+                                                     updatedTauName = updatedBoostedTauName,
+                                                     postfix="Boosted",  # NOTE(review): presumably the suffix that yields rerunMvaIsolationSequenceBoosted, read below - confirm
+                                                     toKeep = [ "2017v2", "dR0p32017v2", "newDM2017v2","againstEle2018",])
+    boostedTauIdEmbedder.runTauID()  # expected to attach rerunMvaIsolationSequenceBoosted and slimmedTausBoostedNewID to process; both are read below
+    process.boostedTauSequence.insert(process.boostedTauSequence.index(process.finalBoostedTaus),
+                                      process.rerunMvaIsolationSequenceBoosted)  # inserted at finalBoostedTaus' position, i.e. ahead of it
+
+    process.boostedTauSequence.insert(process.boostedTauSequence.index(process.finalBoostedTaus),
+                                      getattr(process, updatedBoostedTauName))  # second insert lands between the rerun sequence and finalBoostedTaus
+
+    return process
+
+
 from PhysicsTools.PatAlgos.tools.jetTools import updateJetCollection
 def nanoAOD_addDeepInfo(process,addDeepBTag,addDeepFlavour):
     _btagDiscriminators=[]
@@ -371,6 +395,7 @@ def nanoAOD_customizeCommon(process):
                                      addParticleNet=nanoAOD_addDeepInfoAK8_switch.nanoAOD_addParticleNet_switch,
                                      jecPayload=nanoAOD_addDeepInfoAK8_switch.jecPayload)
     (run2_nanoAOD_94XMiniAODv1 | run2_nanoAOD_94X2016 | run2_nanoAOD_94XMiniAODv2 | run2_nanoAOD_102Xv1 | run2_nanoAOD_106Xv1).toModify(process, lambda p : nanoAOD_addTauIds(p))
+    (~(run2_nanoAOD_94XMiniAODv1 | run2_nanoAOD_94X2016 | run2_nanoAOD_94XMiniAODv2 | run2_nanoAOD_102Xv1 | run2_nanoAOD_106Xv1)).toModify(process, lambda p : nanoAOD_addBoostedTauIds(p))
     return process
 
 def nanoAOD_customizeData(process):