diff --git a/RecoTauTag/RecoTau/interface/DeepTauBase.h b/RecoTauTag/RecoTau/interface/DeepTauBase.h index d717ec7159f68..11b79a85f6a3d 100644 --- a/RecoTauTag/RecoTau/interface/DeepTauBase.h +++ b/RecoTauTag/RecoTau/interface/DeepTauBase.h @@ -41,7 +41,7 @@ class DeepTauCache { public: using GraphPtr = std::shared_ptr; - DeepTauCache(const std::string& graph_name, const bool& mem_mapped); + DeepTauCache(const std::string& graph_name, bool mem_mapped); ~DeepTauCache(); // A Session allows concurrent calls to Run(), though a Session must @@ -49,7 +49,7 @@ class DeepTauCache { tensorflow::Session& getSession() const { return *session_; } const tensorflow::GraphDef& getGraph() const { return *graph_; } -protected: +private: GraphPtr graph_; tensorflow::Session* session_; std::unique_ptr memmappedEnv_; diff --git a/RecoTauTag/RecoTau/plugins/DPFIsolation.cc b/RecoTauTag/RecoTau/plugins/DPFIsolation.cc index 4caaa6c5dca44..df9c63b77f1fa 100644 --- a/RecoTauTag/RecoTau/plugins/DPFIsolation.cc +++ b/RecoTauTag/RecoTau/plugins/DPFIsolation.cc @@ -71,8 +71,14 @@ class DPFIsolation : public deep_tau::DeepTauBase { vtx_token(consumes(cfg.getParameter("vertices"))), graphVersion(cfg.getParameter("version")) { + const auto& shape = cache_->getGraph().node(0).attr().at("shape").shape(); + if(!(graphVersion == 1 || graphVersion == 0 )) throw cms::Exception("DPFIsolation") << "unknown version of the graph_ file."; + + if(!(shape.dim(1).size() == getNumberOfParticles(graphVersion) && shape.dim(2).size() == GetNumberOfFeatures(graphVersion))) + throw cms::Exception("DPFIsolation") << "number of inputs does not match the expected inputs for the given version"; + } private: diff --git a/RecoTauTag/RecoTau/plugins/DeepTauId.cc b/RecoTauTag/RecoTau/plugins/DeepTauId.cc index 987d3ceb717d4..3c44c4ea75cc3 100644 --- a/RecoTauTag/RecoTau/plugins/DeepTauId.cc +++ b/RecoTauTag/RecoTau/plugins/DeepTauId.cc @@ -277,6 +277,10 @@ class DeepTauId : public deep_tau::DeepTauBase { 
input_layer(cache_->getGraph().node(0).name()), output_layer(cache_->getGraph().node(cache_->getGraph().node_size() - 1).name()) { + const auto& shape = cache_->getGraph().node(0).attr().at("shape").shape(); + if(shape.dim(1).size() != dnn_inputs_2017v1::NumberOfInputs) + throw cms::Exception("DeepTauId") << "number of inputs does not match the expected inputs for the given version"; + } static std::unique_ptr initializeGlobalCache(const edm::ParameterSet& cfg) diff --git a/RecoTauTag/RecoTau/python/tools/runTauIdMVA.py b/RecoTauTag/RecoTau/python/tools/runTauIdMVA.py index bb3c9319b94ae..147c2c074c8f0 100644 --- a/RecoTauTag/RecoTau/python/tools/runTauIdMVA.py +++ b/RecoTauTag/RecoTau/python/tools/runTauIdMVA.py @@ -1,6 +1,7 @@ from RecoTauTag.RecoTau.TauDiscriminatorTools import noPrediscriminants from RecoTauTag.RecoTau.PATTauDiscriminationByMVAIsolationRun2_cff import patDiscriminationByIsolationMVArun2v1raw, patDiscriminationByIsolationMVArun2v1VLoose import os +import re class TauIDEmbedder(object): """class to rerun the tau seq and acces trainings from the database""" @@ -589,7 +590,7 @@ def runTauID(self): tauIDSources.byVTightIsolationMVArun2v1DBnewDMwLT2016 = self.cms.InputTag('rerunDiscriminationByIsolationNewDMMVArun2v1VTight') tauIDSources.byVVTightIsolationMVArun2v1DBnewDMwLT2016 = self.cms.InputTag('rerunDiscriminationByIsolationNewDMMVArun2v1VVTight') - if "deepTau2017v1" in self.toKeep or "deepTau2017v1Q" in self.toKeep: + if "deepTau2017v1" in self.toKeep: print "Adding DeepTau IDs" workingPoints_ = { @@ -625,38 +626,21 @@ def runTauID(self): "VVTight": 0.9859 } } + file_name = 'RecoTauTag/TrainingFiles/data/DeepTauId/deepTau_2017v1_20L1024N_quantized.pb' + self.process.deepTau2017v1 = self.cms.EDProducer("DeepTauId", + electrons = self.cms.InputTag('slimmedElectrons'), + muons = self.cms.InputTag('slimmedMuons'), + taus = self.cms.InputTag('slimmedTaus'), + graph_file = self.cms.string(file_name), + mem_mapped = self.cms.bool(False) + ) - if 
"deepTau2017v1" in self.toKeep: - self.process.deepTau2017v1 = self.cms.EDProducer("DeepTauId", - electrons = self.cms.InputTag('slimmedElectrons'), - muons = self.cms.InputTag('slimmedMuons'), - taus = self.cms.InputTag('slimmedTaus'), - graph_file = self.cms.string('RecoTauTag/TrainingFiles/data/DeepTauId/deepTau_2017v1_20L1024N.pb'), - mem_mapped = self.cms.bool(False) - ) - - self.processDeepProducer('deepTau2017v1', tauIDSources, workingPoints_) - - self.process.rerunMvaIsolationTask.add(self.process.deepTau2017v1) - self.process.rerunMvaIsolationSequence += self.process.deepTau2017v1 - - if "deepTau2017v1Q" in self.toKeep: - self.process.deepTau2017v1Q = self.cms.EDProducer("DeepTauId", - electrons = self.cms.InputTag('slimmedElectrons'), - muons = self.cms.InputTag('slimmedMuons'), - taus = self.cms.InputTag('slimmedTaus'), - graph_file = self.cms.string('RecoTauTag/TrainingFiles/data/DeepTauId/deepTau_2017v1_20L1024N_quantized.pb'), - mem_mapped = self.cms.bool(False) - ) - - self.processDeepProducer('deepTau2017v1Q', tauIDSources, workingPoints_) - - self.process.rerunMvaIsolationTask.add(self.process.deepTau2017v1Q) - self.process.rerunMvaIsolationSequence += self.process.deepTau2017v1Q - + self.processDeepProducer('deepTau2017v1', tauIDSources, workingPoints_) + self.process.rerunMvaIsolationTask.add(self.process.deepTau2017v1) + self.process.rerunMvaIsolationSequence += self.process.deepTau2017v1 - if "DPFTau_2016_v0" in self.toKeep or "DPFTau_2016_v0Q" in self.toKeep: + if "DPFTau_2016_v0" in self.toKeep: print "Adding DPFTau isolation (v0)" workingPoints_ = { @@ -673,38 +657,23 @@ def runTauID(self): # (decayMode == 10) * (0.873958 - 0.0002328 * pt) " } } + file_name = 'RecoTauTag/TrainingFiles/data/DPFTauId/DPFIsolation_2017v0_quantized.pb' + self.process.dpfTau2016v0 = self.cms.EDProducer("DPFIsolation", + pfcands = self.cms.InputTag('packedPFCandidates'), + taus = self.cms.InputTag('slimmedTaus'), + vertices = 
self.cms.InputTag('offlineSlimmedPrimaryVertices'), + graph_file = self.cms.string(file_name), + version = self.cms.uint32(self.getDpfTauVersion(file_name)), + mem_mapped = self.cms.bool(False) + ) - if "DPFTau_2016_v0" in self.toKeep: - self.process.dpfTau2016v0 = self.cms.EDProducer("DPFIsolation", - pfcands = self.cms.InputTag('packedPFCandidates'), - taus = self.cms.InputTag('slimmedTaus'), - vertices = self.cms.InputTag('offlineSlimmedPrimaryVertices'), - graph_file = self.cms.string('RecoTauTag/TrainingFiles/data/DPFTauId/DPFIsolation_2017v0.pb'), - version = self.cms.uint32(0), - mem_mapped = self.cms.bool(False) - ) - - self.processDeepProducer('dpfTau2016v0', tauIDSources, workingPoints_) - - self.process.rerunMvaIsolationTask.add(self.process.dpfTau2016v0) - self.process.rerunMvaIsolationSequence += self.process.dpfTau2016v0 - - if "DPFTau_2016_v0Q" in self.toKeep: - self.process.dpfTau2016v0Q = self.cms.EDProducer("DPFIsolation", - pfcands = self.cms.InputTag('packedPFCandidates'), - taus = self.cms.InputTag('slimmedTaus'), - vertices = self.cms.InputTag('offlineSlimmedPrimaryVertices'), - graph_file = self.cms.string('RecoTauTag/TrainingFiles/data/DPFTauId/DPFIsolation_2017v0_quantized.pb'), - version = self.cms.uint32(0), - mem_mapped = self.cms.bool(False) - ) + self.processDeepProducer('dpfTau2016v0', tauIDSources, workingPoints_) - self.processDeepProducer('dpfTau2016v0Q', tauIDSources, workingPoints_) + self.process.rerunMvaIsolationTask.add(self.process.dpfTau2016v0) + self.process.rerunMvaIsolationSequence += self.process.dpfTau2016v0 - self.process.rerunMvaIsolationTask.add(self.process.dpfTau2016v0Q) - self.process.rerunMvaIsolationSequence += self.process.dpfTau2016v0Q - if "DPFTau_2016_v1" in self.toKeep or "DPFTau_2016_v1Q" in self.toKeep: + if "DPFTau_2016_v1" in self.toKeep: print "Adding DPFTau isolation (v1)" print "WARNING: WPs are not defined for DPFTau_2016_v1" print "WARNING: The score of DPFTau_2016_v1 is inverted: i.e. 
for Sig->0, for Bkg->1 with -1 for undefined input (preselection not passed)." @@ -713,35 +682,20 @@ def runTauID(self): "all": {"Tight" : 0.123} #FIXME: define WP } - if "DPFTau_2016_v1" in self.toKeep: - self.process.dpfTau2016v1 = self.cms.EDProducer("DPFIsolation", - pfcands = self.cms.InputTag('packedPFCandidates'), - taus = self.cms.InputTag('slimmedTaus'), - vertices = self.cms.InputTag('offlineSlimmedPrimaryVertices'), - graph_file = self.cms.string('RecoTauTag/TrainingFiles/data/DPFTauId/DPFIsolation_2017v1.pb'), - version = self.cms.uint32(1), - mem_mapped = self.cms.bool(False) - ) - - self.processDeepProducer('dpfTau2016v1', tauIDSources, workingPoints_) - - self.process.rerunMvaIsolationTask.add(self.process.dpfTau2016v1) - self.process.rerunMvaIsolationSequence += self.process.dpfTau2016v1 - - if "DPFTau_2016_v1Q" in self.toKeep: - self.process.dpfTau2016v1Q = self.cms.EDProducer("DPFIsolation", - pfcands = self.cms.InputTag('packedPFCandidates'), - taus = self.cms.InputTag('slimmedTaus'), - vertices = self.cms.InputTag('offlineSlimmedPrimaryVertices'), - graph_file = self.cms.string('RecoTauTag/TrainingFiles/data/DPFTauId/DPFIsolation_2017v1_quantized.pb'), - version = self.cms.uint32(1), - mem_mapped = self.cms.bool(False) - ) + file_name = 'RecoTauTag/TrainingFiles/data/DPFTauId/DPFIsolation_2017v1_quantized.pb' + self.process.dpfTau2016v1 = self.cms.EDProducer("DPFIsolation", + pfcands = self.cms.InputTag('packedPFCandidates'), + taus = self.cms.InputTag('slimmedTaus'), + vertices = self.cms.InputTag('offlineSlimmedPrimaryVertices'), + graph_file = self.cms.string(file_name), + version = self.cms.uint32(self.getDpfTauVersion(file_name)), + mem_mapped = self.cms.bool(False) + ) - self.processDeepProducer('dpfTau2016v1Q', tauIDSources, workingPoints_) + self.processDeepProducer('dpfTau2016v1', tauIDSources, workingPoints_) - self.process.rerunMvaIsolationTask.add(self.process.dpfTau2016v1Q) - self.process.rerunMvaIsolationSequence += 
self.process.dpfTau2016v1Q + self.process.rerunMvaIsolationTask.add(self.process.dpfTau2016v1) + self.process.rerunMvaIsolationSequence += self.process.dpfTau2016v1 print('Embedding new TauIDs into \"'+self.updatedTauName+'\"') embedID = self.cms.EDProducer("PATTauIDEmbedder", @@ -763,3 +717,14 @@ def processDeepProducer(self, producer_name, tauIDSources, workingPoints_): self.cms.InputTag(producer_name, 'VS{}{}'.format(target, point))) setattr(getattr(self.process, producer_name), 'VS{}WP'.format(target), cuts) + + + def getDpfTauVersion(self, file_name): + """returns the DNN version as an int. File name should contain a version label with data taking year (2011-2, 2015-8) and \ + version number (vX), e.g. 2017v0, in general the following format: {year}v{version}. Raises RuntimeError if no such label is found.""" + version_search = re.search(r'201[125678]v([0-9]+)[\._]', file_name) + if not version_search: + raise RuntimeError('File "{}" has an invalid name pattern, should be in the format "{{year}}v{{version}}". \ + Unable to extract version number.'.format(file_name)) + version = version_search.group(1) + return int(version) diff --git a/RecoTauTag/RecoTau/src/DeepTauBase.cc b/RecoTauTag/RecoTau/src/DeepTauBase.cc index 8972fefaefd59..f6f33877a3789 100644 --- a/RecoTauTag/RecoTau/src/DeepTauBase.cc +++ b/RecoTauTag/RecoTau/src/DeepTauBase.cc @@ -117,7 +117,7 @@ std::unique_ptr DeepTauBase::initializeGlobalCache(const edm::Para return std::make_unique(graph_name, mem_mapped); } -DeepTauCache::DeepTauCache(const std::string& graph_name, const bool& mem_mapped) +DeepTauCache::DeepTauCache(const std::string& graph_name, bool mem_mapped) { tensorflow::SessionOptions options; tensorflow::setThreading(options, 1, "no_threads"); diff --git a/RecoTauTag/RecoTau/test/runDeepTauIDsOnMiniAOD.py b/RecoTauTag/RecoTau/test/runDeepTauIDsOnMiniAOD.py index caca88f4cf0fb..8996a2b1251c7 100644 --- a/RecoTauTag/RecoTau/test/runDeepTauIDsOnMiniAOD.py +++ b/RecoTauTag/RecoTau/test/runDeepTauIDsOnMiniAOD.py @@ -33,11 +33,8 @@ updatedTauName =
updatedTauName, toKeep = [ "2017v2", "dR0p32017v2", "newDM2017v2", "deepTau2017v1", - # "DPFTau_2016_v0", + "DPFTau_2016_v0", # "DPFTau_2016_v1", - # "deepTau2017v1Q", - "DPFTau_2016_v0Q", - # "DPFTau_2016_v1Q", ]) tauIdEmbedder.runTauID()