diff --git a/CommonTools/MVAUtils/BuildFile.xml b/CommonTools/MVAUtils/BuildFile.xml new file mode 100644 index 0000000000000..26b7b5c239e4e --- /dev/null +++ b/CommonTools/MVAUtils/BuildFile.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/CommonTools/MVAUtils/interface/GBRForestTools.h b/CommonTools/MVAUtils/interface/GBRForestTools.h new file mode 100644 index 0000000000000..9979113edd572 --- /dev/null +++ b/CommonTools/MVAUtils/interface/GBRForestTools.h @@ -0,0 +1,27 @@ +#ifndef CommonTools_MVAUtils_GBRForestTools_h +#define CommonTools_MVAUtils_GBRForestTools_h + +//-------------------------------------------------------------------------------------------------- +// +// GRBForestTools +// +// Utility to parse an XML weights files specifying an ensemble of decision trees into a GRBForest. +// +// Author: Jonas Rembser +//-------------------------------------------------------------------------------------------------- + + +#include "CondFormats/EgammaObjects/interface/GBRForest.h" +#include "FWCore/ParameterSet/interface/FileInPath.h" + +#include + +// Create a GBRForest from an XML weight file +std::unique_ptr createGBRForest(const std::string &weightsFile); +std::unique_ptr createGBRForest(const edm::FileInPath &weightsFile); + +// Overloaded versions which are taking string vectors by reference to strore the variable names in +std::unique_ptr createGBRForest(const std::string &weightsFile, std::vector &varNames); +std::unique_ptr createGBRForest(const edm::FileInPath &weightsFile, std::vector &varNames); + +#endif diff --git a/CommonTools/MVAUtils/interface/TMVAEvaluator.h b/CommonTools/MVAUtils/interface/TMVAEvaluator.h new file mode 100644 index 0000000000000..3c141b69d3c1f --- /dev/null +++ b/CommonTools/MVAUtils/interface/TMVAEvaluator.h @@ -0,0 +1,50 @@ +#ifndef CommonTools_MVAUtils_TMVAEvaluator_h +#define CommonTools_MVAUtils_TMVAEvaluator_h + +#include +#include +#include +#include +#include + +#include "CondFormats/EgammaObjects/interface/GBRForest.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Utilities/interface/thread_safety_macros.h" +#include "TMVA/IMethod.h" +#include "TMVA/Reader.h" + +class TMVAEvaluator { + +public: + TMVAEvaluator(); + + void initialize(const std::string& options, const std::string& method, const std::string& weightFile, + const std::vector& variables, const std::vector& spectators, + bool useGBRForest = false, bool useAdaBoost = false); + + void initializeGBRForest(const GBRForest* gbrForest, const std::vector& variables, + const std::vector& spectators, bool useAdaBoost = false); + + void initializeGBRForest(const edm::EventSetup& iSetup, const std::string& label, + const std::vector& variables, const std::vector& spectators, + bool useAdaBoost = false); + + float evaluateTMVA(const std::map& inputs, bool useSpectators) const; + float evaluateGBRForest(const std::map& inputs) const; + float evaluate(const std::map& inputs, bool useSpectators = false) const; + +private: + bool mIsInitialized; + bool mUsingGBRForest; + bool mUseAdaBoost; + + std::string mMethod; + mutable std::mutex m_mutex; + CMS_THREAD_GUARD(m_mutex) std::unique_ptr mReader; + std::shared_ptr mGBRForest; + + CMS_THREAD_GUARD(m_mutex) mutable std::map> mVariables; + CMS_THREAD_GUARD(m_mutex) mutable std::map> mSpectators; +}; + +#endif // CommonTools_Utils_TMVAEvaluator_h diff --git a/CommonTools/Utils/interface/TMVAZipReader.h b/CommonTools/MVAUtils/interface/TMVAZipReader.h similarity index 68% rename from CommonTools/Utils/interface/TMVAZipReader.h rename to CommonTools/MVAUtils/interface/TMVAZipReader.h index 59b86406f0460..64aa6af65d9ce 100644 --- a/CommonTools/Utils/interface/TMVAZipReader.h +++ b/CommonTools/MVAUtils/interface/TMVAZipReader.h @@ -20,22 +20,21 @@ * ===================================================================================== */ -#ifndef TMVAZIPREADER_7RXIGO70 -#define TMVAZIPREADER_7RXIGO70 +#ifndef CommonTools_MVAUtils_TMVAZipReader_h +#define CommonTools_MVAUtils_TMVAZipReader_h -#include "TMVA/Reader.h" #include "TMVA/IMethod.h" +#include "TMVA/Reader.h" #include -namespace reco { - namespace details { - - bool hasEnding(std::string const &fullString, std::string const &ending); +namespace reco::details { + + bool hasEnding(std::string const& fullString, std::string const& ending); char* readGzipFile(const std::string& weightFile); - TMVA::IMethod* loadTMVAWeights(TMVA::Reader* reader, const std::string& method, - const std::string& weightFile, bool verbose=false); + TMVA::IMethod* loadTMVAWeights( + TMVA::Reader* reader, const std::string& method, const std::string& weightFile, bool verbose = false); +} -}} -#endif /* end of include guard: TMVAZIPREADER_7RXIGO70 */ +#endif diff --git a/CommonTools/MVAUtils/plugins/BuildFile.xml b/CommonTools/MVAUtils/plugins/BuildFile.xml new file mode 100644 index 0000000000000..2ec0bf10656e6 --- /dev/null +++ b/CommonTools/MVAUtils/plugins/BuildFile.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/CommonTools/MVAUtils/plugins/GBRForestWriter.cc b/CommonTools/MVAUtils/plugins/GBRForestWriter.cc new file mode 100644 index 0000000000000..02d72639295b3 --- /dev/null +++ b/CommonTools/MVAUtils/plugins/GBRForestWriter.cc @@ -0,0 +1,84 @@ +#include "CommonTools/MVAUtils/plugins/GBRForestWriter.h" + +#include "FWCore/Utilities/interface/Exception.h" + +#include "CondCore/DBOutputService/interface/PoolDBOutputService.h" +#include "FWCore/ServiceRegistry/interface/Service.h" + +#include "CommonTools/MVAUtils/interface/GBRForestTools.h" + +#include + +GBRForestWriter::GBRForestWriter(const edm::ParameterSet& cfg) + : moduleLabel_(cfg.getParameter("@module_label")) +{ + edm::VParameterSet cfgJobs = cfg.getParameter("jobs"); + for (edm::VParameterSet::const_iterator cfgJob = cfgJobs.begin(); cfgJob != cfgJobs.end(); ++cfgJob) { + jobEntryType* job = new jobEntryType(*cfgJob); + jobs_.push_back(job); + } +} + +GBRForestWriter::~GBRForestWriter() +{ + for (std::vector::iterator it = jobs_.begin(); it != jobs_.end(); ++it) { + delete (*it); + } +} + +void GBRForestWriter::analyze(const edm::Event&, const edm::EventSetup&) +{ + + for (std::vector::iterator job = jobs_.begin(); job != jobs_.end(); ++job) { + std::map gbrForests; // key = name + for (std::vector::iterator category = (*job)->categories_.begin(); + category != (*job)->categories_.end(); ++category) { + const GBRForest* gbrForest = nullptr; + if ((*category)->inputFileType_ == categoryEntryType::kXML) { + gbrForest = createGBRForest((*category)->inputFileName_).release(); + } else if ((*category)->inputFileType_ == categoryEntryType::kGBRForest) { + TFile* inputFile = new TFile((*category)->inputFileName_.data()); + // gbrForest = dynamic_cast(inputFile->Get((*category)->gbrForestName_.data())); // CV: + // dynamic_cast fails for some reason ?! + gbrForest = (GBRForest*)inputFile->Get((*category)->gbrForestName_.data()); + delete inputFile; + } + if (!gbrForest) + throw cms::Exception("GBRForestWriter") + << " Failed to load GBRForest = " << (*category)->gbrForestName_.data() + << " from file = " << (*category)->inputFileName_ << " !!\n"; + gbrForests[(*category)->gbrForestName_] = gbrForest; + } + if ((*job)->outputFileType_ == jobEntryType::kGBRForest) { + TFile* outputFile = new TFile((*job)->outputFileName_.data(), "RECREATE"); + + for (std::map::iterator gbrForest = gbrForests.begin(); + gbrForest != gbrForests.end(); ++gbrForest) { + outputFile->WriteObject(gbrForest->second, gbrForest->first.data()); + } + delete outputFile; + } else if ((*job)->outputFileType_ == jobEntryType::kSQLLite) { + edm::Service dbService; + if (!dbService.isAvailable()) + throw cms::Exception("GBRForestWriter") << " Failed to access PoolDBOutputService !!\n"; + + for (std::map::iterator gbrForest = gbrForests.begin(); + gbrForest != gbrForests.end(); ++gbrForest) { + std::string outputRecord = (*job)->outputRecord_; + if (gbrForests.size() > 1) + outputRecord.append("_").append(gbrForest->first); + dbService->writeOne(gbrForest->second, dbService->beginOfTime(), outputRecord); + } + } + + // gbrforest deletion + for (std::map::iterator gbrForest = gbrForests.begin(); + gbrForest != gbrForests.end(); ++gbrForest) { + delete gbrForest->second; + } + } +} + +#include "FWCore/Framework/interface/MakerMacros.h" + +DEFINE_FWK_MODULE(GBRForestWriter); diff --git a/CommonTools/MVAUtils/plugins/GBRForestWriter.h b/CommonTools/MVAUtils/plugins/GBRForestWriter.h new file mode 100644 index 0000000000000..38111f30fe390 --- /dev/null +++ b/CommonTools/MVAUtils/plugins/GBRForestWriter.h @@ -0,0 +1,117 @@ +#ifndef CommonTools_MVAUtils_GBRForestWriter_h +#define CommonTools_MVAUtils_GBRForestWriter_h + +/** \class GBRForestWriter + * + * Read GBRForest objects from ROOT file input + * and store it in SQL-lite output file + * + * \authors Christian Veelken, LLR + * + */ + +#include "FWCore/Framework/interface/EDAnalyzer.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include +#include + +class GBRForestWriter : public edm::EDAnalyzer { +public: + GBRForestWriter(const edm::ParameterSet&); + ~GBRForestWriter() override; + +private: + void analyze(const edm::Event&, const edm::EventSetup&) override; + + std::string moduleLabel_; + + bool hasRun_; + + typedef std::vector vstring; + + struct categoryEntryType { + categoryEntryType(const edm::ParameterSet& cfg) + { + if (cfg.existsAs("inputFileName")) { + edm::FileInPath inputFileName_fip = cfg.getParameter("inputFileName"); + inputFileName_ = inputFileName_fip.fullPath(); + } else if (cfg.existsAs("inputFileName")) { + inputFileName_ = cfg.getParameter("inputFileName"); + } else + throw cms::Exception("GBRForestWriter") << " Undefined Configuration Parameter 'inputFileName !!\n"; + std::string inputFileType_string = cfg.getParameter("inputFileType"); + if (inputFileType_string == "XML") + inputFileType_ = kXML; + else if (inputFileType_string == "GBRForest") + inputFileType_ = kGBRForest; + else + throw cms::Exception("GBRForestWriter") + << " Invalid Configuration Parameter 'inputFileType' = " << inputFileType_string << " !!\n"; + if (inputFileType_ == kXML) { + inputVariables_ = cfg.getParameter("inputVariables"); + spectatorVariables_ = cfg.getParameter("spectatorVariables"); + methodName_ = cfg.getParameter("methodName"); + gbrForestName_ + = (cfg.existsAs("gbrForestName") ? cfg.getParameter("gbrForestName") + : methodName_); + } else { + gbrForestName_ = cfg.getParameter("gbrForestName"); + } + } + ~categoryEntryType() {} + std::string inputFileName_; + enum { kXML, kGBRForest }; + int inputFileType_; + vstring inputVariables_; + vstring spectatorVariables_; + std::string gbrForestName_; + std::string methodName_; + }; + struct jobEntryType { + jobEntryType(const edm::ParameterSet& cfg) + { + if (cfg.exists("categories")) { + edm::VParameterSet cfgCategories = cfg.getParameter("categories"); + for (edm::VParameterSet::const_iterator cfgCategory = cfgCategories.begin(); + cfgCategory != cfgCategories.end(); ++cfgCategory) { + categoryEntryType* category = new categoryEntryType(*cfgCategory); + categories_.push_back(category); + } + } else { + categoryEntryType* category = new categoryEntryType(cfg); + categories_.push_back(category); + } + std::string outputFileType_string = cfg.getParameter("outputFileType"); + if (outputFileType_string == "GBRForest") + outputFileType_ = kGBRForest; + else if (outputFileType_string == "SQLLite") + outputFileType_ = kSQLLite; + else + throw cms::Exception("GBRForestWriter") + << " Invalid Configuration Parameter 'outputFileType' = " << outputFileType_string << " !!\n"; + if (outputFileType_ == kGBRForest) { + outputFileName_ = cfg.getParameter("outputFileName"); + } + if (outputFileType_ == kSQLLite) { + outputRecord_ = cfg.getParameter("outputRecord"); + } + } + ~jobEntryType() + { + for (std::vector::iterator it = categories_.begin(); it != categories_.end(); ++it) { + delete (*it); + } + } + std::vector categories_; + enum { kGBRForest, kSQLLite }; + int outputFileType_; + std::string outputFileName_; + std::string outputRecord_; + }; + std::vector jobs_; +}; + +#endif diff --git a/CommonTools/MVAUtils/src/GBRForestTools.cc b/CommonTools/MVAUtils/src/GBRForestTools.cc new file mode 100644 index 0000000000000..63bfe6b3a0cb1 --- /dev/null +++ b/CommonTools/MVAUtils/src/GBRForestTools.cc @@ -0,0 +1,304 @@ +#include "CommonTools/MVAUtils/interface/GBRForestTools.h" +#include "CommonTools/MVAUtils/interface/TMVAZipReader.h" +#include "FWCore/ParameterSet/interface/FileInPath.h" +#include "FWCore/Utilities/interface/Exception.h" + +#include +#include +#include +#include +#include + +namespace { + + size_t readVariables(tinyxml2::XMLElement* root, const char * key, std::vector& names) + { + size_t n = 0; + names.clear(); + + if (root != nullptr) { + for(tinyxml2::XMLElement* e = root->FirstChildElement(key); + e != nullptr; e = e->NextSiblingElement(key)) + { + names.push_back(e->Attribute("Expression")); + ++n; + } + } + + return n; + } + + bool isTerminal(tinyxml2::XMLElement* node) + { + bool is = true; + for(tinyxml2::XMLElement* e = node->FirstChildElement("Node"); + e != nullptr; e = e->NextSiblingElement("Node")) { + is = false; + } + return is; + } + + unsigned int countIntermediateNodes(tinyxml2::XMLElement* node) + { + + unsigned int count = 0; + for(tinyxml2::XMLElement* e = node->FirstChildElement("Node"); + e != nullptr; e = e->NextSiblingElement("Node")) { + count += countIntermediateNodes(e); + } + return count > 0 ? count + 1 : 0; + + } + + unsigned int countTerminalNodes(tinyxml2::XMLElement* node) + { + + unsigned int count = 0; + for(tinyxml2::XMLElement* e = node->FirstChildElement("Node"); + e != nullptr; e = e->NextSiblingElement("Node")) { + count += countTerminalNodes(e); + } + return count > 0 ? count : 1; + + } + + void addNode(GBRTree& tree, tinyxml2::XMLElement* node, + double scale, bool isRegression, bool useYesNoLeaf, + bool adjustboundary, bool isAdaClassifier) + { + + bool nodeIsTerminal = isTerminal(node); + if (nodeIsTerminal) { + double response = 0.; + if (isRegression) { + node->QueryDoubleAttribute("res", &response); + } + else { + if (useYesNoLeaf) { + node->QueryDoubleAttribute("nType", &response); + } + else { + if (isAdaClassifier) { + node->QueryDoubleAttribute("purity", &response); + } else { + node->QueryDoubleAttribute("res", &response); + } + } + } + response *= scale; + tree.Responses().push_back(response); + } + else { + + int thisidx = tree.CutIndices().size(); + + int selector; + float cutval; + bool ctype; + + node->QueryIntAttribute("IVar", &selector); + node->QueryFloatAttribute("Cut", &cutval); + node->QueryBoolAttribute("cType", &ctype); + + tree.CutIndices().push_back(static_cast(selector)); + + //newer tmva versions use >= instead of > in decision tree splits, so adjust cut value + //to reproduce the correct behaviour + if (adjustboundary) { + cutval = std::nextafter(cutval,std::numeric_limits::lowest()); + } + tree.CutVals().push_back(cutval); + tree.LeftIndices().push_back(0); + tree.RightIndices().push_back(0); + + tinyxml2::XMLElement* left = nullptr; + tinyxml2::XMLElement* right = nullptr; + for(tinyxml2::XMLElement* e = node->FirstChildElement("Node"); + e != nullptr; e = e->NextSiblingElement("Node")) { + if (*(e->Attribute("pos")) == 'l') left = e; + else if (*(e->Attribute("pos")) == 'r') right = e; + } + if (!ctype) { + std::swap(left, right); + } + + tree.LeftIndices()[thisidx] = isTerminal(left) ? -tree.Responses().size() : tree.CutIndices().size() ; + addNode(tree, left, scale, isRegression, useYesNoLeaf, adjustboundary,isAdaClassifier); + + tree.RightIndices()[thisidx] = isTerminal(right) ? -tree.Responses().size() : tree.CutIndices().size() ; + addNode(tree, right, scale, isRegression, useYesNoLeaf, adjustboundary,isAdaClassifier); + + } + + } + + std::unique_ptr init(const std::string& weightsFileFullPath, + std::vector& varNames) + { + + // + // Load weights file, for gzipped or raw xml file + // + tinyxml2::XMLDocument xmlDoc; + + using namespace reco::details; + + if (hasEnding(weightsFileFullPath, ".xml")) { + xmlDoc.LoadFile(weightsFileFullPath.c_str()); + } else if (hasEnding(weightsFileFullPath, ".gz") || + hasEnding(weightsFileFullPath, ".gzip")) { + char * buffer = readGzipFile(weightsFileFullPath); + xmlDoc.Parse(buffer); + free(buffer); + } + + tinyxml2::XMLElement* root = xmlDoc.FirstChildElement("MethodSetup"); + readVariables(root->FirstChildElement("Variables"), "Variable", varNames); + + // Read in the TMVA general info + std::map info; + tinyxml2::XMLElement* infoElem = xmlDoc.FirstChildElement("MethodSetup")->FirstChildElement("GeneralInfo"); + if (infoElem == nullptr) { + throw cms::Exception("XMLError") + << "No GeneralInfo found in " << weightsFileFullPath << " !!\n"; + } + for(tinyxml2::XMLElement* e = infoElem->FirstChildElement("Info"); + e != nullptr; e = e->NextSiblingElement("Info")) + { + const char * name; + const char * value; + e->QueryStringAttribute("name", &name); + e->QueryStringAttribute("value", &value); + info[name] = value; + } + + // Read in the TMVA options + std::map options; + tinyxml2::XMLElement* optionsElem = xmlDoc.FirstChildElement("MethodSetup")->FirstChildElement("Options"); + if (optionsElem == nullptr) { + throw cms::Exception("XMLError") + << "No Options found in " << weightsFileFullPath << " !!\n"; + } + for(tinyxml2::XMLElement* e = optionsElem->FirstChildElement("Option"); + e != nullptr; e = e->NextSiblingElement("Option")) + { + const char * name; + e->QueryStringAttribute("name", &name); + options[name] = e->GetText(); + } + + // Get root version number if available + int rootTrainingVersion(0); + if (info.find("ROOT Release") != info.end()) { + std::string s = info["ROOT Release"]; + rootTrainingVersion = std::stoi(s.substr(s.find("[")+1,s.find("]")-s.find("[")-1)); + } + + // Get the boosting weights + std::vector boostWeights; + tinyxml2::XMLElement* weightsElem = xmlDoc.FirstChildElement("MethodSetup")->FirstChildElement("Weights"); + if (weightsElem == nullptr) { + throw cms::Exception("XMLError") + << "No Weights found in " << weightsFileFullPath << " !!\n"; + } + bool hasTrees = false; + for(tinyxml2::XMLElement* e = weightsElem->FirstChildElement("BinaryTree"); + e != nullptr; e = e->NextSiblingElement("BinaryTree")) + { + hasTrees = true; + double w; + e->QueryDoubleAttribute("boostWeight", &w); + boostWeights.push_back(w); + } + if (!hasTrees) { + throw cms::Exception("XMLError") + << "No BinaryTrees found in " << weightsFileFullPath << " !!\n"; + } + + bool isRegression = info["AnalysisType"] == "Regression"; + + //special handling for non-gradient-boosted (ie ADABoost) classifiers, where tree responses + //need to be renormalized after the training for evaluation purposes + bool isAdaClassifier = !isRegression && options["BoostType"] != "Grad"; + bool useYesNoLeaf = isAdaClassifier && options["UseYesNoLeaf"] == "True"; + + //newer tmva versions use >= instead of > in decision tree splits, so adjust cut value + //to reproduce the correct behaviour + bool adjustBoundaries = (rootTrainingVersion>=ROOT_VERSION(5,34,20) && + rootTrainingVersion=ROOT_VERSION(6,2,0); + + auto forest = std::make_unique(); + forest->SetInitialResponse(isRegression ? boostWeights[0] : 0.); + + double norm = 0; + if (isAdaClassifier) { + for (double w : boostWeights) { + norm += w; + } + } + + forest->Trees().reserve(boostWeights.size()); + size_t itree = 0; + // Loop over tree estimators + for(tinyxml2::XMLElement* e = weightsElem->FirstChildElement("BinaryTree"); + e != nullptr; e = e->NextSiblingElement("BinaryTree")) { + double scale = isAdaClassifier ? boostWeights[itree]/norm : 1.0; + + tinyxml2::XMLElement* root = e->FirstChildElement("Node"); + forest->Trees().push_back(GBRTree(countIntermediateNodes(root), countTerminalNodes(root))); + auto & tree = forest->Trees().back(); + + addNode(tree, root, scale, isRegression, useYesNoLeaf, adjustBoundaries, isAdaClassifier); + + //special case, root node is terminal, create fake intermediate node at root + if (tree.CutIndices().empty()) { + tree.CutIndices().push_back(0); + tree.CutVals().push_back(0); + tree.LeftIndices().push_back(0); + tree.RightIndices().push_back(0); + } + + ++itree; + } + + return forest; + } + +} + +// Create a GBRForest from an XML weight file +std::unique_ptr +createGBRForest(const std::string &weightsFile) +{ + std::vector varNames; + return createGBRForest(weightsFile, varNames); +} + +std::unique_ptr +createGBRForest(const edm::FileInPath &weightsFile) +{ + std::vector varNames; + return createGBRForest(weightsFile.fullPath(), varNames); +} + +// Overloaded versions which are taking string vectors by reference to store the variable names in +std::unique_ptr +createGBRForest(const std::string &weightsFile, std::vector &varNames) +{ + std::unique_ptr gbrForest; + + if(weightsFile[0] == '/') { + gbrForest = init(weightsFile, varNames); + } + else { + edm::FileInPath weightsFileEdm(weightsFile); + gbrForest = init( weightsFileEdm.fullPath(), varNames); + } + return gbrForest; +} + +std::unique_ptr +createGBRForest(const edm::FileInPath &weightsFile, std::vector &varNames) +{ + return createGBRForest(weightsFile.fullPath(), varNames); +} diff --git a/CommonTools/MVAUtils/src/TMVAEvaluator.cc b/CommonTools/MVAUtils/src/TMVAEvaluator.cc new file mode 100644 index 0000000000000..62d8902876dd4 --- /dev/null +++ b/CommonTools/MVAUtils/src/TMVAEvaluator.cc @@ -0,0 +1,175 @@ +#include "CommonTools/MVAUtils/interface/GBRForestTools.h" +#include "CommonTools/MVAUtils/interface/TMVAEvaluator.h" +#include "CommonTools/MVAUtils/interface/TMVAZipReader.h" + +#include "CondFormats/DataRecord/interface/GBRWrapperRcd.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" + +TMVAEvaluator::TMVAEvaluator() + : mIsInitialized(false) + , mUsingGBRForest(false) + , mUseAdaBoost(false) +{ +} + +void TMVAEvaluator::initialize(const std::string& options, const std::string& method, const std::string& weightFile, + const std::vector& variables, const std::vector& spectators, bool useGBRForest, + bool useAdaBoost) +{ + // initialize the TMVA reader + mReader.reset(new TMVA::Reader(options.c_str())); + mReader->SetVerbose(false); + mMethod = method; + + // add input variables + for (std::vector::const_iterator it = variables.begin(); it != variables.end(); ++it) { + mVariables.insert(std::make_pair(*it, std::make_pair(it - variables.begin(), 0.))); + mReader->AddVariable(it->c_str(), &(mVariables.at(*it).second)); + } + + // add spectator variables + for (std::vector::const_iterator it = spectators.begin(); it != spectators.end(); ++it) { + mSpectators.insert(std::make_pair(*it, std::make_pair(it - spectators.begin(), 0.))); + mReader->AddSpectator(it->c_str(), &(mSpectators.at(*it).second)); + } + + // load the TMVA weights + reco::details::loadTMVAWeights(mReader.get(), mMethod, weightFile); + + if (useGBRForest) { + mGBRForest = createGBRForest(weightFile); + + // now can free some memory + mReader.reset(nullptr); + + mUsingGBRForest = true; + mUseAdaBoost = useAdaBoost; + } + + mIsInitialized = true; +} + +void TMVAEvaluator::initializeGBRForest(const GBRForest* gbrForest, const std::vector& variables, + const std::vector& spectators, bool useAdaBoost) +{ + // add input variables + for (std::vector::const_iterator it = variables.begin(); it != variables.end(); ++it) + mVariables.insert(std::make_pair(*it, std::make_pair(it - variables.begin(), 0.))); + + // add spectator variables + for (std::vector::const_iterator it = spectators.begin(); it != spectators.end(); ++it) + mSpectators.insert(std::make_pair(*it, std::make_pair(it - spectators.begin(), 0.))); + + // do not take ownership if getting GBRForest from an external source + mGBRForest = std::shared_ptr(gbrForest, [](const GBRForest*) {}); + + mIsInitialized = true; + mUsingGBRForest = true; + mUseAdaBoost = useAdaBoost; +} + +void TMVAEvaluator::initializeGBRForest(const edm::EventSetup& iSetup, const std::string& label, + const std::vector& variables, const std::vector& spectators, bool useAdaBoost) +{ + edm::ESHandle gbrForestHandle; + + iSetup.get().get(label.c_str(), gbrForestHandle); + + initializeGBRForest(gbrForestHandle.product(), variables, spectators, useAdaBoost); +} + +float TMVAEvaluator::evaluateTMVA(const std::map& inputs, bool useSpectators) const +{ + // default value + float value = -99.; + + // TMVA::Reader is not thread safe + std::lock_guard lock(m_mutex); + + // set the input variable values + for (auto it = mVariables.begin(); it != mVariables.end(); ++it) { + if (inputs.count(it->first) > 0) + it->second.second = inputs.at(it->first); + else + edm::LogError("MissingInputVariable") + << "Input variable " << it->first + << " is missing from the list of inputs. The returned discriminator value might not be sensible."; + } + + // if using spectator variables + if (useSpectators) { + // set the spectator variable values + for (auto it = mSpectators.begin(); it != mSpectators.end(); ++it) { + if (inputs.count(it->first) > 0) + it->second.second = inputs.at(it->first); + else + edm::LogError("MissingSpectatorVariable") + << "Spectator variable " << it->first + << " is missing from the list of inputs. The returned discriminator value might not be sensible."; + } + } + + // evaluate the MVA + value = mReader->EvaluateMVA(mMethod.c_str()); + + return value; +} + +float TMVAEvaluator::evaluateGBRForest(const std::map& inputs) const +{ + // default value + float value = -99.; + + std::unique_ptr vars(new float[mVariables.size()]); // allocate n floats + + // set the input variable values + for (auto it = mVariables.begin(); it != mVariables.end(); ++it) { + if (inputs.count(it->first) > 0) + vars[it->second.first] = inputs.at(it->first); + else + edm::LogError("MissingInputVariable") + << "Input variable " << it->first + << " is missing from the list of inputs. The returned discriminator value might not be sensible."; + } + + // evaluate the MVA + if (mUseAdaBoost) + value = mGBRForest->GetAdaBoostClassifier(vars.get()); + else + value = mGBRForest->GetGradBoostClassifier(vars.get()); + + return value; +} + +float TMVAEvaluator::evaluate(const std::map& inputs, bool useSpectators) const +{ + // default value + float value = -99.; + + if (!mIsInitialized) { + edm::LogError("InitializationError") << "TMVAEvaluator not properly initialized."; + return value; + } + + if (useSpectators && inputs.size() < (mVariables.size() + mSpectators.size())) { + edm::LogError("MissingInputs") << "Too few inputs provided (" << inputs.size() << " provided but " + << mVariables.size() << " input and " << mSpectators.size() + << " spectator variables expected)."; + return value; + } else if (inputs.size() < mVariables.size()) { + edm::LogError("MissingInputVariable(s)") << "Too few input variables provided (" << inputs.size() + << " provided but " << mVariables.size() << " expected)."; + return value; + } + + if (mUsingGBRForest) { + if (useSpectators) + edm::LogWarning("UnsupportedFunctionality") + << "Use of spectator variables with GBRForest is not supported. Spectator variables will be ignored."; + value = evaluateGBRForest(inputs); + } else + value = evaluateTMVA(inputs, useSpectators); + + return value; +} diff --git a/CommonTools/MVAUtils/src/TMVAZipReader.cc b/CommonTools/MVAUtils/src/TMVAZipReader.cc new file mode 100644 index 0000000000000..0188978a732a9 --- /dev/null +++ b/CommonTools/MVAUtils/src/TMVAZipReader.cc @@ -0,0 +1,112 @@ +#include "CommonTools/MVAUtils/interface/TMVAZipReader.h" +#include "FWCore/Utilities/interface/Exception.h" + +#include +#include +#include + +using namespace std; + +// From http://stackoverflow.com/questions/874134/find-if-string-endswith-another-string-in-c +bool reco::details::hasEnding(std::string const& fullString, std::string const& ending) +{ + if (fullString.length() >= ending.length()) { + return (0 == fullString.compare(fullString.length() - ending.length(), ending.length(), ending)); + } else { + return false; + } +} + +char* reco::details::readGzipFile(const std::string& weightFile) +{ + FILE* f = fopen(weightFile.c_str(), "r"); + if (f == nullptr) { + throw cms::Exception("InvalidFileState") << "Failed to open MVA file = " << weightFile << " !!\n"; + } + int magic; + int size; + fread(&magic, 4, 1, f); + fseek(f, -4, SEEK_END); + fread(&size, 4, 1, f); + fclose(f); + // printf("%x, %i\n", magic, size); + + gzFile file = gzopen(weightFile.c_str(), "r"); + + int bytes_read; + char* buffer = (char*)malloc(size); + bytes_read = gzread(file, buffer, size - 1); + buffer[bytes_read] = '\0'; + if (!gzeof(file)) { + int err; + const char* error_string; + error_string = gzerror(file, &err); + if (err) { + free(buffer); + throw cms::Exception("InvalidFileState") << "Error while reading gzipped file = " + << weightFile << " !!\n" << error_string; + } + } + gzclose(file); + return buffer; +} + +TMVA::IMethod* reco::details::loadTMVAWeights( + TMVA::Reader* reader, const std::string& method, const std::string& weightFile, bool verbose) +{ + + TMVA::IMethod* ptr = nullptr; + + verbose = false; + if (verbose) + std::cout << "Booking TMVA Reader with " << method << " and weight file: " << weightFile << std::endl; + + if (reco::details::hasEnding(weightFile, ".xml")) { + if (verbose) + std::cout << "Weight file is pure xml." << std::endl; + // Let TMVA read the file + ptr = reader->BookMVA(method, weightFile); + } else if (reco::details::hasEnding(weightFile, ".gz") || reco::details::hasEnding(weightFile, ".gzip")) { + if (verbose) + std::cout << "Unzipping file." << std::endl; + char* c = readGzipFile(weightFile); + + // We can't use tmpnam, gcc emits a warning about security. + // This is also technically insecure in the same way, since we append + // a suffix and then open another file. + char tmpFilename[] = "/tmp/tmva.XXXXXX"; + int fdToUselessFile = mkstemp(tmpFilename); + std::string weight_file_name(tmpFilename); + weight_file_name += ".xml"; + FILE* theActualFile = fopen(weight_file_name.c_str(), "w"); + if (theActualFile != nullptr) { + // write xml + fputs(c, theActualFile); + fputs("\n", theActualFile); + fclose(theActualFile); + close(fdToUselessFile); + } else { + throw cms::Exception("CannotWriteFile") << "Error while writing file = " << weight_file_name << " !!\n"; + } + if (verbose) + std::cout << "Booking MvA" << std::endl; + ptr = reader->BookMVA(method, weight_file_name); + if (verbose) + std::cout << "Cleaning up" << std::endl; + remove(weight_file_name.c_str()); + remove(tmpFilename); + + // Someday this will work. + // reader->BookMVA(TMVA::Types::Instance().GetMethodType(TString(method)), c); + if (verbose) { + std::cout << "Reader booked" << std::endl; + } + free(c); + } else { + throw cms::Exception("BadTMVAWeightFilename") + << "I don't understand the extension on the filename: " << weightFile + << ", it should be .xml, .gz, or .gzip" << std::endl; + } + + return ptr; +} diff --git a/CommonTools/Utils/BuildFile.xml b/CommonTools/Utils/BuildFile.xml index c8c2be257353f..d03fe200870cb 100644 --- a/CommonTools/Utils/BuildFile.xml +++ b/CommonTools/Utils/BuildFile.xml @@ -1,10 +1,8 @@ - - diff --git a/CommonTools/Utils/interface/TMVAEvaluator.h b/CommonTools/Utils/interface/TMVAEvaluator.h deleted file mode 100644 index 9053a589acf2e..0000000000000 --- a/CommonTools/Utils/interface/TMVAEvaluator.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef CommonTools_Utils_TMVAEvaluator_h -#define CommonTools_Utils_TMVAEvaluator_h - -#include -#include -#include -#include -#include - -#include "TMVA/Reader.h" -#include "TMVA/IMethod.h" -#include "CondFormats/EgammaObjects/interface/GBRForest.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Utilities/interface/thread_safety_macros.h" - - -class TMVAEvaluator { - - public: - TMVAEvaluator(); - - void initialize(const std::string & options, const std::string & method, const std::string & weightFile, - const std::vector & variables, const std::vector & spectators, bool useGBRForest=false, bool useAdaBoost=false); - void initializeGBRForest(const GBRForest* gbrForest, const std::vector & variables, - const std::vector & spectators, bool useAdaBoost=false); - void initializeGBRForest(const edm::EventSetup &iSetup, const std::string & label, - const std::vector & variables, const std::vector & spectators, bool useAdaBoost=false); - float evaluateTMVA(const std::map & inputs, bool useSpectators) const; - float evaluateGBRForest(const std::map & inputs) const; - float evaluate(const std::map & inputs, bool useSpectators=false) const; - - private: - bool mIsInitialized; - bool mUsingGBRForest; - bool mUseAdaBoost; - - std::string mMethod; - mutable std::mutex m_mutex; - CMS_THREAD_GUARD(m_mutex) std::unique_ptr mReader; - std::shared_ptr mGBRForest; - - CMS_THREAD_GUARD(m_mutex) mutable std::map> mVariables; - CMS_THREAD_GUARD(m_mutex) mutable std::map> mSpectators; -}; - -#endif // CommonTools_Utils_TMVAEvaluator_h - diff --git a/CommonTools/Utils/plugins/BuildFile.xml b/CommonTools/Utils/plugins/BuildFile.xml index fa3b9e798347a..6df874fc25aaf 100644 --- a/CommonTools/Utils/plugins/BuildFile.xml +++ b/CommonTools/Utils/plugins/BuildFile.xml @@ -1,9 +1,4 @@ - - - - - diff --git a/CommonTools/Utils/plugins/GBRForestWriter.cc b/CommonTools/Utils/plugins/GBRForestWriter.cc deleted file mode 100644 index fae17115e81cd..0000000000000 --- a/CommonTools/Utils/plugins/GBRForestWriter.cc +++ /dev/null @@ -1,116 +0,0 @@ -#include "CommonTools/Utils/plugins/GBRForestWriter.h" - -#include "CommonTools/Utils/interface/TMVAZipReader.h" -#include "FWCore/Utilities/interface/Exception.h" - -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "CondCore/DBOutputService/interface/PoolDBOutputService.h" - -#include "CondFormats/EgammaObjects/interface/GBRForest.h" - -#include "TMVA/ClassifierFactory.h" -#include "TMVA/Event.h" -#include "TMVA/Factory.h" -#include "TMVA/MethodBase.h" -#include "TMVA/MethodBDT.h" -#include "TMVA/Reader.h" -#include "TMVA/Tools.h" - -#include - -GBRForestWriter::GBRForestWriter(const edm::ParameterSet& cfg) - : moduleLabel_(cfg.getParameter("@module_label")) -{ - edm::VParameterSet cfgJobs = cfg.getParameter("jobs"); - for ( edm::VParameterSet::const_iterator cfgJob = cfgJobs.begin(); - cfgJob != cfgJobs.end(); ++cfgJob ) { - jobEntryType* job = new jobEntryType(*cfgJob); - jobs_.push_back(job); - } -} - -GBRForestWriter::~GBRForestWriter() -{ - for ( std::vector::iterator it = jobs_.begin(); - it != jobs_.end(); ++it ) { - delete (*it); - } -} - -void GBRForestWriter::analyze(const edm::Event&, const edm::EventSetup&) -{ - - for ( std::vector::iterator job = jobs_.begin(); - job != jobs_.end(); ++job ) { - std::map gbrForests; // key = name - for ( std::vector::iterator category = (*job)->categories_.begin(); - category != (*job)->categories_.end(); ++category ) { - const GBRForest* gbrForest = nullptr; - if ( (*category)->inputFileType_ == categoryEntryType::kXML ) { - TMVA::Tools::Instance(); - TMVA::Reader* mvaReader = new TMVA::Reader("!V:!Silent"); - std::vector dummyVariables; - for ( vstring::const_iterator inputVariable = (*category)->inputVariables_.begin(); - inputVariable != (*category)->inputVariables_.end(); ++inputVariable ) { - dummyVariables.push_back(0.); - mvaReader->AddVariable(inputVariable->data(), &dummyVariables.back()); - } - for ( vstring::const_iterator spectatorVariable = (*category)->spectatorVariables_.begin(); - spectatorVariable != (*category)->spectatorVariables_.end(); ++spectatorVariable ) { - dummyVariables.push_back(0.); - mvaReader->AddSpectator(spectatorVariable->data(), &dummyVariables.back()); - } - reco::details::loadTMVAWeights(mvaReader, (*category)->methodName_, (*category)->inputFileName_); - TMVA::MethodBDT* bdt = dynamic_cast(mvaReader->FindMVA((*category)->methodName_.data())); - if ( !bdt ) - throw cms::Exception("GBRForestWriter") - << "Failed to load MVA = " << (*category)->methodName_.data() << " from file = " << (*category)->inputFileName_ << " !!\n"; - gbrForest = new GBRForest(bdt); - delete mvaReader; - TMVA::Tools::DestroyInstance(); - } else if ( (*category)->inputFileType_ == categoryEntryType::kGBRForest ) { - TFile* inputFile = new TFile((*category)->inputFileName_.data()); - //gbrForest = dynamic_cast(inputFile->Get((*category)->gbrForestName_.data())); // CV: dynamic_cast fails for some reason ?! - gbrForest = (GBRForest*)inputFile->Get((*category)->gbrForestName_.data()); - delete inputFile; - } - if ( !gbrForest ) - throw cms::Exception("GBRForestWriter") - << " Failed to load GBRForest = " << (*category)->gbrForestName_.data() << " from file = " << (*category)->inputFileName_ << " !!\n"; - gbrForests[(*category)->gbrForestName_] = gbrForest; - } - if ( (*job)->outputFileType_ == jobEntryType::kGBRForest ) { - TFile* outputFile = new TFile((*job)->outputFileName_.data(), "RECREATE"); - - for ( std::map::iterator gbrForest = gbrForests.begin(); - gbrForest != gbrForests.end(); ++gbrForest ) { - outputFile->WriteObject(gbrForest->second, gbrForest->first.data()); - } - delete outputFile; - } else if ( (*job)->outputFileType_ == jobEntryType::kSQLLite ) { - edm::Service dbService; - if ( !dbService.isAvailable() ) - throw cms::Exception("GBRForestWriter") - << " Failed to access PoolDBOutputService !!\n"; - - for ( std::map::iterator gbrForest = gbrForests.begin(); - gbrForest != gbrForests.end(); ++gbrForest ) { - std::string outputRecord = (*job)->outputRecord_; - if ( gbrForests.size() > 1 ) outputRecord.append("_").append(gbrForest->first); - dbService->writeOne(gbrForest->second, dbService->beginOfTime(), outputRecord); - } - } - - //gbrforest deletion - for ( std::map::iterator gbrForest = gbrForests.begin(); - gbrForest != gbrForests.end(); ++gbrForest ) { - delete gbrForest->second; - } - - } - -} - -#include "FWCore/Framework/interface/MakerMacros.h" - -DEFINE_FWK_MODULE(GBRForestWriter); diff --git a/CommonTools/Utils/plugins/GBRForestWriter.h b/CommonTools/Utils/plugins/GBRForestWriter.h deleted file mode 100644 index b1f1937d20c31..0000000000000 --- a/CommonTools/Utils/plugins/GBRForestWriter.h +++ /dev/null @@ -1,113 +0,0 @@ -#ifndef RecoMET_METPUSubtraction_GBRForestWriter_h -#define RecoMET_METPUSubtraction_GBRForestWriter_h - -/** \class GBRForestWriter - * - * Read GBRForest objects from ROOT file input - * and store it in SQL-lite output file - * - * \authors Christian Veelken, LLR - * - */ - -#include "FWCore/Framework/interface/Frameworkfwd.h" -#include "FWCore/Framework/interface/EDAnalyzer.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" - -#include -#include - -class GBRForestWriter : public edm::EDAnalyzer -{ - public: - GBRForestWriter(const edm::ParameterSet&); - ~GBRForestWriter() override; - - private: - void analyze(const edm::Event&, const edm::EventSetup&) override; - - std::string moduleLabel_; - - bool hasRun_; - - typedef std::vector vstring; - - struct categoryEntryType - { - categoryEntryType(const edm::ParameterSet& cfg) - { - if ( cfg.existsAs("inputFileName") ) { - edm::FileInPath inputFileName_fip = cfg.getParameter("inputFileName"); - inputFileName_ = inputFileName_fip.fullPath(); - } else if ( cfg.existsAs("inputFileName") ) { - inputFileName_ = cfg.getParameter("inputFileName"); - } else throw cms::Exception("GBRForestWriter") - << " Undefined Configuration Parameter 'inputFileName !!\n"; - std::string inputFileType_string = cfg.getParameter("inputFileType"); - if ( inputFileType_string == "XML" ) inputFileType_ = kXML; - else if ( inputFileType_string == "GBRForest" ) inputFileType_ = kGBRForest; - else throw cms::Exception("GBRForestWriter") - << " Invalid Configuration Parameter 'inputFileType' = " << inputFileType_string << " !!\n"; - if ( inputFileType_ == kXML ) { - inputVariables_ = cfg.getParameter("inputVariables"); - spectatorVariables_ = cfg.getParameter("spectatorVariables"); - methodName_ = cfg.getParameter("methodName"); - gbrForestName_ = ( cfg.existsAs("gbrForestName") ? cfg.getParameter("gbrForestName") : methodName_ ); - } - else - gbrForestName_ = cfg.getParameter("gbrForestName"); - } - ~categoryEntryType() {} - std::string inputFileName_; - enum { kXML, kGBRForest }; - int inputFileType_; - vstring inputVariables_; - vstring spectatorVariables_; - std::string gbrForestName_; - std::string methodName_; - }; - struct jobEntryType - { - jobEntryType(const edm::ParameterSet& cfg) - { - if ( cfg.exists("categories") ) { - edm::VParameterSet cfgCategories = cfg.getParameter("categories"); - for ( edm::VParameterSet::const_iterator cfgCategory = cfgCategories.begin(); - cfgCategory != cfgCategories.end(); ++cfgCategory ) { - categoryEntryType* category = new categoryEntryType(*cfgCategory); - categories_.push_back(category); - } - } else { - categoryEntryType* category = new categoryEntryType(cfg); - categories_.push_back(category); - } - std::string outputFileType_string = cfg.getParameter("outputFileType"); - if ( outputFileType_string == "GBRForest" ) outputFileType_ = kGBRForest; - else if ( outputFileType_string == "SQLLite" ) outputFileType_ = kSQLLite; - else throw cms::Exception("GBRForestWriter") - << " Invalid Configuration Parameter 'outputFileType' = " << outputFileType_string << " !!\n"; - if ( outputFileType_ == kGBRForest ) { - outputFileName_ = cfg.getParameter("outputFileName"); - } - if ( outputFileType_ == kSQLLite ) { - outputRecord_ = cfg.getParameter("outputRecord"); - } - } - ~jobEntryType() - { - for ( std::vector::iterator it = categories_.begin(); - it != categories_.end(); ++it ) { - delete (*it); - } - } - std::vector categories_; - enum { kGBRForest, kSQLLite }; - int outputFileType_; - std::string outputFileName_; - std::string outputRecord_; - }; - std::vector jobs_; -}; - -#endif diff --git a/CommonTools/Utils/src/TMVAEvaluator.cc b/CommonTools/Utils/src/TMVAEvaluator.cc deleted file mode 100644 index a34398ddcd81d..0000000000000 --- a/CommonTools/Utils/src/TMVAEvaluator.cc +++ /dev/null @@ -1,180 +0,0 @@ -#include "CommonTools/Utils/interface/TMVAEvaluator.h" - -#include "CommonTools/Utils/interface/TMVAZipReader.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "CondFormats/DataRecord/interface/GBRWrapperRcd.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "TMVA/MethodBDT.h" - - -TMVAEvaluator::TMVAEvaluator() : - mIsInitialized(false), mUsingGBRForest(false), mUseAdaBoost(false) -{ -} - -void TMVAEvaluator::initialize(const std::string & options, const std::string & method, const std::string & weightFile, - const std::vector & variables, const std::vector & spectators, bool useGBRForest, bool useAdaBoost) -{ - // initialize the TMVA reader - mReader.reset(new TMVA::Reader(options.c_str())); - mReader->SetVerbose(false); - mMethod = method; - - // add input variables - for(std::vector::const_iterator it = variables.begin(); it!=variables.end(); ++it) - { - mVariables.insert( std::make_pair( *it, std::make_pair( it - variables.begin(), 0. ) ) ); - mReader->AddVariable(it->c_str(), &(mVariables.at(*it).second)); - } - - // add spectator variables - for(std::vector::const_iterator it = spectators.begin(); it!=spectators.end(); ++it) - { - mSpectators.insert( std::make_pair( *it, std::make_pair( it - spectators.begin(), 0. ) ) ); - mReader->AddSpectator(it->c_str(), &(mSpectators.at(*it).second)); - } - - // load the TMVA weights - reco::details::loadTMVAWeights(mReader.get(), mMethod, weightFile); - - if (useGBRForest) - { - mGBRForest.reset( new GBRForest( dynamic_cast( mReader->FindMVA(mMethod.c_str()) ) ) ); - - // now can free some memory - mReader.reset(nullptr); - - mUsingGBRForest = true; - mUseAdaBoost = useAdaBoost; - } - - mIsInitialized = true; -} - - -void TMVAEvaluator::initializeGBRForest(const GBRForest* gbrForest, const std::vector & variables, - const std::vector & spectators, bool useAdaBoost) -{ - // add input variables - for(std::vector::const_iterator it = variables.begin(); it!=variables.end(); ++it) - mVariables.insert( std::make_pair( *it, std::make_pair( it - variables.begin(), 0. ) ) ); - - // add spectator variables - for(std::vector::const_iterator it = spectators.begin(); it!=spectators.end(); ++it) - mSpectators.insert( std::make_pair( *it, std::make_pair( it - spectators.begin(), 0. ) ) ); - - // do not take ownership if getting GBRForest from an external source - mGBRForest = std::shared_ptr(gbrForest, [](const GBRForest*) {} ); - - mIsInitialized = true; - mUsingGBRForest = true; - mUseAdaBoost = useAdaBoost; -} - - -void TMVAEvaluator::initializeGBRForest(const edm::EventSetup &iSetup, const std::string & label, - const std::vector & variables, const std::vector & spectators, bool useAdaBoost) -{ - edm::ESHandle gbrForestHandle; - - iSetup.get().get(label.c_str(), gbrForestHandle); - - initializeGBRForest(gbrForestHandle.product(), variables, spectators, useAdaBoost); -} - - -float TMVAEvaluator::evaluateTMVA(const std::map & inputs, bool useSpectators) const -{ - // default value - float value = -99.; - - // TMVA::Reader is not thread safe - std::lock_guard lock(m_mutex); - - // set the input variable values - for(auto it = mVariables.begin(); it!=mVariables.end(); ++it) - { - if (inputs.count(it->first)>0) - it->second.second = inputs.at(it->first); - else - edm::LogError("MissingInputVariable") << "Input variable " << it->first << " is missing from the list of inputs. The returned discriminator value might not be sensible."; - } - - // if using spectator variables - if(useSpectators) - { - // set the spectator variable values - for(auto it = mSpectators.begin(); it!=mSpectators.end(); ++it) - { - if (inputs.count(it->first)>0) - it->second.second = inputs.at(it->first); - else - edm::LogError("MissingSpectatorVariable") << "Spectator variable " << it->first << " is missing from the list of inputs. The returned discriminator value might not be sensible."; - } - } - - // evaluate the MVA - value = mReader->EvaluateMVA(mMethod.c_str()); - - return value; -} - - -float TMVAEvaluator::evaluateGBRForest(const std::map & inputs) const -{ - // default value - float value = -99.; - - std::unique_ptr vars(new float[mVariables.size()]); // allocate n floats - - // set the input variable values - for(auto it = mVariables.begin(); it!=mVariables.end(); ++it) - { - if (inputs.count(it->first)>0) - vars[it->second.first] = inputs.at(it->first); - else - edm::LogError("MissingInputVariable") << "Input variable " << it->first << " is missing from the list of inputs. The returned discriminator value might not be sensible."; - } - - // evaluate the MVA - if (mUseAdaBoost) - value = mGBRForest->GetAdaBoostClassifier(vars.get()); - else - value = mGBRForest->GetGradBoostClassifier(vars.get()); - - return value; -} - -float TMVAEvaluator::evaluate(const std::map & inputs, bool useSpectators) const -{ - // default value - float value = -99.; - - if(!mIsInitialized) - { - edm::LogError("InitializationError") << "TMVAEvaluator not properly initialized."; - return value; - } - - if( useSpectators && inputs.size() < ( mVariables.size() + mSpectators.size() ) ) - { - edm::LogError("MissingInputs") << "Too few inputs provided (" << inputs.size() << " provided but " << mVariables.size() << " input and " << mSpectators.size() << " spectator variables expected)."; - return value; - } - else if( inputs.size() < mVariables.size() ) - { - edm::LogError("MissingInputVariable(s)") << "Too few input variables provided (" << inputs.size() << " provided but " << mVariables.size() << " expected)."; - return value; - } - - if (mUsingGBRForest) - { - if(useSpectators) - edm::LogWarning("UnsupportedFunctionality") << "Use of spectator variables with GBRForest is not supported. Spectator variables will be ignored."; - value = evaluateGBRForest(inputs); - } - else - value = evaluateTMVA(inputs, useSpectators); - - return value; -} diff --git a/CommonTools/Utils/src/TMVAZipReader.cc b/CommonTools/Utils/src/TMVAZipReader.cc deleted file mode 100644 index 6c144ed748e81..0000000000000 --- a/CommonTools/Utils/src/TMVAZipReader.cc +++ /dev/null @@ -1,116 +0,0 @@ -#include "CommonTools/Utils/interface/TMVAZipReader.h" -#include "FWCore/Utilities/interface/Exception.h" -#include -#include -#include -#include -#include "zlib.h" - -using namespace std; - -// From http://stackoverflow.com/questions/874134/find-if-string-endswith-another-string-in-c -bool reco::details::hasEnding(std::string const &fullString, std::string const &ending) { - if (fullString.length() >= ending.length()) { - return (0 == fullString.compare( - fullString.length() - ending.length(), ending.length(), ending)); - } else { - return false; - } -} - -char* reco::details::readGzipFile(const std::string& weightFile) -{ - FILE *f = fopen(weightFile.c_str(), "r"); - if (f==nullptr) { - throw cms::Exception("InvalidFileState") - << "Failed to open MVA file = " << weightFile << " !!\n"; - } - int magic; - int size; - fread(&magic, 4, 1, f); - fseek(f, -4, SEEK_END); - fread(&size, 4, 1, f); - fclose(f); - //printf("%x, %i\n", magic, size); - - gzFile file = gzopen (weightFile.c_str(), "r"); - - int bytes_read; - char *buffer = (char*)malloc(size); - bytes_read = gzread (file, buffer, size - 1); - buffer[bytes_read] = '\0'; - if (!gzeof (file)) { - int err; - const char * error_string; - error_string = gzerror (file, & err); - if (err) { - free(buffer); - throw cms::Exception("InvalidFileState") - << "Error while reading gzipped file = " << weightFile << " !!\n" - << error_string; - } - } - gzclose (file); - return buffer; -} - -TMVA::IMethod* reco::details::loadTMVAWeights(TMVA::Reader* reader, const std::string& method, - const std::string& weightFile, bool verbose) { - - TMVA::IMethod* ptr = nullptr; - - verbose = false; - if (verbose) - std::cout << "Booking TMVA Reader with " << method << " and weight file: " << weightFile - << std::endl; - - if (reco::details::hasEnding(weightFile, ".xml")) { - if (verbose) - std::cout << "Weight file is pure xml." << std::endl; - // Let TMVA read the file - ptr = reader->BookMVA(method, weightFile); - } else if (reco::details::hasEnding(weightFile, ".gz") || reco::details::hasEnding(weightFile, ".gzip")) { - if (verbose) - std::cout << "Unzipping file." << std::endl; - char* c = readGzipFile(weightFile); - - // We can't use tmpnam, gcc emits a warning about security. - // This is also technically insecure in the same way, since we append - // a suffix and then open another file. - char tmpFilename[] = "/tmp/tmva.XXXXXX"; - int fdToUselessFile = mkstemp(tmpFilename); - std::string weight_file_name(tmpFilename); - weight_file_name += ".xml"; - FILE *theActualFile = fopen(weight_file_name.c_str(), "w"); - if (theActualFile != nullptr) { - // write xml - fputs(c, theActualFile); - fputs("\n", theActualFile); - fclose(theActualFile); - close(fdToUselessFile); - } else { - throw cms::Exception("CannotWriteFile") - << "Error while writing file = " << weight_file_name << " !!\n"; - } - if (verbose) - std::cout << "Booking MvA" << std::endl; - ptr = reader->BookMVA(method, weight_file_name); - if (verbose) - std::cout << "Cleaning up" << std::endl; - remove(weight_file_name.c_str()); - remove(tmpFilename); - - // Someday this will work. - //reader->BookMVA(TMVA::Types::Instance().GetMethodType(TString(method)), c); - if (verbose) { - std::cout << "Reader booked" << std::endl; - } - free(c); - } else { - throw cms::Exception("BadTMVAWeightFilename") - << "I don't understand the extension on the filename: " - << weightFile << ", it should be .xml, .gz, or .gzip" << std::endl; - } - - return ptr; -} diff --git a/CondCore/Utilities/BuildFile.xml b/CondCore/Utilities/BuildFile.xml index eca2a67cb1c61..d9b6ce83a2351 100644 --- a/CondCore/Utilities/BuildFile.xml +++ b/CondCore/Utilities/BuildFile.xml @@ -33,6 +33,7 @@ + diff --git a/CondCore/Utilities/plugins/BuildFile.xml b/CondCore/Utilities/plugins/BuildFile.xml index 9c456fcb0ad14..d33e9fd729580 100644 --- a/CondCore/Utilities/plugins/BuildFile.xml +++ b/CondCore/Utilities/plugins/BuildFile.xml @@ -7,7 +7,7 @@ - + diff --git a/CondCore/Utilities/test/BuildFile.xml b/CondCore/Utilities/test/BuildFile.xml index 02bebb1860d56..e85fc5867036d 100644 --- a/CondCore/Utilities/test/BuildFile.xml +++ b/CondCore/Utilities/test/BuildFile.xml @@ -3,6 +3,7 @@ + diff --git a/CondFormats/EgammaObjects/BuildFile.xml b/CondFormats/EgammaObjects/BuildFile.xml index fd10fd3023857..18205baae4be9 100644 --- a/CondFormats/EgammaObjects/BuildFile.xml +++ b/CondFormats/EgammaObjects/BuildFile.xml @@ -4,7 +4,6 @@ - diff --git a/CondFormats/EgammaObjects/interface/GBRForest.h b/CondFormats/EgammaObjects/interface/GBRForest.h index b31ac146de053..ea7ccda9ce6df 100644 --- a/CondFormats/EgammaObjects/interface/GBRForest.h +++ b/CondFormats/EgammaObjects/interface/GBRForest.h @@ -17,23 +17,15 @@ ////////////////////////////////////////////////////////////////////////// #include "CondFormats/Serialization/interface/Serializable.h" +#include "CondFormats/EgammaObjects/interface/GBRTree.h" #include -#include "GBRTree.h" -#include -#include - - namespace TMVA { - class MethodBDT; - } class GBRForest { public: GBRForest(); - explicit GBRForest(const TMVA::MethodBDT *bdt); - virtual ~GBRForest(); double GetResponse(const float* vector) const; double GetGradBoostClassifier(const float* vector) const; @@ -46,10 +38,11 @@ std::vector &Trees() { return fTrees; } const std::vector &Trees() const { return fTrees; } - + protected: - double fInitialResponse; - std::vector fTrees; + + double fInitialResponse; + std::vector fTrees; COND_SERIALIZABLE; diff --git a/CondFormats/EgammaObjects/interface/GBRTree.h b/CondFormats/EgammaObjects/interface/GBRTree.h index b9dc7c7c1d2a7..4319585240f58 100644 --- a/CondFormats/EgammaObjects/interface/GBRTree.h +++ b/CondFormats/EgammaObjects/interface/GBRTree.h @@ -26,23 +26,15 @@ #include "CondFormats/Serialization/interface/Serializable.h" #include -#include - - namespace TMVA { - class DecisionTree; - class DecisionTreeNode; - } class GBRTree { public: GBRTree(); - explicit GBRTree(const TMVA::DecisionTree *tree, double scale, bool useyesnoleaf, bool adjustboundary); - virtual ~GBRTree(); + explicit GBRTree(int nIntermediate, int nTerminal); double GetResponse(const float* vector) const; - int TerminalIndex(const float *vector) const; std::vector &Responses() { return fResponses; } const std::vector &Responses() const { return fResponses; } @@ -58,39 +50,29 @@ std::vector &RightIndices() { return fRightIndices; } const std::vector &RightIndices() const { return fRightIndices; } - - protected: - unsigned int CountIntermediateNodes(const TMVA::DecisionTreeNode *node); - unsigned int CountTerminalNodes(const TMVA::DecisionTreeNode *node); - - void AddNode(const TMVA::DecisionTreeNode *node, double scale, bool isregression, bool useyesnoleaf, bool adjustboundary); - - std::vector fCutIndices; - std::vector fCutVals; - std::vector fLeftIndices; - std::vector fRightIndices; - std::vector fResponses; + + std::vector fCutIndices; + std::vector fCutVals; + std::vector fLeftIndices; + std::vector fRightIndices; + std::vector fResponses; COND_SERIALIZABLE; }; //_______________________________________________________________________ -inline double GBRTree::GetResponse(const float* vector) const { - return fResponses[TerminalIndex(vector)]; -} - -//_______________________________________________________________________ -inline int GBRTree::TerminalIndex(const float* vector) const { +inline double GBRTree::GetResponse(const float* vector) const +{ int index = 0; - do { - auto r = fRightIndices[index]; - auto l = fLeftIndices[index]; - index = vector[fCutIndices[index]] > fCutVals[index] ? r : l; - } while (index>0); - return -index; + do { + auto r = fRightIndices[index]; + auto l = fLeftIndices[index]; + index = vector[fCutIndices[index]] > fCutVals[index] ? r : l; + } while (index>0); + return fResponses[-index]; } #endif diff --git a/CondFormats/EgammaObjects/src/GBRForest.cxx b/CondFormats/EgammaObjects/src/GBRForest.cxx index b66b2133bb487..182b54f1c7e06 100644 --- a/CondFormats/EgammaObjects/src/GBRForest.cxx +++ b/CondFormats/EgammaObjects/src/GBRForest.cxx @@ -1,58 +1,6 @@ #include "CondFormats/EgammaObjects/interface/GBRForest.h" -//#include -#include "TMVA/DecisionTree.h" -#include "TMVA/MethodBDT.h" - - //_______________________________________________________________________ GBRForest::GBRForest() : fInitialResponse(0.) -{ - -} - -//_______________________________________________________________________ -GBRForest::~GBRForest() -{ -} - -//_______________________________________________________________________ -GBRForest::GBRForest(const TMVA::MethodBDT *bdt) -{ - - //special handling for non-gradient-boosted (ie ADABoost) classifiers, where tree responses - //need to be renormalized after the training for evaluation purposes - bool isadaclassifier = !bdt->DoRegression() && !bdt->GetOptions().Contains("~BoostType=Grad"); - bool useyesnoleaf = isadaclassifier && bdt->GetOptions().Contains("~UseYesNoLeaf=True"); - bool isregression = bdt->DoRegression(); - //newer tmva versions use >= instead of > in decision tree splits, so adjust cut value - //to reproduce the correct behaviour - bool adjustboundaries = (bdt->GetTrainingROOTVersionCode()>=ROOT_VERSION(5,34,20) && bdt->GetTrainingROOTVersionCode()GetTrainingROOTVersionCode()>=ROOT_VERSION(6,2,0); - - if (isregression) { - fInitialResponse = bdt->GetBoostWeights().front(); - } - else { - fInitialResponse = 0.; - } - - double norm = 0; - if (isadaclassifier) { - for (std::vector::const_iterator it=bdt->GetBoostWeights().begin(); it!=bdt->GetBoostWeights().end(); ++it) { - norm += *it; - } - } - - const std::vector &forest = bdt->GetForest(); - fTrees.reserve(forest.size()); - for (unsigned int itree=0; itreeGetBoostWeights()[itree]/norm : 1.0; - fTrees.push_back(GBRTree(forest[itree],scale,useyesnoleaf,adjustboundaries)); - } - -} - - - - +{} diff --git a/CondFormats/EgammaObjects/src/GBRTree.cxx b/CondFormats/EgammaObjects/src/GBRTree.cxx index a848be0b24e2c..e6e7e34558ef9 100644 --- a/CondFormats/EgammaObjects/src/GBRTree.cxx +++ b/CondFormats/EgammaObjects/src/GBRTree.cxx @@ -1,25 +1,12 @@ - - #include "CondFormats/EgammaObjects/interface/GBRTree.h" -using namespace std; -#include "TMVA/DecisionTreeNode.h" -#include "TMVA/DecisionTree.h" - //_______________________________________________________________________ -GBRTree::GBRTree() -{ - -} +GBRTree::GBRTree() {} //_______________________________________________________________________ -GBRTree::GBRTree(const TMVA::DecisionTree *tree, double scale, bool useyesnoleaf, bool adjustboundary) +GBRTree::GBRTree(int nIntermediate, int nTerminal) { - - //printf("boostweights size = %i, forest size = %i\n",bdt->GetBoostWeights().size(),bdt->GetForest().size()); - int nIntermediate = CountIntermediateNodes((TMVA::DecisionTreeNode*)tree->GetRoot()); - int nTerminal = CountTerminalNodes((TMVA::DecisionTreeNode*)tree->GetRoot()); - + //special case, root node is terminal if (nIntermediate==0) nIntermediate = 1; @@ -29,111 +16,4 @@ GBRTree::GBRTree(const TMVA::DecisionTree *tree, double scale, bool useyesnoleaf fRightIndices.reserve(nIntermediate); fResponses.reserve(nTerminal); - AddNode((TMVA::DecisionTreeNode*)tree->GetRoot(), scale, tree->DoRegression(), useyesnoleaf, adjustboundary); - - //special case, root node is terminal, create fake intermediate node at root - if (fCutIndices.size()==0) { - fCutIndices.push_back(0); - fCutVals.push_back(0); - fLeftIndices.push_back(0); - fRightIndices.push_back(0); - } - -} - - -//_______________________________________________________________________ -GBRTree::~GBRTree() { - -} - -//_______________________________________________________________________ -unsigned int GBRTree::CountIntermediateNodes(const TMVA::DecisionTreeNode *node) { - - if (!node->GetLeft() || !node->GetRight() || node->IsTerminal()) { - return 0; - } - else { - return 1 + CountIntermediateNodes((TMVA::DecisionTreeNode*)node->GetLeft()) + CountIntermediateNodes((TMVA::DecisionTreeNode*)node->GetRight()); - } - -} - -//_______________________________________________________________________ -unsigned int GBRTree::CountTerminalNodes(const TMVA::DecisionTreeNode *node) { - - if (!node->GetLeft() || !node->GetRight() || node->IsTerminal()) { - return 1; - } - else { - return 0 + CountTerminalNodes((TMVA::DecisionTreeNode*)node->GetLeft()) + CountTerminalNodes((TMVA::DecisionTreeNode*)node->GetRight()); - } - -} - - -//_______________________________________________________________________ -void GBRTree::AddNode(const TMVA::DecisionTreeNode *node, double scale, bool isregression, bool useyesnoleaf, bool adjustboundary) { - - if (!node->GetLeft() || !node->GetRight() || node->IsTerminal()) { - double response = 0.; - if (isregression) { - response = node->GetResponse(); - } - else { - if (useyesnoleaf) { - response = double(node->GetNodeType()); - } - else { - response = node->GetPurity(); - } - } - response *= scale; - fResponses.push_back(response); - return; - } - else { - int thisidx = fCutIndices.size(); - - fCutIndices.push_back(node->GetSelector()); - float cutval = node->GetCutValue(); - //newer tmva versions use >= instead of > in decision tree splits, so adjust cut value - //to reproduce the correct behaviour - if (adjustboundary) { - cutval = std::nextafter(cutval,std::numeric_limits::lowest()); - } - fCutVals.push_back(cutval); - fLeftIndices.push_back(0); - fRightIndices.push_back(0); - - TMVA::DecisionTreeNode *left; - TMVA::DecisionTreeNode *right; - if (node->GetCutType()) { - left = (TMVA::DecisionTreeNode*)node->GetLeft(); - right = (TMVA::DecisionTreeNode*)node->GetRight(); - } - else { - left = (TMVA::DecisionTreeNode*)node->GetRight(); - right = (TMVA::DecisionTreeNode*)node->GetLeft(); - } - - - if (!left->GetLeft() || !left->GetRight() || left->IsTerminal()) { - fLeftIndices[thisidx] = -fResponses.size(); - } - else { - fLeftIndices[thisidx] = fCutIndices.size(); - } - AddNode(left, scale, isregression, useyesnoleaf, adjustboundary); - - if (!right->GetLeft() || !right->GetRight() || right->IsTerminal()) { - fRightIndices[thisidx] = -fResponses.size(); - } - else { - fRightIndices[thisidx] = fCutIndices.size(); - } - AddNode(right, scale, isregression, useyesnoleaf, adjustboundary); - - } - } diff --git a/L1Trigger/L1THGCal/plugins/BuildFile.xml b/L1Trigger/L1THGCal/plugins/BuildFile.xml index 0070c40764482..1457ad9f3334e 100644 --- a/L1Trigger/L1THGCal/plugins/BuildFile.xml +++ b/L1Trigger/L1THGCal/plugins/BuildFile.xml @@ -17,6 +17,7 @@ + diff --git a/L1Trigger/L1THGCal/plugins/be_algorithms/HGCalTriggerClusterIdentificationBDT.cc b/L1Trigger/L1THGCal/plugins/be_algorithms/HGCalTriggerClusterIdentificationBDT.cc index 4923266057134..b39a151bf394e 100644 --- a/L1Trigger/L1THGCal/plugins/be_algorithms/HGCalTriggerClusterIdentificationBDT.cc +++ b/L1Trigger/L1THGCal/plugins/be_algorithms/HGCalTriggerClusterIdentificationBDT.cc @@ -3,7 +3,7 @@ #include "FWCore/MessageLogger/interface/MessageLogger.h" #include "L1Trigger/L1THGCal/interface/be_algorithms/HGCalTriggerClusterIdentificationBase.h" #include "L1Trigger/L1THGCal/interface/HGCalTriggerTools.h" -#include "CommonTools/Utils/interface/TMVAEvaluator.h" +#include "CommonTools/MVAUtils/interface/TMVAEvaluator.h" diff --git a/PhysicsTools/HepMCCandAlgos/BuildFile.xml b/PhysicsTools/HepMCCandAlgos/BuildFile.xml index 5967540ff6bee..18aa6ab45a591 100644 --- a/PhysicsTools/HepMCCandAlgos/BuildFile.xml +++ b/PhysicsTools/HepMCCandAlgos/BuildFile.xml @@ -6,6 +6,7 @@ + diff --git a/PhysicsTools/NanoAOD/plugins/BuildFile.xml b/PhysicsTools/NanoAOD/plugins/BuildFile.xml index f90f43e843434..c2e6cf64bdd68 100644 --- a/PhysicsTools/NanoAOD/plugins/BuildFile.xml +++ b/PhysicsTools/NanoAOD/plugins/BuildFile.xml @@ -5,7 +5,7 @@ - + diff --git a/PhysicsTools/PatAlgos/BuildFile.xml b/PhysicsTools/PatAlgos/BuildFile.xml index ef2a89a33bb61..3aaff834e379f 100644 --- a/PhysicsTools/PatAlgos/BuildFile.xml +++ b/PhysicsTools/PatAlgos/BuildFile.xml @@ -1,3 +1,4 @@ + diff --git a/PhysicsTools/PatAlgos/interface/MuonMvaEstimator.h b/PhysicsTools/PatAlgos/interface/MuonMvaEstimator.h index d390dbe80d993..042ed59858023 100644 --- a/PhysicsTools/PatAlgos/interface/MuonMvaEstimator.h +++ b/PhysicsTools/PatAlgos/interface/MuonMvaEstimator.h @@ -17,11 +17,15 @@ namespace reco { class Vertex; } +namespace edm { + class FileInPath; +} + namespace pat { class MuonMvaEstimator{ public: - MuonMvaEstimator(const std::string& weightsfile, float dRmax); + MuonMvaEstimator(const edm::FileInPath& weightsfile, float dRmax); ~MuonMvaEstimator(); @@ -38,19 +42,6 @@ namespace pat { std::unique_ptr gbrForest_; float dRmax_; - /// MVA VAriables - float pt_ = 0.0; - float eta_ = 0.0; - float jetNDauCharged_ = 0.0; - float miniRelIsoCharged_ = 0.0; - float miniRelIsoNeutral_ = 0.0; - float jetPtRel_ = 0.0; - float jetPtRatio_ = 0.0; - float jetBTagCSV_ = 0.0; - float sip_ = 0.0; - float log_abs_dxyBS_ = 0.0; - float log_abs_dzPV_ = 0.0; - float segmentCompatibility_ = 0.0; }; } #endif diff --git a/PhysicsTools/PatAlgos/interface/SoftMuonMvaEstimator.h b/PhysicsTools/PatAlgos/interface/SoftMuonMvaEstimator.h index dd199a465d651..385809b6a7660 100644 --- a/PhysicsTools/PatAlgos/interface/SoftMuonMvaEstimator.h +++ b/PhysicsTools/PatAlgos/interface/SoftMuonMvaEstimator.h @@ -10,11 +10,15 @@ namespace pat { class Muon; } +namespace edm { + class FileInPath; +} + namespace pat { class SoftMuonMvaEstimator{ public: - SoftMuonMvaEstimator(const std::string& weightsfile); + SoftMuonMvaEstimator(const edm::FileInPath& weightsfile); ~SoftMuonMvaEstimator(); @@ -24,28 +28,6 @@ namespace pat { std::unique_ptr gbrForest_; - // MVA VAriables - float segmentCompatibility_ = 0.0; - float chi2LocalMomentum_ = 0.0; - float chi2LocalPosition_ = 0.0; - float glbTrackProbability_ = 0.0; - float iValidFraction_ = 0.0; - float layersWithMeasurement_ = 0.0; - float trkKink_ = 0.0; - float log2PlusGlbKink_ = 0.0; - float timeAtIpInOutErr_ = 0.0; - float outerChi2_ = 0.0; - float innerChi2_ = 0.0; - float trkRelChi2_ = 0.0; - float vMuonHitComb_ = 0.0; - float qProd_ = 0.0; - - // MVA Spectator - float pID_ = 0.0; - float pt_ = 0.0; - float eta_ = 0.0; - float momID_ = 0.0; - }; } #endif diff --git a/PhysicsTools/PatAlgos/plugins/BaseMVAValueMapProducer.h b/PhysicsTools/PatAlgos/plugins/BaseMVAValueMapProducer.h index f937538911155..97c38bb709f7c 100644 --- a/PhysicsTools/PatAlgos/plugins/BaseMVAValueMapProducer.h +++ b/PhysicsTools/PatAlgos/plugins/BaseMVAValueMapProducer.h @@ -39,7 +39,7 @@ #include "CommonTools/Utils/interface/StringObjectFunction.h" #include "DataFormats/Common/interface/ValueMap.h" -#include "CommonTools/Utils/interface/TMVAZipReader.h" +#include "CommonTools/MVAUtils/interface/TMVAZipReader.h" #include "DataFormats/PatCandidates/interface/Jet.h" #include "DataFormats/PatCandidates/interface/Muon.h" #include "DataFormats/PatCandidates/interface/Electron.h" diff --git a/PhysicsTools/PatAlgos/plugins/BuildFile.xml b/PhysicsTools/PatAlgos/plugins/BuildFile.xml index b5d710cd405f8..e7d2c6386a13a 100644 --- a/PhysicsTools/PatAlgos/plugins/BuildFile.xml +++ b/PhysicsTools/PatAlgos/plugins/BuildFile.xml @@ -20,6 +20,7 @@ + diff --git a/PhysicsTools/PatAlgos/plugins/PATMuonProducer.cc b/PhysicsTools/PatAlgos/plugins/PATMuonProducer.cc index 372c75a2dfa19..03faf7900b3d7 100755 --- a/PhysicsTools/PatAlgos/plugins/PATMuonProducer.cc +++ b/PhysicsTools/PatAlgos/plugins/PATMuonProducer.cc @@ -61,18 +61,14 @@ using namespace std; PATMuonHeavyObjectCache::PATMuonHeavyObjectCache(const edm::ParameterSet& iConfig) { if (iConfig.getParameter("computeMuonMVA")) { - std::string mvaTrainingFile = iConfig.getParameter("mvaTrainingFile"); - // xml training file - edm::FileInPath fip(mvaTrainingFile); + edm::FileInPath mvaTrainingFile = iConfig.getParameter("mvaTrainingFile"); float mvaDrMax = iConfig.getParameter("mvaDrMax"); - muonMvaEstimator_ = std::make_unique(fip.fullPath(), mvaDrMax); + muonMvaEstimator_ = std::make_unique(mvaTrainingFile, mvaDrMax); } if (iConfig.getParameter("computeSoftMuonMVA")) { - std::string softMvaTrainingFile = iConfig.getParameter("softMvaTrainingFile"); - // xml soft mva training file - edm::FileInPath softfip(softMvaTrainingFile); - softMuonMvaEstimator_ = std::make_unique(softfip.fullPath()); + edm::FileInPath softMvaTrainingFile = iConfig.getParameter("softMvaTrainingFile"); + softMuonMvaEstimator_ = std::make_unique(softMvaTrainingFile); } } diff --git a/PhysicsTools/PatAlgos/python/producersLayer1/muonProducer_cfi.py b/PhysicsTools/PatAlgos/python/producersLayer1/muonProducer_cfi.py index 34600816c090f..cff50fb7c31a6 100644 --- a/PhysicsTools/PatAlgos/python/producersLayer1/muonProducer_cfi.py +++ b/PhysicsTools/PatAlgos/python/producersLayer1/muonProducer_cfi.py @@ -106,7 +106,7 @@ # Depends on MiniIsolation, so only works in miniaod # Don't forget to set flags properly in miniAOD_tools.py computeMuonMVA = cms.bool(False), - mvaTrainingFile = cms.string("RecoMuon/MuonIdentification/data/mu_BDTG_Run2017.weights.xml"), + mvaTrainingFile = cms.FileInPath("RecoMuon/MuonIdentification/data/mu_BDTG_Run2017.weights.xml"), recomputeBasicSelectors = cms.bool(True), mvaUseJec = cms.bool(True), mvaDrMax = cms.double(0.4), @@ -116,7 +116,7 @@ rho = cms.InputTag("fixedGridRhoFastjetCentralNeutral"), computeSoftMuonMVA = cms.bool(False), - softMvaTrainingFile = cms.string("RecoMuon/MuonIdentification/data/TMVA-muonid-bmm4-B-25.weights.xml"), + softMvaTrainingFile = cms.FileInPath("RecoMuon/MuonIdentification/data/TMVA-muonid-bmm4-B-25.weights.xml"), # MC Info muonSimInfo = cms.InputTag("muonSimClassifier"), diff --git a/PhysicsTools/PatAlgos/src/MuonMvaEstimator.cc b/PhysicsTools/PatAlgos/src/MuonMvaEstimator.cc index b3861adb3d781..fa4a47ea19d61 100644 --- a/PhysicsTools/PatAlgos/src/MuonMvaEstimator.cc +++ b/PhysicsTools/PatAlgos/src/MuonMvaEstimator.cc @@ -1,6 +1,7 @@ #include "PhysicsTools/PatAlgos/interface/MuonMvaEstimator.h" -#include "CondFormats/EgammaObjects/interface/GBRForest.h" +#include "FWCore/ParameterSet/interface/FileInPath.h" +#include "CommonTools/MVAUtils/interface/GBRForestTools.h" #include "DataFormats/Candidate/interface/Candidate.h" #include "DataFormats/MuonReco/interface/Muon.h" #include "DataFormats/MuonReco/interface/MuonSelectors.h" @@ -10,34 +11,12 @@ #include "DataFormats/PatCandidates/interface/Muon.h" #include "JetMETCorrections/JetCorrector/interface/JetCorrector.h" -#include "TMVA/Reader.h" -#include "TMVA/MethodBDT.h" - using namespace pat; -namespace { - constexpr char muon_mva_name[] = "BDTG"; -} - -MuonMvaEstimator::MuonMvaEstimator(const std::string& weightsfile, float dRmax): +MuonMvaEstimator::MuonMvaEstimator(const edm::FileInPath& weightsfile, float dRmax): dRmax_(dRmax) { - TMVA::Reader tmvaReader("!Color:!Silent:Error"); - tmvaReader.AddVariable("LepGood_pt", &pt_ ); - tmvaReader.AddVariable("LepGood_eta", &eta_ ); - tmvaReader.AddVariable("LepGood_jetNDauChargedMVASel", &jetNDauCharged_ ); - tmvaReader.AddVariable("LepGood_miniRelIsoCharged", &miniRelIsoCharged_); - tmvaReader.AddVariable("LepGood_miniRelIsoNeutral", &miniRelIsoNeutral_); - tmvaReader.AddVariable("LepGood_jetPtRelv2", &jetPtRel_ ); - tmvaReader.AddVariable("max(LepGood_jetBTagCSV,0)", &jetBTagCSV_ ); - tmvaReader.AddVariable("(LepGood_jetBTagCSV>-5)*min(LepGood_jetPtRatiov2,1.5)+(LepGood_jetBTagCSV<-5)/(1+LepGood_relIso04)", &jetPtRatio_ ); - tmvaReader.AddVariable("LepGood_sip3d", &sip_ ); - tmvaReader.AddVariable("log(abs(LepGood_dxy))", &log_abs_dxyBS_ ); - tmvaReader.AddVariable("log(abs(LepGood_dz))", &log_abs_dzPV_ ); - tmvaReader.AddVariable("LepGood_segmentCompatibility", &segmentCompatibility_); - - auto temp{ tmvaReader.BookMVA(muon_mva_name, weightsfile.c_str()) }; - gbrForest_ = std::make_unique( dynamic_cast( temp ) ); + gbrForest_ = createGBRForest( weightsfile ); } MuonMvaEstimator::~MuonMvaEstimator() { } diff --git a/PhysicsTools/PatAlgos/src/SoftMuonMvaEstimator.cc b/PhysicsTools/PatAlgos/src/SoftMuonMvaEstimator.cc index 034457311f9bc..e6d4246890955 100644 --- a/PhysicsTools/PatAlgos/src/SoftMuonMvaEstimator.cc +++ b/PhysicsTools/PatAlgos/src/SoftMuonMvaEstimator.cc @@ -1,45 +1,17 @@ #include "PhysicsTools/PatAlgos/interface/SoftMuonMvaEstimator.h" -#include "CondFormats/EgammaObjects/interface/GBRForest.h" +#include "FWCore/ParameterSet/interface/FileInPath.h" +#include "CommonTools/MVAUtils/interface/GBRForestTools.h" #include "DataFormats/Candidate/interface/Candidate.h" #include "DataFormats/MuonReco/interface/Muon.h" #include "DataFormats/MuonReco/interface/MuonSelectors.h" #include "DataFormats/PatCandidates/interface/Muon.h" -#include "TMVA/Reader.h" -#include "TMVA/MethodBDT.h" - using namespace pat; -namespace { - constexpr char softmuon_mva_name[] = "BDT"; -} - -SoftMuonMvaEstimator::SoftMuonMvaEstimator(const std::string& weightsfile) +SoftMuonMvaEstimator::SoftMuonMvaEstimator(const edm::FileInPath& weightsfile) { - TMVA::Reader tmvaReader("!Color:!Silent:Error"); - tmvaReader.AddVariable("segComp", &segmentCompatibility_); - tmvaReader.AddVariable("chi2LocMom", &chi2LocalMomentum_); - tmvaReader.AddVariable("chi2LocPos", &chi2LocalPosition_); - tmvaReader.AddVariable("glbTrackTailProb", &glbTrackProbability_); - tmvaReader.AddVariable("iValFrac", &iValidFraction_); - tmvaReader.AddVariable("LWH", &layersWithMeasurement_); - tmvaReader.AddVariable("kinkFinder", &trkKink_); - tmvaReader.AddVariable("TMath::Log(2+glbKinkFinder)", &log2PlusGlbKink_); - tmvaReader.AddVariable("timeAtIpInOutErr", &timeAtIpInOutErr_); - tmvaReader.AddVariable("outerChi2", &outerChi2_); - tmvaReader.AddVariable("innerChi2", &innerChi2_); - tmvaReader.AddVariable("trkRelChi2", &trkRelChi2_); - tmvaReader.AddVariable("vMuonHitComb", &vMuonHitComb_); - tmvaReader.AddVariable("Qprod", &qProd_); - - tmvaReader.AddSpectator("pID", &pID_); - tmvaReader.AddSpectator("pt", &pt_); - tmvaReader.AddSpectator("eta", &eta_); - tmvaReader.AddSpectator("MomID", &momID_); - - auto temp{ tmvaReader.BookMVA(softmuon_mva_name, weightsfile.c_str()) }; - gbrForest_ = std::make_unique( dynamic_cast( temp ) ); + gbrForest_ = createGBRForest( weightsfile ); } SoftMuonMvaEstimator::~SoftMuonMvaEstimator() { } @@ -114,9 +86,9 @@ float SoftMuonMvaEstimator::computeMva(const pat::Muon& muon) const const reco::HitPattern &gMpattern = gTrack->hitPattern(); - std::vector fvDThits = {0,0,0,0}; - std::vector fvRPChits = {0,0,0,0}; - std::vector fvCSChits = {0,0,0,0}; + std::vector fvDThits {0,0,0,0}; + std::vector fvRPChits {0,0,0,0}; + std::vector fvCSChits {0,0,0,0}; var[kVMuonHitComb] = 0; diff --git a/RecoBTag/CTagging/BuildFile.xml b/RecoBTag/CTagging/BuildFile.xml index 73689d39fd78b..f3c5f33bb2584 100644 --- a/RecoBTag/CTagging/BuildFile.xml +++ b/RecoBTag/CTagging/BuildFile.xml @@ -2,8 +2,7 @@ - - + diff --git a/RecoBTag/CTagging/interface/CharmTagger.h b/RecoBTag/CTagging/interface/CharmTagger.h index 81de0fe229def..e051e8fd9af89 100755 --- a/RecoBTag/CTagging/interface/CharmTagger.h +++ b/RecoBTag/CTagging/interface/CharmTagger.h @@ -2,7 +2,7 @@ #define RecoBTag_CTagging_CharmTagger_h #include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "CommonTools/Utils/interface/TMVAEvaluator.h" +#include "CommonTools/MVAUtils/interface/TMVAEvaluator.h" #include "RecoBTau/JetTagComputer/interface/JetTagComputer.h" #include #include "FWCore/Utilities/interface/ESInputTag.h" diff --git a/RecoBTag/Combined/BuildFile.xml b/RecoBTag/Combined/BuildFile.xml index a8488f80363af..7441812fca3c8 100644 --- a/RecoBTag/Combined/BuildFile.xml +++ b/RecoBTag/Combined/BuildFile.xml @@ -1,4 +1,4 @@ - + diff --git a/RecoBTag/Combined/interface/CandidateChargeBTagComputer.h b/RecoBTag/Combined/interface/CandidateChargeBTagComputer.h index 4a4ba3d4105c3..51ecae121ea6e 100644 --- a/RecoBTag/Combined/interface/CandidateChargeBTagComputer.h +++ b/RecoBTag/Combined/interface/CandidateChargeBTagComputer.h @@ -3,7 +3,7 @@ #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/Framework/interface/ESHandle.h" -#include "CommonTools/Utils/interface/TMVAEvaluator.h" +#include "CommonTools/MVAUtils/interface/TMVAEvaluator.h" #include "DataFormats/BTauReco/interface/CandIPTagInfo.h" #include "DataFormats/BTauReco/interface/CandSecondaryVertexTagInfo.h" #include "DataFormats/BTauReco/interface/CandSoftLeptonTagInfo.h" diff --git a/RecoBTag/Combined/interface/CombinedMVAV2JetTagComputer.h b/RecoBTag/Combined/interface/CombinedMVAV2JetTagComputer.h index 6f868b36edae5..baeec55d2016a 100644 --- a/RecoBTag/Combined/interface/CombinedMVAV2JetTagComputer.h +++ b/RecoBTag/Combined/interface/CombinedMVAV2JetTagComputer.h @@ -8,7 +8,7 @@ #include "FWCore/Framework/interface/EventSetup.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "CommonTools/Utils/interface/TMVAEvaluator.h" +#include "CommonTools/MVAUtils/interface/TMVAEvaluator.h" #include "RecoBTau/JetTagComputer/interface/JetTagComputer.h" class CombinedMVAV2JetTagComputer : public JetTagComputer { diff --git a/RecoBTag/SecondaryVertex/BuildFile.xml b/RecoBTag/SecondaryVertex/BuildFile.xml index 2b9b65b492af3..18978a7453da6 100644 --- a/RecoBTag/SecondaryVertex/BuildFile.xml +++ b/RecoBTag/SecondaryVertex/BuildFile.xml @@ -1,6 +1,6 @@ - + diff --git a/RecoBTag/SecondaryVertex/interface/CandidateBoostedDoubleSecondaryVertexComputer.h b/RecoBTag/SecondaryVertex/interface/CandidateBoostedDoubleSecondaryVertexComputer.h index a0ff6e3231d75..df3f8fd7c67de 100644 --- a/RecoBTag/SecondaryVertex/interface/CandidateBoostedDoubleSecondaryVertexComputer.h +++ b/RecoBTag/SecondaryVertex/interface/CandidateBoostedDoubleSecondaryVertexComputer.h @@ -2,7 +2,7 @@ #define RecoBTag_SecondaryVertex_CandidateBoostedDoubleSecondaryVertexComputer_h #include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "CommonTools/Utils/interface/TMVAEvaluator.h" +#include "CommonTools/MVAUtils/interface/TMVAEvaluator.h" #include "RecoBTau/JetTagComputer/interface/JetTagComputer.h" diff --git a/RecoBTag/SoftLepton/BuildFile.xml b/RecoBTag/SoftLepton/BuildFile.xml index 95730bd45f6fa..79b1128c1690a 100644 --- a/RecoBTag/SoftLepton/BuildFile.xml +++ b/RecoBTag/SoftLepton/BuildFile.xml @@ -1,8 +1,7 @@ - - + diff --git a/RecoBTag/SoftLepton/interface/ElectronTagger.h b/RecoBTag/SoftLepton/interface/ElectronTagger.h index 362c120036b84..561be83dc1b5d 100755 --- a/RecoBTag/SoftLepton/interface/ElectronTagger.h +++ b/RecoBTag/SoftLepton/interface/ElectronTagger.h @@ -2,7 +2,7 @@ #define RecoBTag_SoftLepton_ElectronTagger_h #include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "CommonTools/Utils/interface/TMVAEvaluator.h" +#include "CommonTools/MVAUtils/interface/TMVAEvaluator.h" #include "RecoBTau/JetTagComputer/interface/JetTagComputer.h" #include "RecoBTag/SoftLepton/interface/LeptonSelector.h" diff --git a/RecoBTag/SoftLepton/interface/MuonTagger.h b/RecoBTag/SoftLepton/interface/MuonTagger.h index f06395706684c..cb4c784e948ab 100644 --- a/RecoBTag/SoftLepton/interface/MuonTagger.h +++ b/RecoBTag/SoftLepton/interface/MuonTagger.h @@ -6,7 +6,7 @@ #define RecoBTag_SoftLepton_MuonTagger_h #include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "CommonTools/Utils/interface/TMVAEvaluator.h" +#include "CommonTools/MVAUtils/interface/TMVAEvaluator.h" #include "RecoBTau/JetTagComputer/interface/JetTagComputer.h" #include "RecoBTag/SoftLepton/interface/LeptonSelector.h" #include diff --git a/RecoEgamma/EgammaTools/BuildFile.xml b/RecoEgamma/EgammaTools/BuildFile.xml index 9550cfc3a89b9..112b7010f29fa 100644 --- a/RecoEgamma/EgammaTools/BuildFile.xml +++ b/RecoEgamma/EgammaTools/BuildFile.xml @@ -7,15 +7,13 @@ - - + - diff --git a/RecoEgamma/EgammaTools/interface/GBRForestTools.h b/RecoEgamma/EgammaTools/interface/GBRForestTools.h deleted file mode 100644 index c72812070a2ea..0000000000000 --- a/RecoEgamma/EgammaTools/interface/GBRForestTools.h +++ /dev/null @@ -1,39 +0,0 @@ -//-------------------------------------------------------------------------------------------------- -// -// GRBForestTools -// -// Utility to read a TMVA weights file with a BDT into a GRBForest. -// -// Author: Jonas Rembser -//-------------------------------------------------------------------------------------------------- - - -#ifndef RecoEgamma_EgammaTools_GBRForestTools_h -#define RecoEgamma_EgammaTools_GBRForestTools_h - -#include -#include - -#include "CondFormats/EgammaObjects/interface/GBRForest.h" -#include "FWCore/ParameterSet/interface/FileInPath.h" - -#include "TMVA/MethodBDT.h" -#include "TMVA/Reader.h" - -#include "CommonTools/Utils/interface/TMVAZipReader.h" - -class GBRForestTools -{ - public: - GBRForestTools() {} - - static std::unique_ptr createGBRForest(const std::string &weightFile); - static std::unique_ptr createGBRForest(const edm::FileInPath &weightFile); - - // Overloaded versions which are taking string vectors by reference to strore the variable names in - static std::unique_ptr createGBRForest(const std::string &weightFile, std::vector &varNames); - static std::unique_ptr createGBRForest(const edm::FileInPath &weightFile, std::vector &varNames); - -}; - -#endif diff --git a/RecoEgamma/EgammaTools/src/GBRForestTools.cc b/RecoEgamma/EgammaTools/src/GBRForestTools.cc deleted file mode 100644 index cdcd42646924a..0000000000000 --- a/RecoEgamma/EgammaTools/src/GBRForestTools.cc +++ /dev/null @@ -1,151 +0,0 @@ -#include "RecoEgamma/EgammaTools/interface/GBRForestTools.h" - -#include -#include - -namespace { - - // Will return position of n-th occurence of a char in a string. - int strpos(const std::string &haystack, char needle, unsigned int nth) - { - int found = 0; - for (unsigned int i=0 ; i GBRForestTools::createGBRForest(const std::string &weightFile, - std::vector &varNames){ - edm::FileInPath weightFileEdm(weightFile); - return GBRForestTools::createGBRForest(weightFileEdm, varNames); -} - -// Creates a pointer to new GBRForest corresponding to a TMVA weights file -std::unique_ptr GBRForestTools::createGBRForest(const edm::FileInPath &weightFile, - std::vector &varNames){ - - std::string method; - - unsigned int NVar = 0; - unsigned int NSpec = 0; - - std::vector dumbVars; - std::vector dumbSpecs; - - varNames.clear(); - std::vector specNames; - - std::string line; - std::ifstream f; - std::string tmpstr; - - bool gzipped = false; - - // - // Set up the input buffers, for gzipped or raw xml file - // - if (reco::details::hasEnding(weightFile.fullPath(), ".xml")) { - f.open(weightFile.fullPath()); - tmpstr = ""; - } else if (reco::details::hasEnding(weightFile.fullPath(), ".gz") || reco::details::hasEnding(weightFile.fullPath(), ".gzip")) { - gzipped = true; - char *buffer = reco::details::readGzipFile(weightFile.fullPath()); - tmpstr = std::string(buffer); - free(buffer); - } - std::stringstream is(tmpstr); - - bool isend; - - while(true) { - - if (gzipped) isend = !std::getline(is, line); - else isend = !std::getline(f, line); - - if (isend) break; - - // Terminate reading of weights file - if (line.find("AddVariable(varNames[i], &dumbVars[i]); - } - - for(size_t i = 0; i < NSpec; ++i){ - mvaReader->AddSpectator(specNames[i], &dumbSpecs[i]); - } - - // - // Book the method and set up the weights file - // - - reco::details::loadTMVAWeights(mvaReader, method, weightFile.fullPath()); - - TMVA::MethodBDT* bdt = dynamic_cast( mvaReader->FindMVA(method) ); - std::unique_ptr gbrForest = std::make_unique(GBRForest(bdt)); - delete mvaReader; - - return gbrForest; -} - -std::unique_ptr GBRForestTools::createGBRForest(const std::string &weightFile){ - std::vector varNames; - return GBRForestTools::createGBRForest(weightFile, varNames); -} - -std::unique_ptr GBRForestTools::createGBRForest(const edm::FileInPath &weightFile){ - std::vector varNames; - return GBRForestTools::createGBRForest(weightFile, varNames); -} diff --git a/RecoEgamma/ElectronIdentification/BuildFile.xml b/RecoEgamma/ElectronIdentification/BuildFile.xml index f7589cf2db2d7..289dfc278b9cb 100644 --- a/RecoEgamma/ElectronIdentification/BuildFile.xml +++ b/RecoEgamma/ElectronIdentification/BuildFile.xml @@ -4,6 +4,7 @@ + diff --git a/RecoEgamma/ElectronIdentification/interface/ElectronMVAEstimator.h b/RecoEgamma/ElectronIdentification/interface/ElectronMVAEstimator.h index 22ed12f51f368..207d9f14f7432 100644 --- a/RecoEgamma/ElectronIdentification/interface/ElectronMVAEstimator.h +++ b/RecoEgamma/ElectronIdentification/interface/ElectronMVAEstimator.h @@ -2,7 +2,7 @@ #define __RecoEgamma_ElectronIdentification_ElectronMVAEstimator_H__ #include "DataFormats/EgammaCandidates/interface/GsfElectron.h" -#include "RecoEgamma/EgammaTools/interface/GBRForestTools.h" +#include "CondFormats/EgammaObjects/interface/GBRForest.h" #include #include @@ -13,7 +13,7 @@ class ElectronMVAEstimator { std::vector vweightsfiles; }; ElectronMVAEstimator(); - ElectronMVAEstimator(std::string fileName); + ElectronMVAEstimator(const std::string& fileName); ElectronMVAEstimator(const Configuration & ); ~ElectronMVAEstimator() {;} double mva(const reco::GsfElectron& myElectron, int nvertices=0) const; diff --git a/RecoEgamma/ElectronIdentification/interface/ElectronMVAEstimatorRun2.h b/RecoEgamma/ElectronIdentification/interface/ElectronMVAEstimatorRun2.h index c158e539be055..6e2b0d65ae800 100644 --- a/RecoEgamma/ElectronIdentification/interface/ElectronMVAEstimatorRun2.h +++ b/RecoEgamma/ElectronIdentification/interface/ElectronMVAEstimatorRun2.h @@ -3,7 +3,7 @@ #include "DataFormats/PatCandidates/interface/Electron.h" #include "RecoEgamma/EgammaTools/interface/AnyMVAEstimatorRun2Base.h" -#include "RecoEgamma/EgammaTools/interface/GBRForestTools.h" +#include "CommonTools/MVAUtils/interface/GBRForestTools.h" #include "RecoEgamma/EgammaTools/interface/MVAVariableManager.h" class ElectronMVAEstimatorRun2 : public AnyMVAEstimatorRun2Base{ diff --git a/RecoEgamma/ElectronIdentification/interface/SoftElectronMVAEstimator.h b/RecoEgamma/ElectronIdentification/interface/SoftElectronMVAEstimator.h index 493d18953caf6..cd18c7ed24043 100644 --- a/RecoEgamma/ElectronIdentification/interface/SoftElectronMVAEstimator.h +++ b/RecoEgamma/ElectronIdentification/interface/SoftElectronMVAEstimator.h @@ -6,7 +6,7 @@ #include "DataFormats/EgammaCandidates/interface/GsfElectron.h" #include "DataFormats/VertexReco/interface/Vertex.h" #include "DataFormats/VertexReco/interface/VertexFwd.h" -#include "RecoEgamma/EgammaTools/interface/GBRForestTools.h" +#include "CondFormats/EgammaObjects/interface/GBRForest.h" #include #include diff --git a/RecoEgamma/ElectronIdentification/plugins/ElectronIDValueMapProducer.cc b/RecoEgamma/ElectronIdentification/plugins/ElectronIDValueMapProducer.cc index 3bce967b1fdc1..da0eeb988490e 100644 --- a/RecoEgamma/ElectronIdentification/plugins/ElectronIDValueMapProducer.cc +++ b/RecoEgamma/ElectronIdentification/plugins/ElectronIDValueMapProducer.cc @@ -18,6 +18,8 @@ #include "RecoEgamma/EgammaTools/interface/MultiToken.h" #include "RecoEgamma/EgammaTools/interface/Utils.h" +#include "FWCore/Utilities/interface/isFinite.h" + #include #include @@ -92,7 +94,7 @@ void ElectronIDValueMapProducer::produce(edm::Event& iEvent, const edm::EventSet const auto& theseed = *(ele.superCluster()->seed()); std::vector vCov = lazyToolnoZS->localCovariances( theseed ); - const float see = (isnan(vCov[0]) ? 0. : sqrt(vCov[0])); + const float see = (edm::isNotFinite(vCov[0]) ? 0. : sqrt(vCov[0])); const float sep = vCov[1]; eleFull5x5SigmaIEtaIEta.push_back(see); eleFull5x5SigmaIEtaIPhi.push_back(sep); diff --git a/RecoEgamma/ElectronIdentification/src/ElectronMVAEstimator.cc b/RecoEgamma/ElectronIdentification/src/ElectronMVAEstimator.cc index 797cc658d9fd6..2cfaafb7ef422 100644 --- a/RecoEgamma/ElectronIdentification/src/ElectronMVAEstimator.cc +++ b/RecoEgamma/ElectronIdentification/src/ElectronMVAEstimator.cc @@ -3,24 +3,25 @@ #include "DataFormats/TrackReco/interface/TrackFwd.h" #include "DataFormats/GsfTrackReco/interface/GsfTrack.h" #include "DataFormats/GsfTrackReco/interface/GsfTrackFwd.h" +#include "CommonTools/MVAUtils/interface/GBRForestTools.h" ElectronMVAEstimator::ElectronMVAEstimator(): cfg_{} {} -ElectronMVAEstimator::ElectronMVAEstimator(std::string fileName): +ElectronMVAEstimator::ElectronMVAEstimator(const std::string& fileName): cfg_{} { // Taken from Daniele (his mail from the 30/11) // tmvaReader.BookMVA("BDTSimpleCat","../Training/weights_Root527b_3Depth_DanVarConvRej_2PtBins_10Pt_800TPrune5_Min100Events_NoBjets_half/TMVA_BDTSimpleCat.weights.xm"); // training of the 7/12 with Nvtx added - gbr_.push_back( GBRForestTools::createGBRForest( fileName ) ); + gbr_.push_back( createGBRForest(fileName) ); } ElectronMVAEstimator::ElectronMVAEstimator(const Configuration & cfg):cfg_(cfg) { - for(auto& weightsfile : cfg_.vweightsfiles) { - gbr_.push_back( GBRForestTools::createGBRForest( weightsfile )); + for(const auto& weightsfile : cfg_.vweightsfiles) { + gbr_.push_back( createGBRForest(weightsfile) ); } } diff --git a/RecoEgamma/ElectronIdentification/src/ElectronMVAEstimatorRun2.cc b/RecoEgamma/ElectronIdentification/src/ElectronMVAEstimatorRun2.cc index 2620480f10785..12704865c5853 100644 --- a/RecoEgamma/ElectronIdentification/src/ElectronMVAEstimatorRun2.cc +++ b/RecoEgamma/ElectronIdentification/src/ElectronMVAEstimatorRun2.cc @@ -40,13 +40,13 @@ void ElectronMVAEstimatorRun2::init(const std::vector &weightFileNa // Create a TMVA reader object for each category for(int i=0; i variableNamesInCategory; std::vector variablesInCategory; // Use unique_ptr so that all readers are properly cleaned up // when the vector clear() is called in the destructor - gbrForests_.push_back( GBRForestTools::createGBRForest( weightFileNames[i], variableNamesInCategory ) ); + std::vector variableNamesInCategory; + gbrForests_.push_back(createGBRForest(weightFileNames[i], variableNamesInCategory)); nVariables_.push_back(variableNamesInCategory.size()); diff --git a/RecoEgamma/ElectronIdentification/src/SoftElectronMVAEstimator.cc b/RecoEgamma/ElectronIdentification/src/SoftElectronMVAEstimator.cc index 8a906ca8acf87..935b4b0048392 100644 --- a/RecoEgamma/ElectronIdentification/src/SoftElectronMVAEstimator.cc +++ b/RecoEgamma/ElectronIdentification/src/SoftElectronMVAEstimator.cc @@ -3,6 +3,7 @@ #include "DataFormats/TrackReco/interface/TrackFwd.h" #include "DataFormats/GsfTrackReco/interface/GsfTrack.h" #include "DataFormats/GsfTrackReco/interface/GsfTrackFwd.h" +#include "CommonTools/MVAUtils/interface/GBRForestTools.h" SoftElectronMVAEstimator::SoftElectronMVAEstimator(const Configuration & cfg):cfg_(cfg) { @@ -17,7 +18,7 @@ SoftElectronMVAEstimator::SoftElectronMVAEstimator(const Configuration & cfg):cf for(auto& weightsfile : cfg_.vweightsfiles) { // Taken from Daniele (his mail from the 30/11) // training of the 7/12 with Nvtx added - gbr_.push_back( GBRForestTools::createGBRForest( weightsfile )); + gbr_.push_back(createGBRForest( weightsfile )); } } diff --git a/RecoEgamma/PhotonIdentification/interface/PhotonMVAEstimator.h b/RecoEgamma/PhotonIdentification/interface/PhotonMVAEstimator.h index 9791163959ae6..94329bed82ca0 100644 --- a/RecoEgamma/PhotonIdentification/interface/PhotonMVAEstimator.h +++ b/RecoEgamma/PhotonIdentification/interface/PhotonMVAEstimator.h @@ -7,7 +7,7 @@ #include "RecoEgamma/EgammaTools/interface/AnyMVAEstimatorRun2Base.h" #include "DataFormats/EgammaCandidates/interface/Photon.h" #include "RecoEgamma/EgammaTools/interface/EffectiveAreas.h" -#include "RecoEgamma/EgammaTools/interface/GBRForestTools.h" +#include "CondFormats/EgammaObjects/interface/GBRForest.h" #include "RecoEgamma/EgammaTools/interface/MVAVariableManager.h" class PhotonMVAEstimator : public AnyMVAEstimatorRun2Base{ diff --git a/RecoEgamma/PhotonIdentification/plugins/BuildFile.xml b/RecoEgamma/PhotonIdentification/plugins/BuildFile.xml index dc26d96bbc9f7..5044557c4eb77 100644 --- a/RecoEgamma/PhotonIdentification/plugins/BuildFile.xml +++ b/RecoEgamma/PhotonIdentification/plugins/BuildFile.xml @@ -7,6 +7,7 @@ + diff --git a/RecoEgamma/PhotonIdentification/plugins/PhotonIDValueMapProducer.cc b/RecoEgamma/PhotonIdentification/plugins/PhotonIDValueMapProducer.cc index 4885725d0d0bd..dac583b84c07f 100644 --- a/RecoEgamma/PhotonIdentification/plugins/PhotonIDValueMapProducer.cc +++ b/RecoEgamma/PhotonIdentification/plugins/PhotonIDValueMapProducer.cc @@ -15,6 +15,7 @@ #include "RecoEcal/EgammaCoreTools/interface/EcalClusterLazyTools.h" #include "RecoEgamma/EgammaTools/interface/MultiToken.h" #include "RecoEgamma/EgammaTools/interface/Utils.h" +#include "FWCore/Utilities/interface/isFinite.h" // This template function finds whether theCandidate is in thefootprint // collection. It is templated to be able to handle both reco and pat @@ -189,7 +190,7 @@ void PhotonIDValueMapProducer::produce(edm::Event& iEvent, const edm::EventSetup // retrieve the full5x5 directly from the object with ->full5x5_sigmaIetaIeta() // for both formats. std::vector vCov = lazyToolnoZS->localCovariances(theseed); - vars[0].push_back(isnan(vCov[0]) ? 0. : sqrt(vCov[0])); + vars[0].push_back(edm::isNotFinite(vCov[0]) ? 0. : sqrt(vCov[0])); vars[1].push_back(vCov[1]); vars[2].push_back(lazyToolnoZS->e1x3(theseed)); vars[3].push_back(lazyToolnoZS->e2x2(theseed)); diff --git a/RecoEgamma/PhotonIdentification/plugins/PhotonMVAEstimator.cc b/RecoEgamma/PhotonIdentification/plugins/PhotonMVAEstimator.cc index e869590bd0473..b28efaa666a9e 100644 --- a/RecoEgamma/PhotonIdentification/plugins/PhotonMVAEstimator.cc +++ b/RecoEgamma/PhotonIdentification/plugins/PhotonMVAEstimator.cc @@ -1,5 +1,6 @@ #include "RecoEgamma/PhotonIdentification/interface/PhotonMVAEstimator.h" #include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "CommonTools/MVAUtils/interface/GBRForestTools.h" PhotonMVAEstimator::PhotonMVAEstimator(const edm::ParameterSet& conf) : AnyMVAEstimatorRun2Base(conf) @@ -39,10 +40,10 @@ PhotonMVAEstimator::PhotonMVAEstimator(const edm::ParameterSet& conf) // Create a TMVA reader object for each category for(int i=0; i variableNamesInCategory; std::vector variablesInCategory; - gbrForests_.push_back( GBRForestTools::createGBRForest( weightFileNames[i], variableNamesInCategory ) ); + std::vector variableNamesInCategory; + gbrForests_.push_back(createGBRForest(weightFileNames[i], variableNamesInCategory)); nVariables_.push_back(variableNamesInCategory.size()); diff --git a/RecoHI/HiTracking/plugins/BuildFile.xml b/RecoHI/HiTracking/plugins/BuildFile.xml index 894248d98c038..457a3fc67b288 100644 --- a/RecoHI/HiTracking/plugins/BuildFile.xml +++ b/RecoHI/HiTracking/plugins/BuildFile.xml @@ -1,6 +1,7 @@ + diff --git a/RecoJets/JetProducers/BuildFile.xml b/RecoJets/JetProducers/BuildFile.xml index b3e5bcebad3b7..55ce79680a70d 100644 --- a/RecoJets/JetProducers/BuildFile.xml +++ b/RecoJets/JetProducers/BuildFile.xml @@ -10,9 +10,8 @@ - + - diff --git a/RecoJets/JetProducers/interface/PileupJetIdAlgo.h b/RecoJets/JetProducers/interface/PileupJetIdAlgo.h index 3b47e2c72c650..ea5b34b7ee114 100644 --- a/RecoJets/JetProducers/interface/PileupJetIdAlgo.h +++ b/RecoJets/JetProducers/interface/PileupJetIdAlgo.h @@ -15,11 +15,6 @@ #include "DataFormats/PatCandidates/interface/PackedCandidate.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "TMVA/Tools.h" -#include "TMVA/Reader.h" -#include "TMVA/Tools.h" -#include "TMVA/Reader.h" - #include "DataFormats/JetReco/interface/PileupJetIdentifier.h" #include "CondFormats/EgammaObjects/interface/GBRForest.h" @@ -81,10 +76,6 @@ class PileupJetIdAlgo { array_t const& rmsCut() const { return rmsCut_; } array_t const& betaStarCut() const { return betaStarCut_; } - std::unique_ptr getMVA(std::vector const& varList, - std::string const& tmvaWeights, - std::vector const& tmvaSpectators); - private: std::unique_ptr reader_; diff --git a/RecoJets/JetProducers/python/PileupJetIDParams_cfi.py b/RecoJets/JetProducers/python/PileupJetIDParams_cfi.py index cca5b2b2a8223..1d03820f45424 100644 --- a/RecoJets/JetProducers/python/PileupJetIDParams_cfi.py +++ b/RecoJets/JetProducers/python/PileupJetIDParams_cfi.py @@ -13,7 +13,7 @@ cms.PSet( jEtaMin = cms.double(0.), jEtaMax = cms.double(2.5), - tmvaWeights = cms.string("RecoJets/JetProducers/data/pileupJetId_80XvarFix_Eta0to2p5_BDT.weights.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/pileupJetId_80XvarFix_Eta0to2p5_BDT.weights.xml.gz"), tmvaVariables = cms.vstring( "nvtx", "dR2Mean" , @@ -35,7 +35,7 @@ cms.PSet( jEtaMin = cms.double(2.5), jEtaMax = cms.double(2.75), - tmvaWeights = cms.string("RecoJets/JetProducers/data/pileupJetId_80XvarFix_Eta2p5to2p75_BDT.weights.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/pileupJetId_80XvarFix_Eta2p5to2p75_BDT.weights.xml.gz"), tmvaVariables = cms.vstring( "nvtx", "dR2Mean" , @@ -57,7 +57,7 @@ cms.PSet( jEtaMin = cms.double(2.75), jEtaMax = cms.double(3.), - tmvaWeights = cms.string("RecoJets/JetProducers/data/pileupJetId_80XvarFix_Eta2p75to3_BDT.weights.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/pileupJetId_80XvarFix_Eta2p75to3_BDT.weights.xml.gz"), tmvaVariables = cms.vstring( "nvtx", "dR2Mean" , @@ -79,7 +79,7 @@ cms.PSet( jEtaMin = cms.double(3.), jEtaMax = cms.double(5.), - tmvaWeights = cms.string("RecoJets/JetProducers/data/pileupJetId_80XvarFix_Eta3to5_BDT.weights.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/pileupJetId_80XvarFix_Eta3to5_BDT.weights.xml.gz"), tmvaVariables = cms.vstring( "nvtx", "dR2Mean" , @@ -117,7 +117,7 @@ cms.PSet( jEtaMin = cms.double(0.), jEtaMax = cms.double(2.5), - tmvaWeights = cms.string("RecoJets/JetProducers/data/pileupJetId_80X_Eta0to2p5_BDT.weights.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/pileupJetId_80X_Eta0to2p5_BDT.weights.xml.gz"), tmvaVariables = cms.vstring( "nvtx", "dR2Mean" , @@ -139,7 +139,7 @@ cms.PSet( jEtaMin = cms.double(2.5), jEtaMax = cms.double(2.75), - tmvaWeights = cms.string("RecoJets/JetProducers/data/pileupJetId_80X_Eta2p5to2p75_BDT.weights.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/pileupJetId_80X_Eta2p5to2p75_BDT.weights.xml.gz"), tmvaVariables = cms.vstring( "nvtx", "dR2Mean" , @@ -161,7 +161,7 @@ cms.PSet( jEtaMin = cms.double(2.75), jEtaMax = cms.double(3.), - tmvaWeights = cms.string("RecoJets/JetProducers/data/pileupJetId_80X_Eta2p75to3_BDT.weights.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/pileupJetId_80X_Eta2p75to3_BDT.weights.xml.gz"), tmvaVariables = cms.vstring( "nvtx", "dR2Mean" , @@ -183,7 +183,7 @@ cms.PSet( jEtaMin = cms.double(3.), jEtaMax = cms.double(5.), - tmvaWeights = cms.string("RecoJets/JetProducers/data/pileupJetId_80X_Eta3to5_BDT.weights.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/pileupJetId_80X_Eta3to5_BDT.weights.xml.gz"), tmvaVariables = cms.vstring( "nvtx", "dR2Mean" , @@ -218,7 +218,7 @@ cms.PSet( jEtaMin = cms.double(0.), jEtaMax = cms.double(2.5), - tmvaWeights = cms.string("RecoJets/JetProducers/data/pileupJetId_76x_Eta0to2p5_BDT.weights.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/pileupJetId_76x_Eta0to2p5_BDT.weights.xml.gz"), tmvaVariables = cms.vstring( "nvtx", "dR2Mean" , @@ -240,7 +240,7 @@ cms.PSet( jEtaMin = cms.double(2.5), jEtaMax = cms.double(2.75), - tmvaWeights = cms.string("RecoJets/JetProducers/data/pileupJetId_76x_Eta2p5to2p75_BDT.weights.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/pileupJetId_76x_Eta2p5to2p75_BDT.weights.xml.gz"), tmvaVariables = cms.vstring( "nvtx", "dR2Mean" , @@ -262,7 +262,7 @@ cms.PSet( jEtaMin = cms.double(2.75), jEtaMax = cms.double(3.), - tmvaWeights = cms.string("RecoJets/JetProducers/data/pileupJetId_76x_Eta2p75to3_BDT.weights.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/pileupJetId_76x_Eta2p75to3_BDT.weights.xml.gz"), tmvaVariables = cms.vstring( "nvtx", "dR2Mean" , @@ -284,7 +284,7 @@ cms.PSet( jEtaMin = cms.double(3.), jEtaMax = cms.double(5.), - tmvaWeights = cms.string("RecoJets/JetProducers/data/pileupJetId_76x_Eta3to5_BDT.weights.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/pileupJetId_76x_Eta3to5_BDT.weights.xml.gz"), tmvaVariables = cms.vstring( "nvtx", "dR2Mean" , @@ -320,7 +320,7 @@ cms.PSet( jEtaMin = cms.double(0.), jEtaMax = cms.double(2.), - tmvaWeights = cms.string("RecoJets/JetProducers/data/TMVAClassificationCategory_BDTG.weights_jteta_0_2_newNames.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/TMVAClassificationCategory_BDTG.weights_jteta_0_2_newNames.xml.gz"), tmvaVariables = cms.vstring( "dR2Mean" , "rho" , @@ -343,7 +343,7 @@ cms.PSet( jEtaMin = cms.double(2.), jEtaMax = cms.double(2.5), - tmvaWeights = cms.string("RecoJets/JetProducers/data/TMVAClassificationCategory_BDTG.weights_jteta_2_2p5_newNames.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/TMVAClassificationCategory_BDTG.weights_jteta_2_2p5_newNames.xml.gz"), tmvaVariables = cms.vstring( "dR2Mean" , "rho" , @@ -366,7 +366,7 @@ cms.PSet( jEtaMin = cms.double(2.5), jEtaMax = cms.double(3.), - tmvaWeights = cms.string("RecoJets/JetProducers/data/TMVAClassificationCategory_BDTG.weights_jteta_2p5_3_newNames.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/TMVAClassificationCategory_BDTG.weights_jteta_2p5_3_newNames.xml.gz"), tmvaVariables = cms.vstring( "dR2Mean" , "rho" , @@ -389,7 +389,7 @@ cms.PSet( jEtaMin = cms.double(3.), jEtaMax = cms.double(5.), - tmvaWeights = cms.string("RecoJets/JetProducers/data/TMVAClassificationCategory_BDTG.weights_jteta_3_5_newNames.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/TMVAClassificationCategory_BDTG.weights_jteta_3_5_newNames.xml.gz"), tmvaVariables = cms.vstring( "dR2Mean" , "rho" , @@ -421,7 +421,7 @@ impactParTkThreshold = cms.double(1.) , cutBased = cms.bool(False), etaBinnedWeights = cms.bool(False), - tmvaWeights = cms.string("CondFormats/JetMETObjects/data/TMVAClassificationCategory_JetID_53X_Dec2012.weights.xml"), + tmvaWeights = cms.FileInPath("CondFormats/JetMETObjects/data/TMVAClassificationCategory_JetID_53X_Dec2012.weights.xml"), tmvaMethod = cms.string("JetIDMVAHighPt"), version = cms.int32(-1), tmvaVariables = cms.vstring( @@ -452,8 +452,8 @@ impactParTkThreshold = cms.double(1.) , cutBased = cms.bool(False), etaBinnedWeights = cms.bool(False), - tmvaWeights = cms.string("CondFormats/JetMETObjects/data/TMVAClassificationCategory_JetID_53X_chs_Dec2012.weights.xml"), - #tmvaWeights = cms.string("RecoJets/JetProducers/data/TMVAClassificationCategory_JetID_53X_chs_Dec2012.weights.xml"), + tmvaWeights = cms.FileInPath("CondFormats/JetMETObjects/data/TMVAClassificationCategory_JetID_53X_chs_Dec2012.weights.xml"), + #tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/TMVAClassificationCategory_JetID_53X_chs_Dec2012.weights.xml"), tmvaMethod = cms.string("JetIDMVAHighPt"), version = cms.int32(-1), tmvaVariables = cms.vstring( @@ -484,7 +484,7 @@ impactParTkThreshold = cms.double(1.) , cutBased = cms.bool(False), etaBinnedWeights = cms.bool(False), - tmvaWeights = cms.string("RecoJets/JetProducers/data/TMVAClassificationCategory_JetID_MET_53X_Dec2012.weights.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/TMVAClassificationCategory_JetID_MET_53X_Dec2012.weights.xml.gz"), tmvaMethod = cms.string("JetIDMVAMET"), version = cms.int32(-1), tmvaVariables = cms.vstring( @@ -509,12 +509,12 @@ JetIdParams = met_53x_wp, label = cms.string("met53x") ) -#################################################################################################################### +################################################################################################################## full_5x = cms.PSet( impactParTkThreshold = cms.double(1.) , cutBased = cms.bool(False), etaBinnedWeights = cms.bool(False), - tmvaWeights = cms.string("RecoJets/JetProducers/data/TMVAClassificationCategory_JetID_MET_53X_Dec2012.weights.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/TMVAClassificationCategory_JetID_MET_53X_Dec2012.weights.xml.gz"), tmvaMethod = cms.string("BDT_fullPlusRMS"), version = cms.int32(-1), tmvaVariables = cms.vstring( @@ -539,37 +539,12 @@ label = cms.string("full") ) -#################################################################################################################### -simple_5x = cms.PSet( - impactParTkThreshold = cms.double(1.) , - cutBased = cms.bool(False), - tmvaWeights = cms.string("RecoJets/JetProducers/data/TMVAClassification_5x_BDT_simpleNoVtxCat.weights.xml.gz"), - tmvaMethod = cms.string("BDT_simpleNoVtxCat"), - version = cms.int32(-1), - tmvaVariables = cms.vstring( - "frac01", - "frac02", - "frac03", - "frac04", - "frac05", - "nvtx", - "beta", - "betaStar", - ), - tmvaSpectators = cms.vstring( - "jetPt", - "jetEta", - ), - JetIdParams = simple_5x_wp, - label = cms.string("simple") - ) - -#################################################################################################################### +################################################################################################################## full_5x_chs = cms.PSet( impactParTkThreshold = cms.double(1.) , cutBased = cms.bool(False), etaBinnedWeights = cms.bool(False), - tmvaWeights = cms.string("RecoJets/JetProducers/data/TMVAClassification_5x_BDT_chsFullPlusRMS.weights.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/TMVAClassification_5x_BDT_chsFullPlusRMS.weights.xml.gz"), tmvaMethod = cms.string("BDT_chsFullPlusRMS"), version = cms.int32(-1), tmvaVariables = cms.vstring( @@ -594,87 +569,6 @@ label = cms.string("full") ) -#################################################################################################################### -simple_5x_chs = cms.PSet( - impactParTkThreshold = cms.double(1.) , - cutBased = cms.bool(False), - etaBinnedWeights = cms.bool(False), - tmvaWeights = cms.string("RecoJets/JetProducers/data/TMVAClassification_5x_BDT_chsSimpleNoVtxCat.weights.xml.gz"), - tmvaMethod = cms.string("BDT_chsSimpleNoVtxCat"), - version = cms.int32(-1), - tmvaVariables = cms.vstring( - "frac01", - "frac02", - "frac03", - "frac04", - "frac05", - "nvtx", - "beta", - "betaStar", - ), - tmvaSpectators = cms.vstring( - "jetPt", - "jetEta", - ), - JetIdParams = simple_5x_chs_wp, - label = cms.string("simple") - ) - -#################################################################################################################### -full = cms.PSet( - impactParTkThreshold = cms.double(1.) , - cutBased = cms.bool(False), - etaBinnedWeights = cms.bool(False), - tmvaWeights = cms.string("RecoJets/JetProducers/data/TMVAClassification_PuJetIdOptMVA.weights.xml.gz"), - tmvaMethod = cms.string("PuJetIdOptMVA"), - version = cms.int32(-1), - tmvaVariables = cms.vstring( - "frac01", - "frac02", - "frac03", - "frac04", - "frac05", - "nvtx", - "nNeutrals", - "beta", - "betaStar", - "dZ", - "nCharged", - ), - tmvaSpectators = cms.vstring( - "jetPt", - "jetEta", - ), - JetIdParams = PuJetIdOptMVA_wp, - label = cms.string("full") - ) - -#################################################################################################################### -simple = cms.PSet( - impactParTkThreshold = cms.double(1.) , - cutBased = cms.bool(False), - etaBinnedWeights = cms.bool(False), - tmvaWeights = cms.string("RecoJets/JetProducers/data/TMVAClassification_PuJetIdMinMVA.weights.xml.gz"), - tmvaMethod = cms.string("PuJetIdMinMVA"), - version = cms.int32(-1), - tmvaVariables = cms.vstring( - "frac01", - "frac02", - "frac03", - "frac04", - "frac05", - "beta", - "betaStar", - ), - tmvaSpectators = cms.vstring( - "nvtx", - "jetPt", - "jetEta", - ), - JetIdParams = PuJetIdMinMVA_wp, - label = cms.string("simple") - ) - #################################################################################################################### cutbased = cms.PSet( impactParTkThreshold = cms.double(1.), @@ -683,22 +577,12 @@ label = cms.string("cutbased") ) -#################################################################################################################### -PhilV0 = cms.PSet( - impactParTkThreshold = cms.double(1.) , - cutBased = cms.bool(False), - tmvaWeights = cms.string("RecoJets/JetProducers/data/mva_JetID.weights.xml.gz"), - tmvaMethod = cms.string("JetID"), - version = cms.int32(0), - JetIdParams = EmptyJetIdParams -) - #################################################################################################################### PhilV1 = cms.PSet( impactParTkThreshold = cms.double(1.) , cutBased = cms.bool(False), etaBinnedWeights = cms.bool(False), - tmvaWeights = cms.string("RecoJets/JetProducers/data/mva_JetID_v1.weights.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/mva_JetID_v1.weights.xml.gz"), tmvaMethod = cms.string("JetID"), version = cms.int32(-1), tmvaVariables = cms.vstring( diff --git a/RecoJets/JetProducers/python/hltPUIdAlgo_cff.py b/RecoJets/JetProducers/python/hltPUIdAlgo_cff.py index 2d6a66ad75d70..6faf9db661002 100644 --- a/RecoJets/JetProducers/python/hltPUIdAlgo_cff.py +++ b/RecoJets/JetProducers/python/hltPUIdAlgo_cff.py @@ -28,7 +28,7 @@ full_74x = cms.PSet( impactParTkThreshold = cms.double(1.) , cutBased = cms.bool(False), - tmvaWeights = cms.string("RecoJets/JetProducers/data/MVAJetPuID.weights_newNames.xml.gz"), + tmvaWeights = cms.FileInPath("RecoJets/JetProducers/data/MVAJetPuID.weights_newNames.xml.gz"), tmvaMethod = cms.string("BDTG"), version = cms.int32(-1), tmvaVariables = cms.vstring( diff --git a/RecoJets/JetProducers/src/MVAJetPuId.cc b/RecoJets/JetProducers/src/MVAJetPuId.cc index 928ed21263714..78e7e8f6468a0 100644 --- a/RecoJets/JetProducers/src/MVAJetPuId.cc +++ b/RecoJets/JetProducers/src/MVAJetPuId.cc @@ -6,7 +6,7 @@ #include "DataFormats/ParticleFlowCandidate/interface/PFCandidate.h" #include "DataFormats/Math/interface/deltaR.h" #include "FWCore/ParameterSet/interface/FileInPath.h" -#include "CommonTools/Utils/interface/TMVAZipReader.h" +#include "CommonTools/MVAUtils/interface/TMVAZipReader.h" #include "TMatrixDSym.h" #include "TMatrixDSymEigen.h" #include "DataFormats/JetReco/interface/PileupJetIdentifier.h" diff --git a/RecoJets/JetProducers/src/PileupJPTJetIdAlgo.cc b/RecoJets/JetProducers/src/PileupJPTJetIdAlgo.cc index 2aea96b4c2a14..15a103521d4b2 100644 --- a/RecoJets/JetProducers/src/PileupJPTJetIdAlgo.cc +++ b/RecoJets/JetProducers/src/PileupJPTJetIdAlgo.cc @@ -38,7 +38,7 @@ #include "Math/GenVector/VectorUtil.h" #include "Math/GenVector/PxPyPzE4D.h" -#include "CommonTools/Utils/interface/TMVAZipReader.h" +#include "CommonTools/MVAUtils/interface/TMVAZipReader.h" #include diff --git a/RecoJets/JetProducers/src/PileupJetIdAlgo.cc b/RecoJets/JetProducers/src/PileupJetIdAlgo.cc index 0821afbc59474..7772ceabd490c 100644 --- a/RecoJets/JetProducers/src/PileupJetIdAlgo.cc +++ b/RecoJets/JetProducers/src/PileupJetIdAlgo.cc @@ -6,11 +6,10 @@ #include "DataFormats/ParticleFlowCandidate/interface/PFCandidate.h" #include "DataFormats/Math/interface/deltaR.h" #include "FWCore/ParameterSet/interface/FileInPath.h" -#include "CommonTools/Utils/interface/TMVAZipReader.h" +#include "CommonTools/MVAUtils/interface/GBRForestTools.h" #include "TMatrixDSym.h" #include "TMatrixDSymEigen.h" -#include "TMVA/MethodBDT.h" #include @@ -30,8 +29,7 @@ PileupJetIdAlgo::AlgoGBRForestsAndConstants::AlgoGBRForestsAndConstants(edm::Par betaStarCut_{} { - std::string tmvaWeights; - std::vector tmvaEtaWeights; + std::vector tmvaEtaWeights; std::vector tmvaSpectators; int version; @@ -41,7 +39,7 @@ PileupJetIdAlgo::AlgoGBRForestsAndConstants::AlgoGBRForestsAndConstants(edm::Par const std::vector& trainings = ps.getParameter >("trainings"); nEtaBins_ = ps.getParameter("nEtaBins"); for (int v = 0; v < nEtaBins_; v++) { - tmvaEtaWeights.push_back( edm::FileInPath(trainings.at(v).getParameter("tmvaWeights")).fullPath() ); + tmvaEtaWeights.push_back(trainings.at(v).getParameter("tmvaWeights")); jEtaMin_.push_back( trainings.at(v).getParameter("jEtaMin") ); jEtaMax_.push_back( trainings.at(v).getParameter("jEtaMax") ); } @@ -49,7 +47,6 @@ PileupJetIdAlgo::AlgoGBRForestsAndConstants::AlgoGBRForestsAndConstants(edm::Par tmvaEtaVariables_.push_back( trainings.at(v).getParameter >("tmvaVariables") ); } } else { - tmvaWeights = edm::FileInPath(ps.getParameter("tmvaWeights")).fullPath(); tmvaVariables_ = ps.getParameter >("tmvaVariables"); } tmvaMethod_ = ps.getParameter("tmvaMethod"); @@ -102,35 +99,14 @@ PileupJetIdAlgo::AlgoGBRForestsAndConstants::AlgoGBRForestsAndConstants(edm::Par if (( ! cutBased_ ) && (runMvas_)) { if (etaBinnedWeights_) { for (int v = 0; v < nEtaBins_; v++) { - etaReader_.push_back(getMVA(tmvaEtaVariables_.at(v), tmvaEtaWeights.at(v), tmvaSpectators)); + etaReader_.push_back(createGBRForest(tmvaEtaWeights.at(v))); } } else { - reader_ = getMVA(tmvaVariables_, tmvaWeights, tmvaSpectators); + reader_ = createGBRForest(ps.getParameter("tmvaWeights")); } } } -std::unique_ptr -PileupJetIdAlgo::AlgoGBRForestsAndConstants::getMVA(std::vector const& varList, - std::string const& tmvaWeights, - std::vector const& tmvaSpectators) { - - // A temporary only to access the variables while calling TMVA AddVariable and TMVA AddSpectator. - PileupJetIdAlgo algo(nullptr); - - TMVA::Reader tmpTMVAReader( "!Color:Silent:!Error" ); - for (auto const& varName : varList) { - if ( tmvaNames_[varName].empty() ) tmvaNames_[varName] = varName; - tmpTMVAReader.AddVariable( varName, std::get(algo.getVariables().at(tmvaNames_[varName])) ); - } - for (auto const& spectatorName : tmvaSpectators) { - if ( tmvaNames_[spectatorName].empty() ) tmvaNames_[spectatorName] = spectatorName; - tmpTMVAReader.AddSpectator( spectatorName, std::get(algo.getVariables().at(tmvaNames_[spectatorName])) ); - } - reco::details::loadTMVAWeights(&tmpTMVAReader, tmvaMethod_, tmvaWeights); - return ( std::make_unique ( dynamic_cast( tmpTMVAReader.FindMVA(tmvaMethod_.c_str()) ) ) ); -} - PileupJetIdAlgo::PileupJetIdAlgo(AlgoGBRForestsAndConstants const* cache) : cache_(cache) { @@ -267,8 +243,6 @@ PileupJetIdentifier PileupJetIdAlgo::computeMva() PileupJetIdentifier PileupJetIdAlgo::computeIdVariables(const reco::Jet * jet, float jec, const reco::Vertex * vtx, const reco::VertexCollection & allvtx, double rho, bool usePuppi) { - - static std::atomic printWarning{10}; // initialize all variables to 0 resetVariables(); diff --git a/RecoParticleFlow/PFProducer/BuildFile.xml b/RecoParticleFlow/PFProducer/BuildFile.xml index a1373e10bf027..c578bd323ab06 100644 --- a/RecoParticleFlow/PFProducer/BuildFile.xml +++ b/RecoParticleFlow/PFProducer/BuildFile.xml @@ -1,5 +1,6 @@ + @@ -17,7 +18,6 @@ - diff --git a/RecoParticleFlow/PFProducer/data/download.url b/RecoParticleFlow/PFProducer/data/download.url deleted file mode 100644 index f10f8dbfca63f..0000000000000 --- a/RecoParticleFlow/PFProducer/data/download.url +++ /dev/null @@ -1,9 +0,0 @@ -http://cmsdoc.cern.ch/cms/data/CMSSW/RecoParticleFlow/PFProducer/data/MVAnalysis_BDT.weights_finalID_hzz-pions.txt -http://cmsdoc.cern.ch/cms/data/CMSSW/RecoParticleFlow/PFProducer/data/MVAnalysis_MLP.weights.txt -http://cmsdoc.cern.ch/cms/data/CMSSW/RecoParticleFlow/PFProducer/data/MVAnalysis_BDT.weights_PfElectrons23Jan.txt -http://cmsdoc.cern.ch/cms/data/CMSSW/RecoParticleFlow/PFProducer/data/MVAnalysis_BDT.weights_PfElectrons23Jan_IntToFloat.txt -http://cmsdoc.cern.ch/cms/data/CMSSW/RecoParticleFlow/PFProducer/data/MVAnalysis_BDT.weights_pfConversionAug0411.txt -http://cmsdoc.cern.ch/cms/data/CMSSW/RecoParticleFlow/PFProducer/data/TMVARegression_BDTG_PFClusterLCorr_14Dec2011.root -http://cmsdoc.cern.ch/cms/data/CMSSW/RecoParticleFlow/PFProducer/data/TMVARegression_BDTG_PFGlobalCorr_14Dec2011.root -http://cmsdoc.cern.ch/cms/data/CMSSW/RecoParticleFlow/PFProducer/data/allX0histos.root -http://cmsdoc.cern.ch/cms/data/CMSSW/RecoParticleFlow/PFProducer/data/TMVARegression_BDTG_PFRes_14Dec2011.root diff --git a/RecoParticleFlow/PFProducer/interface/PFEGammaAlgo.h b/RecoParticleFlow/PFProducer/interface/PFEGammaAlgo.h index 2823a486eb4fb..2263d881a5dbb 100644 --- a/RecoParticleFlow/PFProducer/interface/PFEGammaAlgo.h +++ b/RecoParticleFlow/PFProducer/interface/PFEGammaAlgo.h @@ -41,7 +41,6 @@ #include "CondFormats/ESObjects/interface/ESEEIntercalibConstants.h" #include "CondFormats/ESObjects/interface/ESChannelStatus.h" -#include "TMVA/Reader.h" #include #include @@ -133,9 +132,6 @@ class PFEGammaAlgo { channelStatus_ = channelStatus; } - void setnPU(int nVtx){ - nVtx_=nVtx; - } void setPhotonPrimaryVtx(const reco::Vertex& primary){ cfg_.primaryVtx = & primary; } @@ -145,14 +141,6 @@ class PFEGammaAlgo { std::vector< bool >& active ); - //check candidate validity - bool isEGValidCandidate(const pfEGHelpers::HeavyObjectCache* hoc, - const reco::PFBlockRef& blockRef, - std::vector< bool >& active){ - RunPFEG(hoc,blockRef,active); - return (!egCandidate_.empty()); - }; - //get PFCandidate collection reco::PFCandidateCollection& getCandidates() {return outcands_;} @@ -165,12 +153,6 @@ class PFEGammaAlgo { private: - enum verbosityLevel { - Silent, - Summary, - Chatty - }; - // ------ rewritten basic processing pieces and cleaning algorithms // the output collections reco::PFCandidateCollection outcands_; @@ -185,10 +167,9 @@ class PFEGammaAlgo { // keep a map of pf indices to the splayed block for convenience // sadly we're mashing together two ways of thinking about the block std::vector > _splayedblock; - ElementMap _recoveredlinks; // pre-cleaning for the splayed block - bool isAMuon(const reco::PFBlockElement&); + bool isMuon(const reco::PFBlockElement&); // pre-processing of ECAL clusters near non-primary KF tracks void removeOrLinkECALClustersToKFTracks(); @@ -199,8 +180,6 @@ class PFEGammaAlgo { // flow. // use list for constant-time removals std::list _refinableObjects; - // final list of fully refined objects in this block - reco::PFCandidateCollection _finalCandidates; // functions: // this runs the functions below @@ -271,10 +250,10 @@ class PFEGammaAlgo { // helper functions for that - float calculate_ele_mva(const pfEGHelpers::HeavyObjectCache* hoc, - const ProtoEGObject&, - reco::PFCandidateEGammaExtra&); - void fill_extra_info(const ProtoEGObject&, + float calculateEleMVA(const pfEGHelpers::HeavyObjectCache* hoc, + const ProtoEGObject&, + reco::PFCandidateEGammaExtra&) const; + void fillExtraInfo(const ProtoEGObject&, reco::PFCandidateEGammaExtra&); // ------ end of new stuff @@ -297,91 +276,12 @@ class PFEGammaAlgo { const char *mvaWeightFile_; - // New BDT observables - // Normalization - float lnPt_gsf,Eta_gsf; - - // Pure Tracking observ. - float dPtOverPt_gsf,chi2_gsf,DPtOverPt_gsf, - chi2_kf,DPtOverPt_kf; - // int nhit_gsf,nhit_kf; - float nhit_gsf,nhit_kf; - - // Tracker-Ecal observ. - float EtotPinMode,EGsfPoutMode,EtotBremPinPoutMode; - float DEtaGsfEcalClust; - float SigmaEtaEta; - //int lateBrem,firstBrem,earlyBrem; - float lateBrem,firstBrem,earlyBrem; - float HOverHE,HOverPin; - - bool isvalid_; - - //const std::vector * theGsfElectrons_; - //end of data members from PFElectronAlgo - - - //bool isvalid_; // is set to TRUE when a valid PhotonCandidate is found in a PFBlock - verbosityLevel verbosityLevel_; /* Verbosity Level: - ............... 0: Say nothing at all - ............... 1: Print summary about found PhotonCadidates only - ............... 2: Chatty mode - */ - //FOR SINGLE LEG MVA: - const reco::Vertex * primaryVertex_; - //TMVA::Reader *tmvaReader_; - const GBRForest *ReaderLC_; - const GBRForest *ReaderGC_; - const GBRForest *ReaderRes_; - - const GBRForest *ReaderLCEB_; - const GBRForest *ReaderLCEE_; - const GBRForest *ReaderGCEB_; - const GBRForest *ReaderGCEEhR9_; - const GBRForest *ReaderGCEElR9_; - -// boost::shared_ptr thePFEnergyCalibration_; - std::vectormatch_ind; - //std::auto_ptr< reco::PFCandidateCollection > permElectronCandidates_; - - std::vector< reco::PFCandidate >permElectronCandidates_; - float nlost, nlayers; - float chi2, STIP, del_phi,HoverPt, EoverPt, track_pt; - double mvaValue; - //for Cluster Shape Calculations: - float e5x5Map[5][5]; - - //For Local Containment Corrections: - float CrysPhi_, CrysEta_, VtxZ_, ClusPhi_, ClusEta_, - ClusR9_, Clus5x5ratio_, PFCrysEtaCrack_, logPFClusE_, e3x3_; - int CrysIPhi_, CrysIEta_; - float CrysX_, CrysY_; - float EB; - //Cluster Shapes: - float eSeed_, e1x3_,e3x1_, e1x5_, e2x5Top_, e2x5Bottom_, e2x5Left_, e2x5Right_ ; - float etop_, ebottom_, eleft_, eright_; - float e2x5Max_; - //For Global Corrections: - float PFPhoEta_, PFPhoPhi_, PFPhoR9_, PFPhoR9Corr_, SCPhiWidth_, SCEtaWidth_, PFPhoEt_, RConv_, PFPhoEtCorr_, PFPhoE_, PFPhoECorr_, MustE_, E3x3_; - float dEta_, dPhi_, LowClusE_, RMSAll_, RMSMust_, nPFClus_; - float TotPS1_, TotPS2_; - float nVtx_; - //for PileUP - float excluded_, Mustache_EtRatio_, Mustache_Et_out_; const ESChannelStatus* channelStatus_; - std::vector AddFromElectron_; - - reco::PFCandidateCollection egCandidate_; -// std::vector ebeeCluster_; -// std::vector esCluster_; -// std::vector sCluster_; - reco::PFCandidateEGammaExtraCollection egExtra_; - - float EvaluateSingleLegMVA(const pfEGHelpers::HeavyObjectCache* hoc, + float evaluateSingleLegMVA(const pfEGHelpers::HeavyObjectCache* hoc, const reco::PFBlockRef& blockref, - const reco::Vertex& primaryvtx, - unsigned int track_index); + const reco::Vertex& primaryVtx, + unsigned int trackIndex); }; #endif diff --git a/RecoParticleFlow/PFProducer/interface/PFEGammaHeavyObjectCache.h b/RecoParticleFlow/PFProducer/interface/PFEGammaHeavyObjectCache.h index 0a71d26ca16cb..c0e1adc04f88e 100644 --- a/RecoParticleFlow/PFProducer/interface/PFEGammaHeavyObjectCache.h +++ b/RecoParticleFlow/PFProducer/interface/PFEGammaHeavyObjectCache.h @@ -2,23 +2,19 @@ #define __RecoParticleFlow_PFProducer_pfEGHelpersHeavyObjectCache_h__ #include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "CondFormats/EgammaObjects/interface/GBRForest.h" +#include "CommonTools/MVAUtils/interface/GBRForestTools.h" #include namespace pfEGHelpers { class HeavyObjectCache { public: - HeavyObjectCache(const edm::ParameterSet&); - std::unique_ptr gbrEle_; - std::unique_ptr gbrSingleLeg_; - private: - // for electron mva - float lnPt_gsf, Eta_gsf, dPtOverPt_gsf, DPtOverPt_gsf, chi2_gsf, nhit_kf; - float chi2_kf, EtotPinMode, EGsfPoutMode, EtotBremPinPoutMode, DEtaGsfEcalClust; - float SigmaEtaEta, HOverHE, lateBrem, firstBrem; - // for single leg mva - float nlost, nlayers; - float chi2, STIP, del_phi,HoverPt, EoverPt, track_pt; + HeavyObjectCache(const edm::ParameterSet& conf) + : gbrEle_ (createGBRForest(conf.getParameter("pf_electronID_mvaWeightFile"))) + , gbrSingleLeg_ (createGBRForest(conf.getParameter("pf_convID_mvaWeightFile"))) + {} + + const std::unique_ptr gbrEle_; + const std::unique_ptr gbrSingleLeg_; }; } diff --git a/RecoParticleFlow/PFProducer/plugins/BuildFile.xml b/RecoParticleFlow/PFProducer/plugins/BuildFile.xml index 8ab007a6acdb4..c000d79be04e6 100644 --- a/RecoParticleFlow/PFProducer/plugins/BuildFile.xml +++ b/RecoParticleFlow/PFProducer/plugins/BuildFile.xml @@ -91,7 +91,5 @@ - - diff --git a/RecoParticleFlow/PFProducer/plugins/PFEGammaProducer.cc b/RecoParticleFlow/PFProducer/plugins/PFEGammaProducer.cc index 0061a097b609d..5e49dfd63992b 100644 --- a/RecoParticleFlow/PFProducer/plugins/PFEGammaProducer.cc +++ b/RecoParticleFlow/PFProducer/plugins/PFEGammaProducer.cc @@ -363,9 +363,6 @@ PFEGammaProducer::setPFEGParameters(PFEGammaAlgo::PFEGConfigInfo& cfg) { void PFEGammaProducer::setPFVertexParameters(const reco::VertexCollection* primaryVertices) { - //Now find the primary vertex! - int nVtx=primaryVertices->size(); - pfeg_->setnPU(nVtx); primaryVertex_ = primaryVertices->front(); for (unsigned short i=0 ;isize();++i) { @@ -451,8 +448,8 @@ void PFEGammaProducer::fillDescriptions(edm::ConfigurationDescriptions& descript desc.add ("EEtoPS_source", edm::InputTag("particleFlowClusterECAL"))->setComment("EE to PS association"); desc.add ("vertexCollection", edm::InputTag("offlinePrimaryVertices")); desc.add("pf_electronID_mvaWeightFile", - edm::FileInPath("RecoParticleFlow/PFProducer/data/MVAnalysis_BDT.weights_PfElectrons23Jan_IntToFloat.txt")); + edm::FileInPath("RecoParticleFlow/PFProducer/data/PfElectrons23Jan_BDT.weights.xml.gz")); desc.add("pf_convID_mvaWeightFile", - edm::FileInPath("RecoParticleFlow/PFProducer/data/MVAnalysis_BDT.weights_pfConversionAug0411.txt")); + edm::FileInPath("RecoParticleFlow/PFProducer/data/pfConversionAug0411_BDT.weights.xml.gz")); descriptions.add("particleFlowEGamma", desc); } diff --git a/RecoParticleFlow/PFProducer/src/PFEGammaAlgo.cc b/RecoParticleFlow/PFProducer/src/PFEGammaAlgo.cc index 3d1d442a685b1..7ec7df0d0eb2f 100644 --- a/RecoParticleFlow/PFProducer/src/PFEGammaAlgo.cc +++ b/RecoParticleFlow/PFProducer/src/PFEGammaAlgo.cc @@ -33,7 +33,6 @@ #include #include #include -#include "TMVA/MethodBDT.h" // include combinations header (not yet included in boost) #include "combination.hpp" @@ -598,28 +597,7 @@ namespace { } PFEGammaAlgo:: -PFEGammaAlgo(const PFEGammaAlgo::PFEGConfigInfo& cfg) : - cfg_(cfg), - isvalid_(false), - verbosityLevel_(Silent), - nlost(0.0), nlayers(0.0), - chi2(0.0), STIP(0.0), del_phi(0.0),HoverPt(0.0), EoverPt(0.0), track_pt(0.0), - mvaValue(0.0), - CrysPhi_(0.0), CrysEta_(0.0), VtxZ_(0.0), ClusPhi_(0.0), ClusEta_(0.0), - ClusR9_(0.0), Clus5x5ratio_(0.0), PFCrysEtaCrack_(0.0), logPFClusE_(0.0), e3x3_(0.0), - CrysIPhi_(0), CrysIEta_(0), - CrysX_(0.0), CrysY_(0.0), - EB(0.0), - eSeed_(0.0), e1x3_(0.0),e3x1_(0.0), e1x5_(0.0), e2x5Top_(0.0), e2x5Bottom_(0.0), e2x5Left_(0.0), e2x5Right_(0.0), - etop_(0.0), ebottom_(0.0), eleft_(0.0), eright_(0.0), - e2x5Max_(0.0), - PFPhoEta_(0.0), PFPhoPhi_(0.0), PFPhoR9_(0.0), PFPhoR9Corr_(0.0), SCPhiWidth_(0.0), SCEtaWidth_(0.0), - PFPhoEt_(0.0), RConv_(0.0), PFPhoEtCorr_(0.0), PFPhoE_(0.0), PFPhoECorr_(0.0), MustE_(0.0), E3x3_(0.0), - dEta_(0.0), dPhi_(0.0), LowClusE_(0.0), RMSAll_(0.0), RMSMust_(0.0), nPFClus_(0.0), - TotPS1_(0.0), TotPS2_(0.0), - nVtx_(0.0), - excluded_(0.0), Mustache_EtRatio_(0.0), Mustache_Et_out_(0.0), - channelStatus_(nullptr) +PFEGammaAlgo(const PFEGammaAlgo::PFEGConfigInfo& cfg) : cfg_(cfg) {} void PFEGammaAlgo::RunPFEG(const pfEGHelpers::HeavyObjectCache* hoc, @@ -629,74 +607,67 @@ void PFEGammaAlgo::RunPFEG(const pfEGHelpers::HeavyObjectCache* hoc, fifthStepKfTrack_.clear(); convGsfTrack_.clear(); - egCandidate_.clear(); - egExtra_.clear(); - - // define how much is printed out for debugging. - // ... will be setable via CFG file parameter - verbosityLevel_ = Chatty; // Chatty mode. - buildAndRefineEGObjects(hoc, blockRef); } -float PFEGammaAlgo:: -EvaluateSingleLegMVA(const pfEGHelpers::HeavyObjectCache* hoc, - const reco::PFBlockRef& blockref, - const reco::Vertex& primaryvtx, - unsigned int track_index) { - const reco::PFBlock& block = *blockref; +float PFEGammaAlgo::evaluateSingleLegMVA(const pfEGHelpers::HeavyObjectCache* hoc, + const reco::PFBlockRef& blockRef, + const reco::Vertex& primaryVtx, + unsigned int trackIndex) +{ + const reco::PFBlock& block = *blockRef; const edm::OwnVector< reco::PFBlockElement >& elements = block.elements(); //use this to store linkdata in the associatedElements function below const PFBlock::LinkData& linkData = block.linkData(); //calculate MVA Variables - chi2=elements[track_index].trackRef()->chi2()/elements[track_index].trackRef()->ndof(); - nlost=elements[track_index].trackRef()->hitPattern().numberOfLostHits(HitPattern::MISSING_INNER_HITS); - nlayers=elements[track_index].trackRef()->hitPattern().trackerLayersWithMeasurement(); - track_pt=elements[track_index].trackRef()->pt(); - STIP=elements[track_index].trackRefPF()->STIP(); + const float chi2 = elements[trackIndex].trackRef()->chi2()/elements[trackIndex].trackRef()->ndof(); + const float nlost = elements[trackIndex].trackRef()->hitPattern().numberOfLostHits(HitPattern::MISSING_INNER_HITS); + const float nLayers = elements[trackIndex].trackRef()->hitPattern().trackerLayersWithMeasurement(); + const float trackPt = elements[trackIndex].trackRef()->pt(); + const float stip = elements[trackIndex].trackRefPF()->STIP(); - float linked_e=0; - float linked_h=0; + float linkedE = 0; + float linkedH = 0; std::multimap ecalAssoTrack; - block.associatedElements( track_index,linkData, - ecalAssoTrack, - reco::PFBlockElement::ECAL, - reco::PFBlock::LINKTEST_ALL ); + block.associatedElements(trackIndex,linkData, + ecalAssoTrack, + reco::PFBlockElement::ECAL, + reco::PFBlock::LINKTEST_ALL ); std::multimap hcalAssoTrack; - block.associatedElements( track_index,linkData, - hcalAssoTrack, - reco::PFBlockElement::HCAL, - reco::PFBlock::LINKTEST_ALL ); - if(!ecalAssoTrack.empty()) { - for(std::multimap::iterator itecal = ecalAssoTrack.begin(); - itecal != ecalAssoTrack.end(); ++itecal) { - linked_e=linked_e+elements[itecal->second].clusterRef()->energy(); + block.associatedElements(trackIndex,linkData, + hcalAssoTrack, + reco::PFBlockElement::HCAL, + reco::PFBlock::LINKTEST_ALL ); + if(!ecalAssoTrack.empty()) + { + for (auto & itecal : ecalAssoTrack) + { + linkedE = linkedE+elements[itecal.second].clusterRef()->energy(); } } - if(!hcalAssoTrack.empty()) { - for(std::multimap::iterator ithcal = hcalAssoTrack.begin(); - ithcal != hcalAssoTrack.end(); ++ithcal) { - linked_h=linked_h+elements[ithcal->second].clusterRef()->energy(); + if(!hcalAssoTrack.empty()) + { + for (auto & ithcal : hcalAssoTrack) + { + linkedH = linkedH+elements[ithcal.second].clusterRef()->energy(); } } - EoverPt=linked_e/elements[track_index].trackRef()->pt(); - HoverPt=linked_h/elements[track_index].trackRef()->pt(); - GlobalVector rvtx(elements[track_index].trackRef()->innerPosition().X()-primaryvtx.x(), - elements[track_index].trackRef()->innerPosition().Y()-primaryvtx.y(), - elements[track_index].trackRef()->innerPosition().Z()-primaryvtx.z()); - double vtx_phi=rvtx.phi(); + const float eOverPt = linkedE / elements[trackIndex].trackRef()->pt(); + const float hOverPt = linkedH / elements[trackIndex].trackRef()->pt(); + GlobalVector rvtx(elements[trackIndex].trackRef()->innerPosition().X()-primaryVtx.x(), + elements[trackIndex].trackRef()->innerPosition().Y()-primaryVtx.y(), + elements[trackIndex].trackRef()->innerPosition().Z()-primaryVtx.z()); + double vtxPhi = rvtx.phi(); //delta Phi between conversion vertex and track - del_phi=fabs(deltaPhi(vtx_phi, elements[track_index].trackRef()->innerMomentum().Phi())); + float delPhi = fabs(deltaPhi(vtxPhi, elements[trackIndex].trackRef()->innerMomentum().Phi())); - float vars[] = { del_phi, nlayers, chi2, EoverPt, - HoverPt, track_pt, STIP, nlost }; + float vars[] = { delPhi, nLayers, chi2, eOverPt, + hOverPt, trackPt, stip, nlost }; - mvaValue = hoc->gbrSingleLeg_->GetAdaBoostClassifier(vars); - - return mvaValue; + return hoc->gbrSingleLeg_->GetAdaBoostClassifier(vars); } -bool PFEGammaAlgo::isAMuon(const reco::PFBlockElement& pfbe) { +bool PFEGammaAlgo::isMuon(const reco::PFBlockElement& pfbe) { switch( pfbe.type() ) { case reco::PFBlockElement::GSF: { @@ -728,9 +699,7 @@ void PFEGammaAlgo::buildAndRefineEGObjects(const pfEGHelpers::HeavyObjectCache* LOGVERB("PFEGammaAlgo") << "Resetting PFEGammaAlgo for new block and running!" << std::endl; _splayedblock.clear(); - _recoveredlinks.clear(); _refinableObjects.clear(); - _finalCandidates.clear(); _splayedblock.resize(13); // make sure that we always have the HGCAL entry _currentblock = block; @@ -739,7 +708,7 @@ void PFEGammaAlgo::buildAndRefineEGObjects(const pfEGHelpers::HeavyObjectCache* LOGVERB("PFEGammaAlgo") << "Splaying block" << std::endl; //unwrap the PF block into a fast access map for( const auto& pfelement : _currentblock->elements() ) { - if( isAMuon(pfelement) ) continue; // don't allow muons in our element list + if( isMuon(pfelement) ) continue; // don't allow muons in our element list if (pfelement.type() == PFBlockElement::HCAL && pfelement.clusterRef()->flags() & reco::CaloCluster::badHcalMarker) continue; // skip also dead area markers for now const size_t itype = (size_t)pfelement.type(); @@ -896,7 +865,6 @@ initializeProtoCands(std::list& egobjs) { reco::GsfTrackRef gsfref_forextra; reco::TrackExtraRef gsftrk_extra; reco::ElectronSeedRef theseedref; - std::list::iterator objsbegin, objsend; for( auto& element : _splayedblock[PFBlockElement::GSF] ) { LOGDRESSED("PFEGammaAlgo") << "creating GSF-based proto-object" << std::endl @@ -963,8 +931,8 @@ initializeProtoCands(std::list& egobjs) { << " isNonnull: " << fromGSF.electronSeed.isNonnull() << std::endl; SeedMatchesToProtoObject sctoseedmatch(fromGSF.electronSeed); - objsbegin = _refinableObjects.begin(); - objsend = _refinableObjects.end(); + std::list::iterator objsbegin = _refinableObjects.begin(); + std::list::iterator objsend = _refinableObjects.end(); // this auto is a std::list::iterator auto clusmatch = std::find_if(objsbegin,objsend,sctoseedmatch); if( clusmatch != objsend ) { @@ -1338,14 +1306,7 @@ initializeProtoCands(std::list& egobjs) { // if associated to good non-GSF matched track remove this cluster if( PFTrackAlgoTools::isGoodForEGMPrimary(trackref->algo()) && nexhits == 0 && fromprimaryvertex ) { closestECAL.second = false; - } else { // otherwise associate the cluster and KF track - _recoveredlinks.emplace_back(closestECAL.first,kftrack.first); - _recoveredlinks.emplace_back(kftrack.first,closestECAL.first); } - - - - } } // found a good closest ECAL match } // no GSF track matched to KF @@ -1783,7 +1744,7 @@ linkRefinableObjectECALToSingleLegConv(const pfEGHelpers::HeavyObjectCache* hoc, } // go through non-conv-identified kfs and check MVA to add conversions for( auto kf = notconvkf; kf != notmatchedkf; ++kf ) { - float mvaval = EvaluateSingleLegMVA(hoc,_currentblock, + float mvaval = evaluateSingleLegMVA(hoc,_currentblock, *cfg_.primaryVtx, kf->first->index()); if(mvaval > cfg_.mvaConvCut) { @@ -1902,7 +1863,7 @@ fillPFCandidates(const pfEGHelpers::HeavyObjectCache* hoc, //by storing 3.0 + mvaval float mvaval = ( mvavalmapped != RO.singleLegConversionMvaMap.end() ? mvavalmapped->second : - 3.0 + EvaluateSingleLegMVA(hoc,_currentblock, + 3.0 + evaluateSingleLegMVA(hoc,_currentblock, *cfg_.primaryVtx, kf->index()) ); @@ -1959,179 +1920,179 @@ fillPFCandidates(const pfEGHelpers::HeavyObjectCache* hoc, cand.setP4(p4); cand.setPositionAtECALEntrance(kf->positionAtECALEntrance()); } - const float ele_mva_value = calculate_ele_mva(hoc,RO,xtra); - fill_extra_info(RO,xtra); - //std::cout << "PFEG ele_mva: " << ele_mva_value << std::endl; - xtra.setMVA(ele_mva_value); - cand.set_mva_e_pi(ele_mva_value); + const float eleMVAValue = calculateEleMVA(hoc,RO,xtra); + fillExtraInfo(RO,xtra); + //std::cout << "PFEG eleMVA: " << eleMVAValue << std::endl; + xtra.setMVA(eleMVAValue); + cand.set_mva_e_pi(eleMVAValue); egcands.push_back(cand); egxs.push_back(xtra); } } -float PFEGammaAlgo:: -calculate_ele_mva(const pfEGHelpers::HeavyObjectCache* hoc, - const PFEGammaAlgo::ProtoEGObject& RO, - reco::PFCandidateEGammaExtra& xtra) { - if( RO.primaryGSFs.empty() ) return -2.0f; - const PFGSFElement* gsfElement = RO.primaryGSFs.front().first; - const PFKFElement* kfElement = nullptr; - if( !RO.primaryKFs.empty() ) kfElement = RO.primaryKFs.front().first; - reco::GsfTrackRef RefGSF= gsfElement->GsftrackRef(); - reco::TrackRef RefKF; - constexpr float m_el = 0.000511; - const double Ein_gsf = std::hypot(RefGSF->pMode(),m_el); - double deta_gsfecal = 1e6; - double sigmaEtaEta = 1e-14; - const double Ene_hcalgsf = std::accumulate(RO.hcalClusters.begin(), - RO.hcalClusters.end(), - 0.0, - [](const double a, - const PFClusterFlaggedElement& b) - { return a + b.first->clusterRef()->energy(); } - ); - if( !RO.primaryKFs.empty() ) { - RefKF = RO.primaryKFs.front().first->trackRef(); +float PFEGammaAlgo::calculateEleMVA(const pfEGHelpers::HeavyObjectCache* hoc, + const PFEGammaAlgo::ProtoEGObject& ro, + reco::PFCandidateEGammaExtra& xtra) const +{ + if( ro.primaryGSFs.empty() ) + { + return -2.0f; + } + const PFGSFElement* gsfElement = ro.primaryGSFs.front().first; + const PFKFElement* kfElement = nullptr; + if( !ro.primaryKFs.empty() ) + { + kfElement = ro.primaryKFs.front().first; + } + reco::GsfTrackRef refGsf = gsfElement->GsftrackRef(); + reco::TrackRef refKf; + constexpr float mEl = 0.000511; + const double eInGsf = std::hypot(refGsf->pMode(),mEl); + double dEtGsfEcal = 1e6; + double sigmaEtaEta = 1e-14; + const double eneHcalGsf = std::accumulate( + ro.hcalClusters.begin(), + ro.hcalClusters.end(), + 0.0, + [](const double a, const PFClusterFlaggedElement& b) + { return a + b.first->clusterRef()->energy(); } + ); + if( !ro.primaryKFs.empty() ) + { + refKf = ro.primaryKFs.front().first->trackRef(); } - const double Eout_gsf = gsfElement->Pout().t(); - const double Etaout_gsf = gsfElement->positionAtECALEntrance().eta(); - double FirstEcalGsfEnergy(0.0), OtherEcalGsfEnergy(0.0), EcalBremEnergy(0.0); + const double eOutGsf = gsfElement->Pout().t(); + const double etaOutGsf = gsfElement->positionAtECALEntrance().eta(); + double firstEcalGsfEnergy {0.0}; + double otherEcalGsfEnergy {0.0}; + double ecalBremEnergy {0.0}; //shower shape of cluster closest to gsf track - std::vector gsfcluster; - for( const auto& ecal : RO.ecalclusters ) { + std::vector gsfCluster; + for( const auto& ecal : ro.ecalclusters ) + { const double cenergy = ecal.first->clusterRef()->correctedEnergy(); ElementMap::value_type gsfToEcal(gsfElement,ecal.first); ElementMap::value_type kfToEcal(kfElement,ecal.first); - bool hasgsf = - ( std::find(RO.localMap.begin(), RO.localMap.end(), gsfToEcal) == - RO.localMap.end() ); - bool haskf = - ( std::find(RO.localMap.begin(), RO.localMap.end(), kfToEcal) == - RO.localMap.end() ); + bool hasgsf = ( std::find(ro.localMap.begin(), ro.localMap.end(), gsfToEcal) == ro.localMap.end() ); + bool haskf = ( std::find(ro.localMap.begin(), ro.localMap.end(), kfToEcal) == ro.localMap.end() ); bool hasbrem = false; - for( const auto& brem : RO.brems ) { + for( const auto& brem : ro.brems ) + { ElementMap::value_type bremToEcal(brem.first,ecal.first); - if( std::find(RO.localMap.begin(), RO.localMap.end(), bremToEcal) != - RO.localMap.end() ) { - hasbrem = true; + if( std::find(ro.localMap.begin(), ro.localMap.end(), bremToEcal) != ro.localMap.end() ) + { + hasbrem = true; } } - if( hasbrem && ecal.first != RO.electronClusters[0] ) { - EcalBremEnergy += cenergy; + if( hasbrem && ecal.first != ro.electronClusters[0] ) + { + ecalBremEnergy += cenergy; } - if( !hasbrem && ecal.first != RO.electronClusters[0] ) { - if( hasgsf ) OtherEcalGsfEnergy += cenergy; - if( haskf ) EcalBremEnergy += cenergy; // from conv. brem! - if( !(hasgsf || haskf) ) OtherEcalGsfEnergy += cenergy; // stuff from SC + if( !hasbrem && ecal.first != ro.electronClusters[0] ) + { + if( hasgsf ) otherEcalGsfEnergy += cenergy; + if( haskf ) ecalBremEnergy += cenergy; // from conv. brem! + if( !(hasgsf || haskf) ) otherEcalGsfEnergy += cenergy; // stuff from SC } } - if( RO.electronClusters[0] ) { - reco::PFClusterRef cref = RO.electronClusters[0]->clusterRef(); - xtra.setGsfElectronClusterRef(_currentblock,*(RO.electronClusters[0])); - FirstEcalGsfEnergy = cref->correctedEnergy(); - deta_gsfecal = cref->positionREP().eta() - Etaout_gsf; - gsfcluster.push_back(&*cref); - PFClusterWidthAlgo pfwidth(gsfcluster); + if( ro.electronClusters[0] ) + { + reco::PFClusterRef cref = ro.electronClusters[0]->clusterRef(); + xtra.setGsfElectronClusterRef(_currentblock,*(ro.electronClusters[0])); + firstEcalGsfEnergy = cref->correctedEnergy(); + dEtGsfEcal = cref->positionREP().eta() - etaOutGsf; + gsfCluster.push_back(&*cref); + PFClusterWidthAlgo pfwidth(gsfCluster); sigmaEtaEta = pfwidth.pflowSigmaEtaEta(); } // brem sequence information - lateBrem = firstBrem = earlyBrem = -1.0f; - if(RO.nBremsWithClusters > 0) { - if (RO.lateBrem == 1) lateBrem = 1.0f; - else lateBrem = 0.0f; - firstBrem = RO.firstBrem; - if(RO.firstBrem < 4) earlyBrem = 1.0f; - else earlyBrem = 0.0f; + float firstBrem {-1.0f}; + float earlyBrem {-1.0f}; + float lateBrem {-1.0f}; + if(ro.nBremsWithClusters > 0) + { + firstBrem = ro.firstBrem; + earlyBrem = ro.firstBrem < 4 ? 1.0f : 0.0f; + lateBrem = ro.lateBrem == 1 ? 1.0f : 0.0f; } xtra.setEarlyBrem(earlyBrem); xtra.setLateBrem(lateBrem); - if( FirstEcalGsfEnergy > 0.0 ) { - if( RefGSF.isNonnull() ) { + if( firstEcalGsfEnergy > 0.0 ) + { + if( refGsf.isNonnull() ) + { xtra.setGsfTrackPout(gsfElement->Pout()); // normalization observables - const float Pt_gsf = RefGSF->ptMode(); - lnPt_gsf = std::log(Pt_gsf); - Eta_gsf = RefGSF->etaMode(); + const float ptGsf = refGsf->ptMode(); + const float etaGsf = refGsf->etaMode(); // tracking observables - const double ptModeErrorGsf = RefGSF->ptModeError(); - dPtOverPt_gsf = (ptModeErrorGsf > 0. ? ptModeErrorGsf/Pt_gsf : 1.0); - nhit_gsf = RefGSF->hitPattern().trackerLayersWithMeasurement(); - chi2_gsf = RefGSF->normalizedChi2(); - DPtOverPt_gsf = (Pt_gsf - gsfElement->Pout().pt())/Pt_gsf; + const double ptModeErrorGsf = refGsf->ptModeError(); + float ptModeErrOverPtGsf = (ptModeErrorGsf > 0. ? ptModeErrorGsf/ptGsf : 1.0); + float chi2Gsf = refGsf->normalizedChi2(); + float dPtOverPtGsf = (ptGsf - gsfElement->Pout().pt())/ptGsf; // kalman filter vars - nhit_kf = 0; - chi2_kf = -0.01; - DPtOverPt_kf = -0.01; - if( RefKF.isNonnull() ) { - nhit_kf = RefKF->hitPattern().trackerLayersWithMeasurement(); - chi2_kf = RefKF->normalizedChi2(); - // not used for moment, weird behavior of variable - // DPtOverPt_kf = (RefKF->pt() - RefKF->outerPt())/RefKF->pt(); - } + float nHitKf = refKf.isNonnull() ? refKf->hitPattern().trackerLayersWithMeasurement() : 0; + float chi2Kf = refKf.isNonnull() ? refKf->normalizedChi2() : -0.01; + //tracker + calorimetry observables - const double EcalETot = - (FirstEcalGsfEnergy+OtherEcalGsfEnergy+EcalBremEnergy); - EtotPinMode = EcalETot / Ein_gsf; - EGsfPoutMode = FirstEcalGsfEnergy / Eout_gsf; - EtotBremPinPoutMode = ( (EcalBremEnergy + OtherEcalGsfEnergy) / - (Ein_gsf - Eout_gsf) ); - DEtaGsfEcalClust = std::abs(deta_gsfecal); - SigmaEtaEta = std::log(sigmaEtaEta); - xtra.setDeltaEta(DEtaGsfEcalClust); + float eTotPinMode = (firstEcalGsfEnergy+otherEcalGsfEnergy+ecalBremEnergy)/ eInGsf; + float eGsfPoutMode = firstEcalGsfEnergy / eOutGsf; + float eTotBremPinPoutMode = (ecalBremEnergy + otherEcalGsfEnergy) / (eInGsf - eOutGsf); + float dEtaGsfEcalClust = std::abs(dEtGsfEcal); + float logSigmaEtaEta = std::log(sigmaEtaEta); + float hOverHe = eneHcalGsf/(eneHcalGsf + firstEcalGsfEnergy); + + xtra.setDeltaEta(dEtaGsfEcalClust); xtra.setSigmaEtaEta(sigmaEtaEta); - - HOverHE = Ene_hcalgsf/(Ene_hcalgsf + FirstEcalGsfEnergy); - HOverPin = Ene_hcalgsf / Ein_gsf; - xtra.setHadEnergy(Ene_hcalgsf); + xtra.setHadEnergy(eneHcalGsf); // Apply bounds to variables and calculate MVA - DPtOverPt_gsf = std::max(DPtOverPt_gsf,-0.2f); - DPtOverPt_gsf = std::min(DPtOverPt_gsf,1.0f); - dPtOverPt_gsf = std::min(dPtOverPt_gsf,0.3f); - chi2_gsf = std::min(chi2_gsf,10.0f); - DPtOverPt_kf = std::max(DPtOverPt_kf,-0.2f); - DPtOverPt_kf = std::min(DPtOverPt_kf,1.0f); - chi2_kf = std::min(chi2_kf,10.0f); - EtotPinMode = std::max(EtotPinMode,0.0f); - EtotPinMode = std::min(EtotPinMode,5.0f); - EGsfPoutMode = std::max(EGsfPoutMode,0.0f); - EGsfPoutMode = std::min(EGsfPoutMode,5.0f); - EtotBremPinPoutMode = std::max(EtotBremPinPoutMode,0.0f); - EtotBremPinPoutMode = std::min(EtotBremPinPoutMode,5.0f); - DEtaGsfEcalClust = std::min(DEtaGsfEcalClust,0.1f); - SigmaEtaEta = std::max(SigmaEtaEta,-14.0f); - HOverPin = std::max(HOverPin,0.0f); - HOverPin = std::min(HOverPin,5.0f); - /* - std::cout << " **** PFEG BDT observables ****" << endl; - std::cout << " < Normalization > " << endl; - std::cout << " Pt_gsf " << Pt_gsf << " Pin " << Ein_gsf - << " Pout " << Eout_gsf << " Eta_gsf " << Eta_gsf << endl; - std::cout << " < PureTracking > " << endl; - std::cout << " dPtOverPt_gsf " << dPtOverPt_gsf - << " DPtOverPt_gsf " << DPtOverPt_gsf - << " chi2_gsf " << chi2_gsf - << " nhit_gsf " << nhit_gsf - << " DPtOverPt_kf " << DPtOverPt_kf - << " chi2_kf " << chi2_kf - << " nhit_kf " << nhit_kf << endl; - std::cout << " < track-ecal-hcal-ps " << endl; - std::cout << " EtotPinMode " << EtotPinMode - << " EGsfPoutMode " << EGsfPoutMode - << " EtotBremPinPoutMode " << EtotBremPinPoutMode - << " DEtaGsfEcalClust " << DEtaGsfEcalClust - << " SigmaEtaEta " << SigmaEtaEta - << " HOverHE " << HOverHE << " Hcal energy " << Ene_hcalgsf - << " HOverPin " << HOverPin - << " lateBrem " << lateBrem - << " firstBrem " << firstBrem << endl; - */ - - float vars[] = { lnPt_gsf, Eta_gsf, dPtOverPt_gsf, DPtOverPt_gsf, chi2_gsf, - nhit_kf, chi2_kf, EtotPinMode, EGsfPoutMode, EtotBremPinPoutMode, - DEtaGsfEcalClust, SigmaEtaEta, HOverHE, lateBrem, firstBrem }; + dPtOverPtGsf = std::clamp(dPtOverPtGsf, -0.2f, 1.0f); + ptModeErrOverPtGsf = std::min(ptModeErrOverPtGsf,0.3f); + chi2Gsf = std::min(chi2Gsf,10.0f); + chi2Kf = std::min(chi2Kf,10.0f); + eTotPinMode = std::clamp(eTotPinMode,0.0f, 5.0f); + eGsfPoutMode = std::clamp(eGsfPoutMode,0.0f, 5.0f); + eTotBremPinPoutMode = std::clamp(eTotBremPinPoutMode,0.0f, 5.0f); + dEtaGsfEcalClust = std::min(dEtaGsfEcalClust,0.1f); + logSigmaEtaEta = std::max(logSigmaEtaEta,-14.0f); + + // not used for moment, weird behavior of variable + //float dPtOverPtKf = refKf.isNonnull() ? (refKf->pt() - refKf->outerPt())/refKf->pt() : -0.01; + //dPtOverPtKf = std::clamp(dPtOverPtKf,-0.2f, 1.0f); + +/* + * To be used for debugging: + * pretty-print the PFEgamma electron MVA input variables + * + * std::cout << " **** PFEG BDT observables ****" << endl; + * std::cout << " < Normalization > " << endl; + * std::cout << " ptGsf " << ptGsf << " Pin " << eInGsf + * << " Pout " << eOutGsf << " etaGsf " << etaGsf << endl; + * std::cout << " < PureTracking > " << endl; + * std::cout << " ptModeErrOverPtGsf " << ptModeErrOverPtGsf + * << " dPtOverPtGsf " << dPtOverPtGsf + * << " chi2Gsf " << chi2Gsf + * << " nhit_gsf " << nhit_gsf + * << " dPtOverPtKf " << dPtOverPtKf + * << " chi2Kf " << chi2Kf + * << " nHitKf " << nHitKf << endl; + * std::cout << " < track-ecal-hcal-ps " << endl; + * std::cout << " eTotPinMode " << eTotPinMode + * << " eGsfPoutMode " << eGsfPoutMode + * << " eTotBremPinPoutMode " << eTotBremPinPoutMode + * << " dEtaGsfEcalClust " << dEtaGsfEcalClust + * << " logSigmaEtaEta " << logSigmaEtaEta + * << " hOverHe " << hOverHe << " Hcal energy " << eneHcalGsf + * << " lateBrem " << lateBrem + * << " firstBrem " << firstBrem << endl; + */ + + float vars[] = { std::log(ptGsf), etaGsf, ptModeErrOverPtGsf, dPtOverPtGsf, chi2Gsf, + nHitKf, chi2Kf, eTotPinMode, eGsfPoutMode, eTotBremPinPoutMode, + dEtaGsfEcalClust, logSigmaEtaEta, hOverHe, lateBrem, firstBrem }; return hoc->gbrEle_->GetAdaBoostClassifier(vars); } @@ -2139,8 +2100,8 @@ calculate_ele_mva(const pfEGHelpers::HeavyObjectCache* hoc, return -2.0f; } -void PFEGammaAlgo::fill_extra_info( const ProtoEGObject& RO, - reco::PFCandidateEGammaExtra& xtra ) { +void PFEGammaAlgo::fillExtraInfo(const ProtoEGObject& RO, + reco::PFCandidateEGammaExtra& xtra ) { // add tracks associated to clusters that are not T_FROM_GAMMACONV // info about single-leg convs is already save, so just veto in loops auto KFbegin = _splayedblock[reco::PFBlockElement::TRACK].begin(); diff --git a/RecoParticleFlow/PFProducer/src/PFEGammaHeavyObjectCache.cc b/RecoParticleFlow/PFProducer/src/PFEGammaHeavyObjectCache.cc deleted file mode 100644 index 7c44fc5cb03f8..0000000000000 --- a/RecoParticleFlow/PFProducer/src/PFEGammaHeavyObjectCache.cc +++ /dev/null @@ -1,49 +0,0 @@ -#include "RecoParticleFlow/PFProducer/interface/PFEGammaHeavyObjectCache.h" -#include "FWCore/ParameterSet/interface/FileInPath.h" -#include "TMVA/MethodBDT.h" -#include "TMVA/Reader.h" - -namespace pfEGHelpers { - HeavyObjectCache::HeavyObjectCache(const edm::ParameterSet& conf) { - { - const edm::FileInPath& wfile = conf.getParameter("pf_electronID_mvaWeightFile"); - // Set the tmva reader for electrons - TMVA::Reader tmvaReaderEle_("!Color:Silent"); - tmvaReaderEle_.AddVariable("lnPt_gsf",&lnPt_gsf); - tmvaReaderEle_.AddVariable("Eta_gsf",&Eta_gsf); - tmvaReaderEle_.AddVariable("dPtOverPt_gsf",&dPtOverPt_gsf); - tmvaReaderEle_.AddVariable("DPtOverPt_gsf",&DPtOverPt_gsf); - //tmvaReaderEle_.AddVariable("nhit_gsf",&nhit_gsf); - tmvaReaderEle_.AddVariable("chi2_gsf",&chi2_gsf); - //tmvaReaderEle_.AddVariable("DPtOverPt_kf",&DPtOverPt_kf); - tmvaReaderEle_.AddVariable("nhit_kf",&nhit_kf); - tmvaReaderEle_.AddVariable("chi2_kf",&chi2_kf); - tmvaReaderEle_.AddVariable("EtotPinMode",&EtotPinMode); - tmvaReaderEle_.AddVariable("EGsfPoutMode",&EGsfPoutMode); - tmvaReaderEle_.AddVariable("EtotBremPinPoutMode",&EtotBremPinPoutMode); - tmvaReaderEle_.AddVariable("DEtaGsfEcalClust",&DEtaGsfEcalClust); - tmvaReaderEle_.AddVariable("SigmaEtaEta",&SigmaEtaEta); - tmvaReaderEle_.AddVariable("HOverHE",&HOverHE); - // tmvaReaderEle_.AddVariable("HOverPin",&HOverPin); - tmvaReaderEle_.AddVariable("lateBrem",&lateBrem); - tmvaReaderEle_.AddVariable("firstBrem",&firstBrem); - tmvaReaderEle_.BookMVA("BDT", wfile.fullPath().c_str()); - gbrEle_.reset( new GBRForest( dynamic_cast( tmvaReaderEle_.FindMVA("BDT") ) ) ); - } - { - const edm::FileInPath& wfile = conf.getParameter("pf_convID_mvaWeightFile"); - //Book MVA (single leg) - TMVA::Reader tmvaReader_("!Color:Silent"); - tmvaReader_.AddVariable("del_phi",&del_phi); - tmvaReader_.AddVariable("nlayers", &nlayers); - tmvaReader_.AddVariable("chi2",&chi2); - tmvaReader_.AddVariable("EoverPt",&EoverPt); - tmvaReader_.AddVariable("HoverPt",&HoverPt); - tmvaReader_.AddVariable("track_pt", &track_pt); - tmvaReader_.AddVariable("STIP",&STIP); - tmvaReader_.AddVariable("nlost", &nlost); - tmvaReader_.BookMVA("BDT", wfile.fullPath().c_str()); - gbrSingleLeg_.reset( new GBRForest( dynamic_cast( tmvaReader_.FindMVA("BDT") ) ) ); - } - } -} diff --git a/RecoParticleFlow/PFTracking/BuildFile.xml b/RecoParticleFlow/PFTracking/BuildFile.xml index a3bcb8ac78842..542deac84b558 100644 --- a/RecoParticleFlow/PFTracking/BuildFile.xml +++ b/RecoParticleFlow/PFTracking/BuildFile.xml @@ -31,8 +31,8 @@ + - diff --git a/RecoParticleFlow/PFTracking/interface/ConvBremHeavyObjectCache.h b/RecoParticleFlow/PFTracking/interface/ConvBremHeavyObjectCache.h index d30ac9f5983bb..e7e00253d7c3b 100644 --- a/RecoParticleFlow/PFTracking/interface/ConvBremHeavyObjectCache.h +++ b/RecoParticleFlow/PFTracking/interface/ConvBremHeavyObjectCache.h @@ -15,10 +15,6 @@ namespace convbremhelpers { std::unique_ptr gbrEndcapsLowPt_; std::unique_ptr gbrEndcapsHighPt_; std::unique_ptr pfcalib_; - private: - std::unique_ptr setupMVA(const std::string&); - // for variable binding - float secR, sTIP, nHITS1, Epout, detaBremKF, ptRatioGsfKF; }; } diff --git a/RecoParticleFlow/PFTracking/plugins/GoodSeedProducer.cc b/RecoParticleFlow/PFTracking/plugins/GoodSeedProducer.cc index a774d99c0a1e5..2c062739dc9c6 100644 --- a/RecoParticleFlow/PFTracking/plugins/GoodSeedProducer.cc +++ b/RecoParticleFlow/PFTracking/plugins/GoodSeedProducer.cc @@ -26,6 +26,7 @@ #include "FastSimulation/BaseParticlePropagator/interface/BaseParticlePropagator.h" #include "MagneticField/Engine/interface/MagneticField.h" #include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "CommonTools/MVAUtils/interface/GBRForestTools.h" #include "DataFormats/Math/interface/deltaR.h" @@ -33,7 +34,6 @@ #include #include "TMath.h" #include "Math/VectorUtil.h" -#include "TMVA/MethodBDT.h" using namespace edm; using namespace std; @@ -490,7 +490,6 @@ namespace goodseedhelpers { const bool useTmva = conf.getUntrackedParameter("UseTMVA",false); if( useTmva ) { - const std::string method_ = conf.getParameter("TMVAMethod"); std::array weights = {{ edm::FileInPath(conf.getParameter("Weights1")), edm::FileInPath(conf.getParameter("Weights2")), edm::FileInPath(conf.getParameter("Weights3")), @@ -502,22 +501,7 @@ namespace goodseedhelpers { edm::FileInPath(conf.getParameter("Weights9")) }}; for(UInt_t j = 0; j < gbr.size(); ++j){ - TMVA::Reader reader("!Color:Silent"); - - reader.AddVariable("NHits", &nhit); - reader.AddVariable("NormChi", &chikfred); - reader.AddVariable("dPtGSF", &dpt); - reader.AddVariable("EoP", &eP); - reader.AddVariable("ChiRatio", &chiRatio); - reader.AddVariable("RedChi", &chired); - reader.AddVariable("EcalDEta", &trk_ecalDeta); - reader.AddVariable("EcalDPhi", &trk_ecalDphi); - reader.AddVariable("pt", &pt); - reader.AddVariable("eta", &eta); - - reader.BookMVA(method_, weights[j].fullPath().c_str()); - - gbr[j].reset( new GBRForest( dynamic_cast( reader.FindMVA(method_) ) ) ); + gbr[j] = createGBRForest( weights[j] ); } } } diff --git a/RecoParticleFlow/PFTracking/python/pfTrackElec_cfi.py b/RecoParticleFlow/PFTracking/python/pfTrackElec_cfi.py index 955a395837417..01819a3d3b325 100644 --- a/RecoParticleFlow/PFTracking/python/pfTrackElec_cfi.py +++ b/RecoParticleFlow/PFTracking/python/pfTrackElec_cfi.py @@ -35,10 +35,10 @@ pf_convBremFinderID_mvaCutBarrelHighPt = cms.double(0.97), pf_convBremFinderID_mvaCutEndcapsLowPt = cms.double(0.9), pf_convBremFinderID_mvaCutEndcapsHighPt = cms.double(0.995), - pf_convBremFinderID_mvaWeightFileBarrelLowPt = cms.string('RecoParticleFlow/PFTracking/data/TMVAClassification_ConvBremFinder_Testetlt20absetalt1_479_BDT.weights.xml'), - pf_convBremFinderID_mvaWeightFileBarrelHighPt = cms.string('RecoParticleFlow/PFTracking/data/TMVAClassification_ConvBremFinder_Testetgt20absetalt1_479_BDT.weights.xml'), - pf_convBremFinderID_mvaWeightFileEndcapsLowPt = cms.string('RecoParticleFlow/PFTracking/data/TMVAClassification_ConvBremFinder_Testetlt20absetagt1_479_BDT.weights.xml'), - pf_convBremFinderID_mvaWeightFileEndcapsHighPt = cms.string('RecoParticleFlow/PFTracking/data/TMVAClassification_ConvBremFinder_Testetgt20absetagt1_479_BDT.weights.xml') + pf_convBremFinderID_mvaWeightFileBarrelLowPt = cms.FileInPath('RecoParticleFlow/PFTracking/data/TMVAClassification_ConvBremFinder_Testetlt20absetalt1_479_BDT.weights.xml'), + pf_convBremFinderID_mvaWeightFileBarrelHighPt = cms.FileInPath('RecoParticleFlow/PFTracking/data/TMVAClassification_ConvBremFinder_Testetgt20absetalt1_479_BDT.weights.xml'), + pf_convBremFinderID_mvaWeightFileEndcapsLowPt = cms.FileInPath('RecoParticleFlow/PFTracking/data/TMVAClassification_ConvBremFinder_Testetlt20absetagt1_479_BDT.weights.xml'), + pf_convBremFinderID_mvaWeightFileEndcapsHighPt = cms.FileInPath('RecoParticleFlow/PFTracking/data/TMVAClassification_ConvBremFinder_Testetgt20absetagt1_479_BDT.weights.xml') ) diff --git a/RecoParticleFlow/PFTracking/src/ConvBremHeavyObjectCache.cc b/RecoParticleFlow/PFTracking/src/ConvBremHeavyObjectCache.cc index f3b6e82bb0fb5..58444620c3b49 100644 --- a/RecoParticleFlow/PFTracking/src/ConvBremHeavyObjectCache.cc +++ b/RecoParticleFlow/PFTracking/src/ConvBremHeavyObjectCache.cc @@ -1,51 +1,21 @@ +#include "CommonTools/MVAUtils/interface/GBRForestTools.h" +#include "FWCore/ParameterSet/interface/FileInPath.h" #include "RecoParticleFlow/PFTracking/interface/ConvBremHeavyObjectCache.h" -#include "TMVA/Reader.h" -#include "TMVA/MethodBDT.h" - namespace convbremhelpers { HeavyObjectCache::HeavyObjectCache(const edm::ParameterSet& conf) { - pfcalib_.reset( new PFEnergyCalibration() ); + pfcalib_ = std::make_unique(); const bool useConvBremFinder_ = conf.getParameter("useConvBremFinder"); if(useConvBremFinder_) { - const std::string& mvaWeightFileConvBremBarrelLowPt = - conf.getParameter("pf_convBremFinderID_mvaWeightFileBarrelLowPt"); - const std::string mvaWeightFileConvBremBarrelHighPt = - conf.getParameter("pf_convBremFinderID_mvaWeightFileBarrelHighPt"); - const std::string mvaWeightFileConvBremEndcapsLowPt = - conf.getParameter("pf_convBremFinderID_mvaWeightFileEndcapsLowPt"); - const std::string mvaWeightFileConvBremEndcapsHighPt = - conf.getParameter("pf_convBremFinderID_mvaWeightFileEndcapsHighPt"); - - const std::string path_mvaWeightFileConvBremBarrelLowPt = - edm::FileInPath( mvaWeightFileConvBremBarrelLowPt.c_str() ).fullPath(); - const std::string path_mvaWeightFileConvBremBarrelHighPt = - edm::FileInPath( mvaWeightFileConvBremBarrelHighPt.c_str() ).fullPath(); - const std::string path_mvaWeightFileConvBremEndcapsLowPt = - edm::FileInPath( mvaWeightFileConvBremEndcapsLowPt.c_str() ).fullPath(); - const std::string path_mvaWeightFileConvBremEndcapsHighPt = - edm::FileInPath( mvaWeightFileConvBremEndcapsHighPt.c_str() ).fullPath(); - - gbrBarrelLowPt_ = setupMVA(path_mvaWeightFileConvBremBarrelLowPt); - gbrBarrelHighPt_ = setupMVA(path_mvaWeightFileConvBremBarrelHighPt); - gbrEndcapsLowPt_ = setupMVA(path_mvaWeightFileConvBremEndcapsLowPt); - gbrEndcapsHighPt_ = setupMVA(path_mvaWeightFileConvBremEndcapsHighPt); + + gbrBarrelLowPt_ = createGBRForest(conf.getParameter("pf_convBremFinderID_mvaWeightFileBarrelLowPt")); + gbrBarrelHighPt_ = createGBRForest(conf.getParameter("pf_convBremFinderID_mvaWeightFileBarrelHighPt")); + gbrEndcapsLowPt_ = createGBRForest(conf.getParameter("pf_convBremFinderID_mvaWeightFileEndcapsLowPt")); + gbrEndcapsHighPt_ = createGBRForest(conf.getParameter("pf_convBremFinderID_mvaWeightFileEndcapsHighPt")); } } - - std::unique_ptr HeavyObjectCache::setupMVA(const std::string& weights) { - TMVA::Reader reader("!Color:Silent"); - reader.AddVariable("kftrack_secR",&secR); - reader.AddVariable("kftrack_sTIP",&sTIP); - reader.AddVariable("kftrack_nHITS1",&nHITS1); - reader.AddVariable("kftrack_Epout",&Epout); - reader.AddVariable("kftrack_detaBremKF",&detaBremKF); - reader.AddVariable("kftrack_ptRatioGsfKF",&ptRatioGsfKF); - reader.BookMVA("BDT", weights.c_str()); - return std::unique_ptr( new GBRForest( dynamic_cast( reader.FindMVA("BDT") ) ) ); - } }