Merge pull request #40478 from swertz/fixSizeType_from-CMSSW_13_0_0_pre2

Nano: fix array size branch type, support 16 bit ints, use more 8 or 16 bit integers
cms-sw · Jan 16, 2023 · commit d570bd3
2 parents: c215a0d + cc491f1

Showing 28 changed files with 266 additions and 170 deletions.
diff --git a/DataFormats/NanoAOD/interface/FlatTable.h b/DataFormats/NanoAOD/interface/FlatTable.h
@@ -38,13 +38,15 @@ namespace nanoaod {
   class FlatTable {
   public:
     enum class ColumnType {
-      Float,
-      Int,
+      Int8,
       UInt8,
-      Bool,
+      Int16,
+      UInt16,
+      Int32,
       UInt32,
+      Bool,
+      Float,
       Double,
-      Int8
     };  // We could have other Float types with reduced mantissa, and similar
 
     FlatTable() : size_(0) {}
@@ -138,18 +140,22 @@ namespace nanoaod {
     struct dependent_false : std::false_type {};
     template <typename T>
     static ColumnType defaultColumnType() {
-      if constexpr (std::is_same<T, float>())
-        return ColumnType::Float;
-      else if constexpr (std::is_same<T, int>())
-        return ColumnType::Int;
+      if constexpr (std::is_same<T, int8_t>())
+        return ColumnType::Int8;
       else if constexpr (std::is_same<T, uint8_t>())
         return ColumnType::UInt8;
-      else if constexpr (std::is_same<T, int8_t>())
-        return ColumnType::Int8;
-      else if constexpr (std::is_same<T, bool>())
-        return ColumnType::Bool;
+      else if constexpr (std::is_same<T, int16_t>())
+        return ColumnType::Int16;
+      else if constexpr (std::is_same<T, uint16_t>())
+        return ColumnType::UInt16;
+      else if constexpr (std::is_same<T, int32_t>())
+        return ColumnType::Int32;
       else if constexpr (std::is_same<T, uint32_t>())
         return ColumnType::UInt32;
+      else if constexpr (std::is_same<T, bool>())
+        return ColumnType::Bool;
+      else if constexpr (std::is_same<T, float>())
+        return ColumnType::Float;
       else if constexpr (std::is_same<T, double>())
         return ColumnType::Double;
       else
@@ -188,18 +194,22 @@ namespace nanoaod {
     template <typename T, class This>
     static auto &bigVectorImpl(This &table) {
       // helper function to avoid code duplication, for the two accessor functions that differ only in const-ness
-      if constexpr (std::is_same<T, float>())
-        return table.floats_;
-      else if constexpr (std::is_same<T, int>())
-        return table.ints_;
-      else if constexpr (std::is_same<T, uint8_t>())
-        return table.uint8s_;
-      else if constexpr (std::is_same<T, int8_t>())
+      if constexpr (std::is_same<T, int8_t>())
         return table.int8s_;
-      else if constexpr (std::is_same<T, bool>())
+      else if constexpr (std::is_same<T, uint8_t>())
         return table.uint8s_;
+      else if constexpr (std::is_same<T, int16_t>())
+        return table.int16s_;
+      else if constexpr (std::is_same<T, uint16_t>())
+        return table.uint16s_;
+      else if constexpr (std::is_same<T, int32_t>())
+        return table.int32s_;
       else if constexpr (std::is_same<T, uint32_t>())
         return table.uint32s_;
+      else if constexpr (std::is_same<T, bool>())
+        return table.uint8s_;  // special case: bool stored as vector of uint8
+      else if constexpr (std::is_same<T, float>())
+        return table.floats_;
       else if constexpr (std::is_same<T, double>())
         return table.doubles_;
       else
@@ -210,11 +220,13 @@ namespace nanoaod {
     std::string name_, doc_;
     bool singleton_, extension_;
     std::vector<Column> columns_;
-    std::vector<float> floats_;
-    std::vector<int> ints_;
-    std::vector<uint8_t> uint8s_;
     std::vector<int8_t> int8s_;
+    std::vector<uint8_t> uint8s_;
+    std::vector<int16_t> int16s_;
+    std::vector<uint16_t> uint16s_;
+    std::vector<int32_t> int32s_;
     std::vector<uint32_t> uint32s_;
+    std::vector<float> floats_;
     std::vector<double> doubles_;
   };
 

diff --git a/DataFormats/NanoAOD/src/FlatTable.cc b/DataFormats/NanoAOD/src/FlatTable.cc
@@ -13,24 +13,30 @@ void nanoaod::FlatTable::addExtension(const nanoaod::FlatTable& other) {
     throw cms::Exception("LogicError", "Mismatch in adding extension");
   for (unsigned int i = 0, n = other.nColumns(); i < n; ++i) {
     switch (other.columnType(i)) {
-      case ColumnType::Float:
-        addColumn<float>(other.columnName(i), other.columnData<float>(i), other.columnDoc(i));
-        break;
-      case ColumnType::Int:
-        addColumn<int>(other.columnName(i), other.columnData<int>(i), other.columnDoc(i));
-        break;
       case ColumnType::Int8:
-        addColumn<int8_t>(other.columnName(i), other.columnData<int>(i), other.columnDoc(i));
-        break;
-      case ColumnType::Bool:
-        addColumn<bool>(other.columnName(i), other.columnData<bool>(i), other.columnDoc(i));
+        addColumn<int8_t>(other.columnName(i), other.columnData<int8_t>(i), other.columnDoc(i));
         break;
       case ColumnType::UInt8:
         addColumn<uint8_t>(other.columnName(i), other.columnData<uint8_t>(i), other.columnDoc(i));
         break;
+      case ColumnType::Int16:
+        addColumn<int16_t>(other.columnName(i), other.columnData<int16_t>(i), other.columnDoc(i));
+        break;
+      case ColumnType::UInt16:
+        addColumn<uint16_t>(other.columnName(i), other.columnData<uint16_t>(i), other.columnDoc(i));
+        break;
+      case ColumnType::Int32:
+        addColumn<int32_t>(other.columnName(i), other.columnData<int32_t>(i), other.columnDoc(i));
+        break;
       case ColumnType::UInt32:
         addColumn<uint32_t>(other.columnName(i), other.columnData<uint32_t>(i), other.columnDoc(i));
         break;
+      case ColumnType::Bool:
+        addColumn<bool>(other.columnName(i), other.columnData<bool>(i), other.columnDoc(i));
+        break;
+      case ColumnType::Float:
+        addColumn<float>(other.columnName(i), other.columnData<float>(i), other.columnDoc(i));
+        break;
       case ColumnType::Double:
         addColumn<double>(other.columnName(i), other.columnData<double>(i), other.columnDoc(i));
         break;
@@ -44,18 +50,22 @@ double nanoaod::FlatTable::getAnyValue(unsigned int row, unsigned int column) co
   if (column >= nColumns())
     throw cms::Exception("LogicError", "Invalid column");
   switch (columnType(column)) {
-    case ColumnType::Float:
-      return *(beginData<float>(column) + row);
-    case ColumnType::Int:
-      return *(beginData<int>(column) + row);
     case ColumnType::Int8:
       return *(beginData<int8_t>(column) + row);
-    case ColumnType::Bool:
-      return *(beginData<bool>(column) + row);
     case ColumnType::UInt8:
       return *(beginData<uint8_t>(column) + row);
+    case ColumnType::Int16:
+      return *(beginData<int16_t>(column) + row);
+    case ColumnType::UInt16:
+      return *(beginData<uint16_t>(column) + row);
+    case ColumnType::Int32:
+      return *(beginData<int32_t>(column) + row);
     case ColumnType::UInt32:
       return *(beginData<uint32_t>(column) + row);
+    case ColumnType::Bool:
+      return *(beginData<bool>(column) + row);
+    case ColumnType::Float:
+      return *(beginData<float>(column) + row);
     case ColumnType::Double:
       return *(beginData<double>(column) + row);
   }

diff --git a/DataFormats/NanoAOD/src/classes_def.xml b/DataFormats/NanoAOD/src/classes_def.xml
@@ -3,7 +3,8 @@
         <version ClassVersion="3" checksum="3066258528"/>
     </class>
     <class name="std::vector<nanoaod::FlatTable::Column>" />
-    <class name="nanoaod::FlatTable" ClassVersion="5">
+    <class name="nanoaod::FlatTable" ClassVersion="6">
+     <version ClassVersion="6" checksum="70963850"/>
         <version ClassVersion="5" checksum="4251670483"/>
         <version ClassVersion="4" checksum="656493391"/>
         <version ClassVersion="3" checksum="2443023556"/>

diff --git a/PhysicsTools/NanoAOD/interface/SimpleFlatTableProducer.h b/PhysicsTools/NanoAOD/interface/SimpleFlatTableProducer.h
@@ -132,10 +132,16 @@ class SimpleFlatTableProducerBase : public edm::stream::EDProducer<> {
         vars_.push_back(std::make_unique<UIntVar>(vname, varPSet));
       else if (type == "float")
         vars_.push_back(std::make_unique<FloatVar>(vname, varPSet));
+      else if (type == "double")
+        vars_.push_back(std::make_unique<DoubleVar>(vname, varPSet));
       else if (type == "int8")
         vars_.push_back(std::make_unique<Int8Var>(vname, varPSet));
       else if (type == "uint8")
         vars_.push_back(std::make_unique<UInt8Var>(vname, varPSet));
+      else if (type == "int16")
+        vars_.push_back(std::make_unique<Int16Var>(vname, varPSet));
+      else if (type == "uint16")
+        vars_.push_back(std::make_unique<UInt16Var>(vname, varPSet));
       else if (type == "bool")
         vars_.push_back(std::make_unique<BoolVar>(vname, varPSet));
       else
@@ -160,9 +166,10 @@ class SimpleFlatTableProducerBase : public edm::stream::EDProducer<> {
     edm::ParameterSetDescription variable;
     variable.add<std::string>("expr")->setComment("a function to define the content of the branch in the flat table");
     variable.add<std::string>("doc")->setComment("few words description of the branch content");
-    variable.ifValue(edm::ParameterDescription<std::string>(
-                         "type", "int", true, edm::Comment("the c++ type of the branch in the flat table")),
-                     edm::allowedValues<std::string>("int", "unit", "float", "int8", "uint8", "bool"));
+    variable.ifValue(
+        edm::ParameterDescription<std::string>(
+            "type", "int", true, edm::Comment("the c++ type of the branch in the flat table")),
+        edm::allowedValues<std::string>("int", "uint", "float", "double", "int8", "uint8", "int16", "uint16", "bool"));
     variable.addOptionalNode(
         edm::ParameterDescription<int>(
             "precision", true, edm::Comment("the precision with which to store the value in the flat table")) xor
@@ -199,11 +206,14 @@ class SimpleFlatTableProducerBase : public edm::stream::EDProducer<> {
   const bool skipNonExistingSrc_;
   const edm::EDGetTokenT<TProd> src_;
 
-  typedef FuncVariable<T, StringObjectFunction<T>, int> IntVar;
-  typedef FuncVariable<T, StringObjectFunction<T>, unsigned int> UIntVar;
+  typedef FuncVariable<T, StringObjectFunction<T>, int32_t> IntVar;
+  typedef FuncVariable<T, StringObjectFunction<T>, uint32_t> UIntVar;
   typedef FuncVariable<T, StringObjectFunction<T>, float> FloatVar;
+  typedef FuncVariable<T, StringObjectFunction<T>, double> DoubleVar;
   typedef FuncVariable<T, StringObjectFunction<T>, int8_t> Int8Var;
   typedef FuncVariable<T, StringObjectFunction<T>, uint8_t> UInt8Var;
+  typedef FuncVariable<T, StringObjectFunction<T>, int16_t> Int16Var;
+  typedef FuncVariable<T, StringObjectFunction<T>, uint16_t> UInt16Var;
   typedef FuncVariable<T, StringCutObjectSelector<T>, bool> BoolVar;
   std::vector<std::unique_ptr<Variable<T>>> vars_;
 };
@@ -225,6 +235,9 @@ class SimpleFlatTableProducer : public SimpleFlatTableProducerBase<T, edm::View<
         if (type == "int")
           extvars_.push_back(
               std::make_unique<IntExtVar>(vname, varPSet, this->consumesCollector(), this->skipNonExistingSrc_));
+        else if (type == "uint")
+          extvars_.push_back(
+              std::make_unique<UIntExtVar>(vname, varPSet, this->consumesCollector(), this->skipNonExistingSrc_));
         else if (type == "float")
           extvars_.push_back(
               std::make_unique<FloatExtVar>(vname, varPSet, this->consumesCollector(), this->skipNonExistingSrc_));
@@ -237,6 +250,12 @@ class SimpleFlatTableProducer : public SimpleFlatTableProducerBase<T, edm::View<
         else if (type == "uint8")
           extvars_.push_back(
               std::make_unique<UInt8ExtVar>(vname, varPSet, this->consumesCollector(), this->skipNonExistingSrc_));
+        else if (type == "int16")
+          extvars_.push_back(
+              std::make_unique<Int16ExtVar>(vname, varPSet, this->consumesCollector(), this->skipNonExistingSrc_));
+        else if (type == "uint16")
+          extvars_.push_back(
+              std::make_unique<UInt16ExtVar>(vname, varPSet, this->consumesCollector(), this->skipNonExistingSrc_));
         else if (type == "bool")
           extvars_.push_back(
               std::make_unique<BoolExtVar>(vname, varPSet, this->consumesCollector(), this->skipNonExistingSrc_));
@@ -262,9 +281,10 @@ class SimpleFlatTableProducer : public SimpleFlatTableProducerBase<T, edm::View<
     edm::ParameterSetDescription extvariable;
     extvariable.add<edm::InputTag>("src")->setComment("valuemap input collection to fill the flat table");
     extvariable.add<std::string>("doc")->setComment("few words description of the branch content");
-    extvariable.ifValue(edm::ParameterDescription<std::string>(
-                            "type", "int", true, edm::Comment("the c++ type of the branch in the flat table")),
-                        edm::allowedValues<std::string>("int", "unit", "float", "int8", "uint8", "bool"));
+    extvariable.ifValue(
+        edm::ParameterDescription<std::string>(
+            "type", "int", true, edm::Comment("the c++ type of the branch in the flat table")),
+        edm::allowedValues<std::string>("int", "uint", "float", "double", "int8", "uint8", "int16", "uint16", "bool"));
     extvariable.addOptionalNode(
         edm::ParameterDescription<int>(
             "precision", true, edm::Comment("the precision with which to store the value in the flat table")) xor
@@ -317,12 +337,15 @@ class SimpleFlatTableProducer : public SimpleFlatTableProducerBase<T, edm::View<
   const unsigned int maxLen_;
   const StringCutObjectSelector<T> cut_;
 
-  typedef ValueMapVariable<T, int> IntExtVar;
+  typedef ValueMapVariable<T, int32_t> IntExtVar;
+  typedef ValueMapVariable<T, uint32_t> UIntExtVar;
   typedef ValueMapVariable<T, float> FloatExtVar;
   typedef ValueMapVariable<T, double, float> DoubleExtVar;
   typedef ValueMapVariable<T, bool> BoolExtVar;
   typedef ValueMapVariable<T, int, int8_t> Int8ExtVar;
   typedef ValueMapVariable<T, int, uint8_t> UInt8ExtVar;
+  typedef ValueMapVariable<T, int, int16_t> Int16ExtVar;
+  typedef ValueMapVariable<T, int, uint16_t> UInt16ExtVar;
   std::vector<std::unique_ptr<ExtVariable<T>>> extvars_;
 };
 

diff --git a/PhysicsTools/NanoAOD/plugins/CandMCMatchTableProducer.cc b/PhysicsTools/NanoAOD/plugins/CandMCMatchTableProducer.cc
@@ -98,7 +98,8 @@ class CandMCMatchTableProducer : public edm::global::EDProducer<> {
       iEvent.getByToken(genPartsToken_, genParts);
     }
 
-    std::vector<int> key(ncand, -1), flav(ncand, 0);
+    std::vector<int16_t> key(ncand, -1);
+    std::vector<uint8_t> flav(ncand, 0);
     for (unsigned int i = 0; i < ncand; ++i) {
       //std::cout << "cand #" << i << ": pT = " << cands->ptrAt(i)->pt() << ", eta = " << cands->ptrAt(i)->eta() << ", phi = " << cands->ptrAt(i)->phi() << std::endl;
       const auto& cand = candProd.ptrAt(i);
@@ -192,7 +193,7 @@ class CandMCMatchTableProducer : public edm::global::EDProducer<> {
       };
     }
 
-    tab->addColumn<int>(branchName_ + "Idx", key, "Index into genParticle list for " + doc_);
+    tab->addColumn<int16_t>(branchName_ + "Idx", key, "Index into genParticle list for " + doc_);
     tab->addColumn<uint8_t>(branchName_ + "Flav",
                             flav,
                             "Flavour of genParticle (DressedLeptons for electrons) for " + doc_ + ": " + flavDoc_);

diff --git a/PhysicsTools/NanoAOD/plugins/GenJetFlavourTableProducer.cc b/PhysicsTools/NanoAOD/plugins/GenJetFlavourTableProducer.cc
@@ -61,7 +61,7 @@ void GenJetFlavourTableProducer::produce(edm::Event& iEvent, const edm::EventSet
   const auto& jetFlavourInfosProd = iEvent.get(jetFlavourInfosToken_);
 
   unsigned int ncand = 0;
-  std::vector<int> partonFlavour;
+  std::vector<int16_t> partonFlavour;
   std::vector<uint8_t> hadronFlavour;
 
   for (const reco::GenJet& jet : jetsProd) {
@@ -84,7 +84,7 @@ void GenJetFlavourTableProducer::produce(edm::Event& iEvent, const edm::EventSet
   }
 
   auto tab = std::make_unique<nanoaod::FlatTable>(ncand, name_, false, true);
-  tab->addColumn<int>("partonFlavour", partonFlavour, "flavour from parton matching");
+  tab->addColumn<int16_t>("partonFlavour", partonFlavour, "flavour from parton matching");
   tab->addColumn<uint8_t>("hadronFlavour", hadronFlavour, "flavour from hadron ghost clustering");
 
   iEvent.put(std::move(tab));

diff --git a/PhysicsTools/NanoAOD/plugins/LumiOutputBranches.cc b/PhysicsTools/NanoAOD/plugins/LumiOutputBranches.cc
@@ -16,7 +16,7 @@ void LumiOutputBranches::defineBranchesFromFirstEvent(const nanoaod::FlatTable &
       case nanoaod::FlatTable::ColumnType::Float:
         m_floatBranches.emplace_back(var, tab.columnDoc(i), "F");
         break;
-      case nanoaod::FlatTable::ColumnType::Int:
+      case nanoaod::FlatTable::ColumnType::Int32:
         m_intBranches.emplace_back(var, tab.columnDoc(i), "I");
         break;
       case nanoaod::FlatTable::ColumnType::UInt8:
@@ -44,7 +44,7 @@ void LumiOutputBranches::branch(TTree &tree) {
       if (tree.FindBranch(("n" + m_baseName).c_str()) != nullptr) {
         throw cms::Exception("LogicError", "Trying to save multiple main tables for " + m_baseName + "\n");
       }
-      m_counterBranch = tree.Branch(("n" + m_baseName).c_str(), &m_counter, ("n" + m_baseName + "/i").c_str());
+      m_counterBranch = tree.Branch(("n" + m_baseName).c_str(), &m_counter, ("n" + m_baseName + "/I").c_str());
       m_counterBranch->SetTitle(m_doc.c_str());
     }
   }
@@ -68,7 +68,14 @@ void LumiOutputBranches::fill(const edm::LuminosityBlockForOutput &iLumi, TTree
   edm::Handle<nanoaod::FlatTable> handle;
   iLumi.getByToken(m_token, handle);
   const nanoaod::FlatTable &tab = *handle;
-  m_counter = tab.size();
+  auto size = tab.size();
+  // ROOT native array size branches may only be signed integers,
+  // until this is changed we need to make sure the vector sizes do not exceed that
+  if (size > std::numeric_limits<CounterType>::max()) {
+    throw cms::Exception("Table " + tab.name() + " size is " + std::to_string(size) +
+                         ", is too large for ROOT native array branch");
+  }
+  m_counter = size;
   m_singleton = tab.singleton();
   if (!m_branchesBooked) {
     m_extension = tab.extension() ? IsExtension : IsMain;
@@ -80,7 +87,7 @@ void LumiOutputBranches::fill(const edm::LuminosityBlockForOutput &iLumi, TTree
     branch(tree);
   }
   if (!m_singleton && m_extension == IsExtension) {
-    if (m_counter != *reinterpret_cast<UInt_t *>(m_counterBranch->GetAddress())) {
+    if (m_counter != *reinterpret_cast<CounterType *>(m_counterBranch->GetAddress())) {
       throw cms::Exception("LogicError",
                            "Mismatch in number of entries between extension and main table for " + tab.name());
     }

diff --git a/PhysicsTools/NanoAOD/plugins/LumiOutputBranches.h b/PhysicsTools/NanoAOD/plugins/LumiOutputBranches.h
@@ -35,7 +35,8 @@ class LumiOutputBranches {
   bool m_singleton;
   enum { IsMain = 0, IsExtension = 1, DontKnowYetIfMainOrExtension = 2 } m_extension;
   std::string m_doc;
-  UInt_t m_counter;
+  typedef Int_t CounterType;
+  CounterType m_counter;
   struct NamedBranchPtr {
     std::string name, title, rootTypeCode;
     TBranch *branch;