Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nano: fix array size branch type, support 16 bit ints, use more 8 or 16 bit integers #40478

Merged
merged 10 commits into from
Jan 16, 2023
58 changes: 35 additions & 23 deletions DataFormats/NanoAOD/interface/FlatTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,15 @@ namespace nanoaod {
class FlatTable {
public:
enum class ColumnType {
Float,
Int,
Int8,
UInt8,
Bool,
Int16,
UInt16,
Int32,
UInt32,
Bool,
Float,
Double,
Int8
}; // We could have other Float types with reduced mantissa, and similar

FlatTable() : size_(0) {}
Expand Down Expand Up @@ -138,18 +140,22 @@ namespace nanoaod {
struct dependent_false : std::false_type {};
template <typename T>
static ColumnType defaultColumnType() {
if constexpr (std::is_same<T, float>())
return ColumnType::Float;
else if constexpr (std::is_same<T, int>())
return ColumnType::Int;
if constexpr (std::is_same<T, int8_t>())
return ColumnType::Int8;
else if constexpr (std::is_same<T, uint8_t>())
return ColumnType::UInt8;
else if constexpr (std::is_same<T, int8_t>())
return ColumnType::Int8;
else if constexpr (std::is_same<T, bool>())
return ColumnType::Bool;
else if constexpr (std::is_same<T, int16_t>())
return ColumnType::Int16;
else if constexpr (std::is_same<T, uint16_t>())
return ColumnType::UInt16;
else if constexpr (std::is_same<T, int32_t>())
return ColumnType::Int32;
else if constexpr (std::is_same<T, uint32_t>())
return ColumnType::UInt32;
else if constexpr (std::is_same<T, bool>())
return ColumnType::Bool;
else if constexpr (std::is_same<T, float>())
return ColumnType::Float;
else if constexpr (std::is_same<T, double>())
return ColumnType::Double;
else
Expand Down Expand Up @@ -188,18 +194,22 @@ namespace nanoaod {
template <typename T, class This>
static auto &bigVectorImpl(This &table) {
// helper function to avoid code duplication, for the two accessor functions that differ only in const-ness
if constexpr (std::is_same<T, float>())
return table.floats_;
else if constexpr (std::is_same<T, int>())
return table.ints_;
else if constexpr (std::is_same<T, uint8_t>())
return table.uint8s_;
else if constexpr (std::is_same<T, int8_t>())
if constexpr (std::is_same<T, int8_t>())
return table.int8s_;
else if constexpr (std::is_same<T, bool>())
else if constexpr (std::is_same<T, uint8_t>())
return table.uint8s_;
else if constexpr (std::is_same<T, int16_t>())
return table.int16s_;
else if constexpr (std::is_same<T, uint16_t>())
return table.uint16s_;
else if constexpr (std::is_same<T, int32_t>())
return table.int32s_;
else if constexpr (std::is_same<T, uint32_t>())
return table.uint32s_;
else if constexpr (std::is_same<T, bool>())
return table.uint8s_; // special case: bool stored as vector of uint8
else if constexpr (std::is_same<T, float>())
return table.floats_;
else if constexpr (std::is_same<T, double>())
return table.doubles_;
else
Expand All @@ -210,11 +220,13 @@ namespace nanoaod {
std::string name_, doc_;
bool singleton_, extension_;
std::vector<Column> columns_;
std::vector<float> floats_;
std::vector<int> ints_;
std::vector<uint8_t> uint8s_;
std::vector<int8_t> int8s_;
std::vector<uint8_t> uint8s_;
std::vector<int16_t> int16s_;
std::vector<uint16_t> uint16s_;
std::vector<int32_t> int32s_;
std::vector<uint32_t> uint32s_;
std::vector<float> floats_;
std::vector<double> doubles_;
};

Expand Down
42 changes: 26 additions & 16 deletions DataFormats/NanoAOD/src/FlatTable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,30 @@ void nanoaod::FlatTable::addExtension(const nanoaod::FlatTable& other) {
throw cms::Exception("LogicError", "Mismatch in adding extension");
for (unsigned int i = 0, n = other.nColumns(); i < n; ++i) {
switch (other.columnType(i)) {
case ColumnType::Float:
addColumn<float>(other.columnName(i), other.columnData<float>(i), other.columnDoc(i));
break;
case ColumnType::Int:
addColumn<int>(other.columnName(i), other.columnData<int>(i), other.columnDoc(i));
break;
case ColumnType::Int8:
addColumn<int8_t>(other.columnName(i), other.columnData<int>(i), other.columnDoc(i));
break;
case ColumnType::Bool:
addColumn<bool>(other.columnName(i), other.columnData<bool>(i), other.columnDoc(i));
addColumn<int8_t>(other.columnName(i), other.columnData<int8_t>(i), other.columnDoc(i));
break;
case ColumnType::UInt8:
addColumn<uint8_t>(other.columnName(i), other.columnData<uint8_t>(i), other.columnDoc(i));
break;
case ColumnType::Int16:
addColumn<int16_t>(other.columnName(i), other.columnData<int16_t>(i), other.columnDoc(i));
break;
case ColumnType::UInt16:
addColumn<uint16_t>(other.columnName(i), other.columnData<uint16_t>(i), other.columnDoc(i));
break;
case ColumnType::Int32:
addColumn<int32_t>(other.columnName(i), other.columnData<int32_t>(i), other.columnDoc(i));
break;
case ColumnType::UInt32:
addColumn<uint32_t>(other.columnName(i), other.columnData<uint32_t>(i), other.columnDoc(i));
break;
case ColumnType::Bool:
addColumn<bool>(other.columnName(i), other.columnData<bool>(i), other.columnDoc(i));
break;
case ColumnType::Float:
addColumn<float>(other.columnName(i), other.columnData<float>(i), other.columnDoc(i));
break;
case ColumnType::Double:
addColumn<double>(other.columnName(i), other.columnData<double>(i), other.columnDoc(i));
break;
Expand All @@ -44,18 +50,22 @@ double nanoaod::FlatTable::getAnyValue(unsigned int row, unsigned int column) co
if (column >= nColumns())
throw cms::Exception("LogicError", "Invalid column");
switch (columnType(column)) {
case ColumnType::Float:
return *(beginData<float>(column) + row);
case ColumnType::Int:
return *(beginData<int>(column) + row);
case ColumnType::Int8:
return *(beginData<int8_t>(column) + row);
case ColumnType::Bool:
return *(beginData<bool>(column) + row);
case ColumnType::UInt8:
return *(beginData<uint8_t>(column) + row);
case ColumnType::Int16:
return *(beginData<int16_t>(column) + row);
case ColumnType::UInt16:
return *(beginData<uint16_t>(column) + row);
case ColumnType::Int32:
return *(beginData<int32_t>(column) + row);
case ColumnType::UInt32:
return *(beginData<uint32_t>(column) + row);
case ColumnType::Bool:
return *(beginData<bool>(column) + row);
case ColumnType::Float:
return *(beginData<float>(column) + row);
case ColumnType::Double:
return *(beginData<double>(column) + row);
}
Expand Down
3 changes: 2 additions & 1 deletion DataFormats/NanoAOD/src/classes_def.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
<version ClassVersion="3" checksum="3066258528"/>
</class>
<class name="std::vector<nanoaod::FlatTable::Column>" />
<class name="nanoaod::FlatTable" ClassVersion="5">
<class name="nanoaod::FlatTable" ClassVersion="6">
<version ClassVersion="6" checksum="70963850"/>
<version ClassVersion="5" checksum="4251670483"/>
<version ClassVersion="4" checksum="656493391"/>
<version ClassVersion="3" checksum="2443023556"/>
Expand Down
41 changes: 32 additions & 9 deletions PhysicsTools/NanoAOD/interface/SimpleFlatTableProducer.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,16 @@ class SimpleFlatTableProducerBase : public edm::stream::EDProducer<> {
vars_.push_back(std::make_unique<UIntVar>(vname, varPSet));
else if (type == "float")
vars_.push_back(std::make_unique<FloatVar>(vname, varPSet));
else if (type == "double")
vars_.push_back(std::make_unique<DoubleVar>(vname, varPSet));
else if (type == "int8")
vars_.push_back(std::make_unique<Int8Var>(vname, varPSet));
else if (type == "uint8")
vars_.push_back(std::make_unique<UInt8Var>(vname, varPSet));
else if (type == "int16")
vars_.push_back(std::make_unique<Int16Var>(vname, varPSet));
else if (type == "uint16")
vars_.push_back(std::make_unique<UInt16Var>(vname, varPSet));
else if (type == "bool")
vars_.push_back(std::make_unique<BoolVar>(vname, varPSet));
else
Expand All @@ -159,9 +165,10 @@ class SimpleFlatTableProducerBase : public edm::stream::EDProducer<> {
edm::ParameterSetDescription variable;
variable.add<std::string>("expr")->setComment("a function to define the content of the branch in the flat table");
variable.add<std::string>("doc")->setComment("few words description of the branch content");
variable.ifValue(edm::ParameterDescription<std::string>(
"type", "int", true, edm::Comment("the c++ type of the branch in the flat table")),
edm::allowedValues<std::string>("int", "unit", "float", "int8", "uint8", "bool"));
variable.ifValue(
edm::ParameterDescription<std::string>(
"type", "int", true, edm::Comment("the c++ type of the branch in the flat table")),
edm::allowedValues<std::string>("int", "uint", "float", "double", "int8", "uint8", "int16", "uint16", "bool"));
variable.addOptionalNode(
edm::ParameterDescription<int>(
"precision", true, edm::Comment("the precision with which to store the value in the flat table")) xor
Expand Down Expand Up @@ -198,11 +205,14 @@ class SimpleFlatTableProducerBase : public edm::stream::EDProducer<> {
const bool skipNonExistingSrc_;
const edm::EDGetTokenT<TProd> src_;

typedef FuncVariable<T, StringObjectFunction<T>, int> IntVar;
typedef FuncVariable<T, StringObjectFunction<T>, unsigned int> UIntVar;
typedef FuncVariable<T, StringObjectFunction<T>, int32_t> IntVar;
typedef FuncVariable<T, StringObjectFunction<T>, uint32_t> UIntVar;
typedef FuncVariable<T, StringObjectFunction<T>, float> FloatVar;
typedef FuncVariable<T, StringObjectFunction<T>, double> DoubleVar;
typedef FuncVariable<T, StringObjectFunction<T>, int8_t> Int8Var;
typedef FuncVariable<T, StringObjectFunction<T>, uint8_t> UInt8Var;
typedef FuncVariable<T, StringObjectFunction<T>, int16_t> Int16Var;
typedef FuncVariable<T, StringObjectFunction<T>, uint16_t> UInt16Var;
typedef FuncVariable<T, StringCutObjectSelector<T>, bool> BoolVar;
std::vector<std::unique_ptr<Variable<T>>> vars_;
};
Expand All @@ -224,6 +234,9 @@ class SimpleFlatTableProducer : public SimpleFlatTableProducerBase<T, edm::View<
if (type == "int")
extvars_.push_back(
std::make_unique<IntExtVar>(vname, varPSet, this->consumesCollector(), this->skipNonExistingSrc_));
else if (type == "uint")
extvars_.push_back(
std::make_unique<UIntExtVar>(vname, varPSet, this->consumesCollector(), this->skipNonExistingSrc_));
else if (type == "float")
extvars_.push_back(
std::make_unique<FloatExtVar>(vname, varPSet, this->consumesCollector(), this->skipNonExistingSrc_));
Expand All @@ -236,6 +249,12 @@ class SimpleFlatTableProducer : public SimpleFlatTableProducerBase<T, edm::View<
else if (type == "uint8")
extvars_.push_back(
std::make_unique<UInt8ExtVar>(vname, varPSet, this->consumesCollector(), this->skipNonExistingSrc_));
else if (type == "int16")
extvars_.push_back(
std::make_unique<Int16ExtVar>(vname, varPSet, this->consumesCollector(), this->skipNonExistingSrc_));
else if (type == "uint16")
extvars_.push_back(
std::make_unique<UInt16ExtVar>(vname, varPSet, this->consumesCollector(), this->skipNonExistingSrc_));
else if (type == "bool")
extvars_.push_back(
std::make_unique<BoolExtVar>(vname, varPSet, this->consumesCollector(), this->skipNonExistingSrc_));
Expand All @@ -261,9 +280,10 @@ class SimpleFlatTableProducer : public SimpleFlatTableProducerBase<T, edm::View<
edm::ParameterSetDescription extvariable;
extvariable.add<edm::InputTag>("src")->setComment("valuemap input collection to fill the flat table");
extvariable.add<std::string>("doc")->setComment("few words description of the branch content");
extvariable.ifValue(edm::ParameterDescription<std::string>(
"type", "int", true, edm::Comment("the c++ type of the branch in the flat table")),
edm::allowedValues<std::string>("int", "unit", "float", "int8", "uint8", "bool"));
extvariable.ifValue(
edm::ParameterDescription<std::string>(
"type", "int", true, edm::Comment("the c++ type of the branch in the flat table")),
edm::allowedValues<std::string>("int", "uint", "float", "double", "int8", "uint8", "int16", "uint16", "bool"));
extvariable.addOptionalNode(
edm::ParameterDescription<int>(
"precision", true, edm::Comment("the precision with which to store the value in the flat table")) xor
Expand Down Expand Up @@ -316,12 +336,15 @@ class SimpleFlatTableProducer : public SimpleFlatTableProducerBase<T, edm::View<
const unsigned int maxLen_;
const StringCutObjectSelector<T> cut_;

typedef ValueMapVariable<T, int> IntExtVar;
typedef ValueMapVariable<T, int32_t> IntExtVar;
typedef ValueMapVariable<T, uint32_t> UIntExtVar;
typedef ValueMapVariable<T, float> FloatExtVar;
typedef ValueMapVariable<T, double, float> DoubleExtVar;
typedef ValueMapVariable<T, bool> BoolExtVar;
typedef ValueMapVariable<T, int, int8_t> Int8ExtVar;
typedef ValueMapVariable<T, int, uint8_t> UInt8ExtVar;
typedef ValueMapVariable<T, int, int16_t> Int16ExtVar;
typedef ValueMapVariable<T, int, uint16_t> UInt16ExtVar;
std::vector<std::unique_ptr<ExtVariable<T>>> extvars_;
};

Expand Down
5 changes: 3 additions & 2 deletions PhysicsTools/NanoAOD/plugins/CandMCMatchTableProducer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ class CandMCMatchTableProducer : public edm::global::EDProducer<> {
iEvent.getByToken(genPartsToken_, genParts);
}

std::vector<int> key(ncand, -1), flav(ncand, 0);
std::vector<int16_t> key(ncand, -1);
std::vector<uint8_t> flav(ncand, 0);
for (unsigned int i = 0; i < ncand; ++i) {
//std::cout << "cand #" << i << ": pT = " << cands->ptrAt(i)->pt() << ", eta = " << cands->ptrAt(i)->eta() << ", phi = " << cands->ptrAt(i)->phi() << std::endl;
const auto& cand = candProd.ptrAt(i);
Expand Down Expand Up @@ -192,7 +193,7 @@ class CandMCMatchTableProducer : public edm::global::EDProducer<> {
};
}

tab->addColumn<int>(branchName_ + "Idx", key, "Index into genParticle list for " + doc_);
tab->addColumn<int16_t>(branchName_ + "Idx", key, "Index into genParticle list for " + doc_);
tab->addColumn<uint8_t>(branchName_ + "Flav",
flav,
"Flavour of genParticle (DressedLeptons for electrons) for " + doc_ + ": " + flavDoc_);
Expand Down
4 changes: 2 additions & 2 deletions PhysicsTools/NanoAOD/plugins/GenJetFlavourTableProducer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ void GenJetFlavourTableProducer::produce(edm::Event& iEvent, const edm::EventSet
const auto& jetFlavourInfosProd = iEvent.get(jetFlavourInfosToken_);

unsigned int ncand = 0;
std::vector<int> partonFlavour;
std::vector<int16_t> partonFlavour;
std::vector<uint8_t> hadronFlavour;

for (const reco::GenJet& jet : jetsProd) {
Expand All @@ -84,7 +84,7 @@ void GenJetFlavourTableProducer::produce(edm::Event& iEvent, const edm::EventSet
}

auto tab = std::make_unique<nanoaod::FlatTable>(ncand, name_, false, true);
tab->addColumn<int>("partonFlavour", partonFlavour, "flavour from parton matching");
tab->addColumn<int16_t>("partonFlavour", partonFlavour, "flavour from parton matching");
tab->addColumn<uint8_t>("hadronFlavour", hadronFlavour, "flavour from hadron ghost clustering");

iEvent.put(std::move(tab));
Expand Down
15 changes: 11 additions & 4 deletions PhysicsTools/NanoAOD/plugins/LumiOutputBranches.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ void LumiOutputBranches::defineBranchesFromFirstEvent(const nanoaod::FlatTable &
case nanoaod::FlatTable::ColumnType::Float:
m_floatBranches.emplace_back(var, tab.columnDoc(i), "F");
break;
case nanoaod::FlatTable::ColumnType::Int:
case nanoaod::FlatTable::ColumnType::Int32:
m_intBranches.emplace_back(var, tab.columnDoc(i), "I");
break;
case nanoaod::FlatTable::ColumnType::UInt8:
Expand Down Expand Up @@ -44,7 +44,7 @@ void LumiOutputBranches::branch(TTree &tree) {
if (tree.FindBranch(("n" + m_baseName).c_str()) != nullptr) {
throw cms::Exception("LogicError", "Trying to save multiple main tables for " + m_baseName + "\n");
}
m_counterBranch = tree.Branch(("n" + m_baseName).c_str(), &m_counter, ("n" + m_baseName + "/i").c_str());
m_counterBranch = tree.Branch(("n" + m_baseName).c_str(), &m_counter, ("n" + m_baseName + "/I").c_str());
m_counterBranch->SetTitle(m_doc.c_str());
}
}
Expand All @@ -68,7 +68,14 @@ void LumiOutputBranches::fill(const edm::LuminosityBlockForOutput &iLumi, TTree
edm::Handle<nanoaod::FlatTable> handle;
iLumi.getByToken(m_token, handle);
const nanoaod::FlatTable &tab = *handle;
m_counter = tab.size();
auto size = tab.size();
// ROOT native array-size branches may only be signed integers;
// until that changes, we must make sure the table size does not exceed the maximum of the signed counter type.
if (size > std::numeric_limits<CounterType>::max()) {
throw cms::Exception("Table " + tab.name() + " size is " + std::to_string(size) +
", is too large for ROOT native array branch");
}
m_counter = size;
m_singleton = tab.singleton();
if (!m_branchesBooked) {
m_extension = tab.extension() ? IsExtension : IsMain;
Expand All @@ -80,7 +87,7 @@ void LumiOutputBranches::fill(const edm::LuminosityBlockForOutput &iLumi, TTree
branch(tree);
}
if (!m_singleton && m_extension == IsExtension) {
if (m_counter != *reinterpret_cast<UInt_t *>(m_counterBranch->GetAddress())) {
if (m_counter != *reinterpret_cast<CounterType *>(m_counterBranch->GetAddress())) {
throw cms::Exception("LogicError",
"Mismatch in number of entries between extension and main table for " + tab.name());
}
Expand Down
3 changes: 2 additions & 1 deletion PhysicsTools/NanoAOD/plugins/LumiOutputBranches.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ class LumiOutputBranches {
bool m_singleton;
enum { IsMain = 0, IsExtension = 1, DontKnowYetIfMainOrExtension = 2 } m_extension;
std::string m_doc;
UInt_t m_counter;
typedef Int_t CounterType;
CounterType m_counter;
struct NamedBranchPtr {
std::string name, title, rootTypeCode;
TBranch *branch;
Expand Down
Loading