Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ntuple] Add a common RNTupleOpenSpec #16653

Merged
merged 1 commit into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 10 additions & 19 deletions tree/ntuple/v7/inc/ROOT/RNTupleProcessor.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,6 @@
namespace ROOT {
namespace Experimental {

/// Identifies a single RNTuple by pairing its name with the storage location it lives in.
struct RNTupleSourceSpec {
   /// Name of the RNTuple.
   std::string fName;
   /// Storage location of the RNTuple, e.g. "ntuple1.root".
   std::string fLocation;

   /// Creates a spec whose name and location are both empty.
   RNTupleSourceSpec() = default;

   /// Creates a spec from an RNTuple name and its storage location; both views are copied into owned strings.
   RNTupleSourceSpec(std::string_view name, std::string_view location) : fName{name}, fLocation{location} {}
};

// clang-format off
/**
\class ROOT::Experimental::RNTupleProcessor
Expand All @@ -52,17 +43,17 @@ Example usage (see ntpl012_processor.C for a full example):
~~~{.cpp}
#include <ROOT/RNTupleProcessor.hxx>
using ROOT::Experimental::RNTupleProcessor;
using ROOT::Experimental::RNTupleSourceSpec;
using ROOT::Experimental::RNTupleOpenSpec;

std::vector<RNTupleSourceSpec> ntuples = {{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}};
std::vector<RNTupleOpenSpec> ntuples = {{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}};
auto processor = RNTupleProcessor::CreateChain(ntuples);

for (const auto &entry : processor) {
std::cout << "pt = " << *entry.GetPtr<float>("pt") << std::endl;
}
~~~

An RNTupleProcessor is created by providing one or more RNTupleSourceSpecs, each of which contains the name and storage
An RNTupleProcessor is created by providing one or more RNTupleOpenSpecs, each of which contains the name and storage
location of a single RNTuple. The RNTuples are processed in the order in which they were provided.

The RNTupleProcessor constructor also (optionally) accepts an RNTupleModel, which determines which fields should be
Expand Down Expand Up @@ -108,7 +99,7 @@ protected:
void SetConcreteField() { fConcreteField = fProtoField->Clone(fProtoField->GetFieldName()); }
};

std::vector<RNTupleSourceSpec> fNTuples;
std::vector<RNTupleOpenSpec> fNTuples;
std::unique_ptr<REntry> fEntry;
std::unique_ptr<Internal::RPageSource> fPageSource;
std::vector<RFieldContext> fFieldContexts;
Expand All @@ -120,13 +111,13 @@ protected:
/////////////////////////////////////////////////////////////////////////////
/// \brief Connect an RNTuple for processing.
///
/// \param[in] ntuple The RNTupleSourceSpec describing the RNTuple to connect.
/// \param[in] ntuple The RNTupleOpenSpec describing the RNTuple to connect.
///
/// \return The number of entries in the newly-connected RNTuple.
///
/// Creates and attaches new page source for the specified RNTuple, and connects the fields that are known by
/// the processor to it.
virtual NTupleSize_t ConnectNTuple(const RNTupleSourceSpec &ntuple) = 0;
virtual NTupleSize_t ConnectNTuple(const RNTupleOpenSpec &ntuple) = 0;

/////////////////////////////////////////////////////////////////////////////
/// \brief Creates and connects concrete fields to the current page source, based on the proto-fields.
Expand All @@ -142,7 +133,7 @@ protected:
/// is connected or the iterator has reached the end.
virtual NTupleSize_t Advance() = 0;

RNTupleProcessor(const std::vector<RNTupleSourceSpec> &ntuples)
RNTupleProcessor(const std::vector<RNTupleOpenSpec> &ntuples)
: fNTuples(ntuples), fNEntriesProcessed(0), fCurrentNTupleNumber(0), fLocalEntryNumber(0)
{
}
Expand Down Expand Up @@ -243,7 +234,7 @@ public:
///
/// \return A pointer to the newly created RNTupleProcessor.
static std::unique_ptr<RNTupleProcessor>
CreateChain(const std::vector<RNTupleSourceSpec> &ntuples, std::unique_ptr<RNTupleModel> model = nullptr);
CreateChain(const std::vector<RNTupleOpenSpec> &ntuples, std::unique_ptr<RNTupleModel> model = nullptr);
};

// clang-format off
Expand All @@ -257,7 +248,7 @@ class RNTupleChainProcessor : public RNTupleProcessor {
friend class RNTupleProcessor;

private:
NTupleSize_t ConnectNTuple(const RNTupleSourceSpec &ntuple) final;
NTupleSize_t ConnectNTuple(const RNTupleOpenSpec &ntuple) final;
void ConnectFields() final;
NTupleSize_t Advance() final;

Expand All @@ -270,7 +261,7 @@ private:
/// specified, it is created from the descriptor of the first RNTuple specified in `ntuples`.
///
/// RNTuples are processed in the order in which they are specified.
RNTupleChainProcessor(const std::vector<RNTupleSourceSpec> &ntuples, std::unique_ptr<RNTupleModel> model = nullptr);
RNTupleChainProcessor(const std::vector<RNTupleOpenSpec> &ntuples, std::unique_ptr<RNTupleModel> model = nullptr);
};

} // namespace Experimental
Expand Down
12 changes: 1 addition & 11 deletions tree/ntuple/v7/inc/ROOT/RNTupleReader.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -135,16 +135,6 @@ public:
bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
};

/// Describes one of the underlying RNTuples passed to OpenFriends().
struct ROpenSpec {
   /// Name of the RNTuple.
   std::string fNTupleName;
   /// Storage holding the RNTuple (the tests pass a file path here).
   std::string fStorage;
   /// Read options carried with this spec; left default-constructed by both constructors.
   RNTupleReadOptions fOptions;

   /// Creates a spec with empty name and storage.
   ROpenSpec() = default;

   /// Creates a spec from an RNTuple name and its storage; both views are copied into owned strings.
   ROpenSpec(std::string_view ntupleName, std::string_view storage) : fNTupleName{ntupleName}, fStorage{storage} {}
};

/// Open an RNTuple for reading.
///
/// Throws an RException if there is no RNTuple with the given name.
Expand Down Expand Up @@ -173,7 +163,7 @@ public:
/// have an identical number of entries. Fields in the combined RNTuple are named with the ntuple name
/// as a prefix, e.g. myNTuple1.px and myNTuple2.pt (see tutorial ntpl006_friends)
static std::unique_ptr<RNTupleReader>
OpenFriends(std::span<ROpenSpec> ntuples, const RNTupleReadOptions &options = RNTupleReadOptions());
OpenFriends(std::span<RNTupleOpenSpec> ntuples, const RNTupleReadOptions &options = RNTupleReadOptions());
std::unique_ptr<RNTupleReader> Clone()
{
auto options = RNTupleReadOptions{};
Expand Down
10 changes: 10 additions & 0 deletions tree/ntuple/v7/inc/ROOT/RNTupleUtil.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <variant>

#include <ROOT/RLogger.hxx>
#include <ROOT/RNTupleReadOptions.hxx>

namespace ROOT {
namespace Experimental {
Expand Down Expand Up @@ -228,6 +229,15 @@ struct RNTupleLocator {
}
};

/// Common description of an RNTuple to open, shared by RNTupleProcessor and RNTupleReader::OpenFriends().
struct RNTupleOpenSpec {
   /// Name of the RNTuple.
   std::string fNTupleName;
   /// Storage holding the RNTuple, e.g. "ntuple1.root".
   std::string fStorage;
   /// Read options carried with this spec; left default-constructed by the constructor.
   RNTupleReadOptions fOptions;

   /// Creates a spec from an RNTuple name and its storage; both views are copied into owned strings.
   RNTupleOpenSpec(std::string_view ntupleName, std::string_view storage)
      : fNTupleName{ntupleName}, fStorage{storage}
   {
   }
};

namespace Internal {
template <typename T>
auto MakeAliasedSharedPtr(T *rawPtr)
Expand Down
11 changes: 5 additions & 6 deletions tree/ntuple/v7/src/RNTupleProcessor.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,22 @@
#include <ROOT/RFieldBase.hxx>

std::unique_ptr<ROOT::Experimental::RNTupleProcessor>
ROOT::Experimental::RNTupleProcessor::CreateChain(const std::vector<RNTupleSourceSpec> &ntuples,
ROOT::Experimental::RNTupleProcessor::CreateChain(const std::vector<RNTupleOpenSpec> &ntuples,
std::unique_ptr<RNTupleModel> model)
{
return std::unique_ptr<RNTupleChainProcessor>(new RNTupleChainProcessor(ntuples, std::move(model)));
}

//------------------------------------------------------------------------------

ROOT::Experimental::RNTupleChainProcessor::RNTupleChainProcessor(const std::vector<RNTupleSourceSpec> &ntuples,
ROOT::Experimental::RNTupleChainProcessor::RNTupleChainProcessor(const std::vector<RNTupleOpenSpec> &ntuples,
std::unique_ptr<RNTupleModel> model)
: RNTupleProcessor(ntuples)
{
if (fNTuples.empty())
throw RException(R__FAIL("at least one RNTuple must be provided"));

fPageSource = Internal::RPageSource::Create(fNTuples[0].fName, fNTuples[0].fLocation);
fPageSource = Internal::RPageSource::Create(fNTuples[0].fNTupleName, fNTuples[0].fStorage);
fPageSource->Attach();

if (fPageSource->GetNEntries() == 0) {
Expand Down Expand Up @@ -64,13 +64,12 @@ ROOT::Experimental::RNTupleChainProcessor::RNTupleChainProcessor(const std::vect
ConnectFields();
}

ROOT::Experimental::NTupleSize_t
ROOT::Experimental::RNTupleChainProcessor::ConnectNTuple(const RNTupleSourceSpec &ntuple)
ROOT::Experimental::NTupleSize_t ROOT::Experimental::RNTupleChainProcessor::ConnectNTuple(const RNTupleOpenSpec &ntuple)
{
for (auto &fieldContext : fFieldContexts) {
fieldContext.ResetConcreteField();
}
fPageSource = Internal::RPageSource::Create(ntuple.fName, ntuple.fLocation);
fPageSource = Internal::RPageSource::Create(ntuple.fNTupleName, ntuple.fStorage);
fPageSource->Attach();
ConnectFields();
return fPageSource->GetNEntries();
Expand Down
2 changes: 1 addition & 1 deletion tree/ntuple/v7/src/RNTupleReader.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ ROOT::Experimental::RNTupleReader::Open(std::unique_ptr<RNTupleModel> model, con
}

std::unique_ptr<ROOT::Experimental::RNTupleReader>
ROOT::Experimental::RNTupleReader::OpenFriends(std::span<ROpenSpec> ntuples, const RNTupleReadOptions &options)
ROOT::Experimental::RNTupleReader::OpenFriends(std::span<RNTupleOpenSpec> ntuples, const RNTupleReadOptions &options)
{
std::vector<std::unique_ptr<Internal::RPageSource>> sources;
sources.reserve(ntuples.size());
Expand Down
6 changes: 2 additions & 4 deletions tree/ntuple/v7/test/ntuple_friends.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ TEST(RPageStorageFriends, Null)

TEST(RPageStorageFriends, Empty)
{
std::span<RNTupleReader::ROpenSpec> ntuples;
std::span<RNTupleOpenSpec> ntuples;
auto reader = RNTupleReader::OpenFriends(ntuples);
EXPECT_EQ(0u, reader->GetNEntries());
EXPECT_EQ(0u, reader->GetModel().GetFieldZero().GetOnDiskId());
Expand Down Expand Up @@ -68,9 +68,7 @@ TEST(RPageStorageFriends, Basic)
ntuple->Fill();
}

std::vector<RNTupleReader::ROpenSpec> friends{
{"ntpl1", fileGuard1.GetPath()},
{"ntpl2", fileGuard2.GetPath()} };
std::vector<RNTupleOpenSpec> friends{{"ntpl1", fileGuard1.GetPath()}, {"ntpl2", fileGuard2.GetPath()}};
auto ntuple = RNTupleReader::OpenFriends(friends);
EXPECT_EQ(3u, ntuple->GetNEntries());

Expand Down
2 changes: 1 addition & 1 deletion tree/ntuple/v7/test/ntuple_multi_column.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ TEST(RNTuple, MultiColumnRepresentationFriends)
writer->Fill();
}

std::vector<RNTupleReader::ROpenSpec> friends{{"ntpl1", fileGuard1.GetPath()}, {"ntpl2", fileGuard2.GetPath()}};
std::vector<RNTupleOpenSpec> friends{{"ntpl1", fileGuard1.GetPath()}, {"ntpl2", fileGuard2.GetPath()}};
auto reader = RNTupleReader::OpenFriends(friends);
EXPECT_EQ(2u, reader->GetNEntries());
auto viewPt = reader->GetView<float>("ntpl1.pt");
Expand Down
22 changes: 11 additions & 11 deletions tree/ntuple/v7/test/ntuple_processor.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ TEST(RNTupleProcessor, Basic)
}
}

std::vector<RNTupleSourceSpec> ntuples;
std::vector<RNTupleOpenSpec> ntuples;
try {
auto proc = RNTupleProcessor::CreateChain(ntuples);
FAIL() << "creating a processor without at least one RNTuple should throw";
Expand Down Expand Up @@ -57,7 +57,7 @@ TEST(RNTupleProcessor, WithModel)
auto model = RNTupleModel::Create();
auto fldY = model->MakeField<float>("y");

std::vector<RNTupleSourceSpec> ntuples = {{"ntuple", fileGuard.GetPath()}};
std::vector<RNTupleOpenSpec> ntuples = {{"ntuple", fileGuard.GetPath()}};

auto proc = RNTupleProcessor::CreateChain(ntuples, std::move(model));
for (const auto &entry : *proc) {
Expand Down Expand Up @@ -89,7 +89,7 @@ TEST(RNTupleProcessor, WithBareModel)

auto model = RNTupleModel::CreateBare();
model->MakeField<float>("y");
std::vector<RNTupleSourceSpec> ntuples = {{"ntuple", fileGuard.GetPath()}};
std::vector<RNTupleOpenSpec> ntuples = {{"ntuple", fileGuard.GetPath()}};

auto proc = RNTupleProcessor::CreateChain(ntuples, std::move(model));
for (const auto &entry : *proc) {
Expand Down Expand Up @@ -132,7 +132,7 @@ TEST(RNTupleProcessor, SimpleChain)
}
}

std::vector<RNTupleSourceSpec> ntuples = {{"ntuple", fileGuard1.GetPath()}, {"ntuple", fileGuard2.GetPath()}};
std::vector<RNTupleOpenSpec> ntuples = {{"ntuple", fileGuard1.GetPath()}, {"ntuple", fileGuard2.GetPath()}};

std::uint64_t nEntries = 0;
auto proc = RNTupleProcessor::CreateChain(ntuples);
Expand Down Expand Up @@ -184,7 +184,7 @@ TEST(RNTupleProcessor, SimpleChainWithModel)
auto model = RNTupleModel::Create();
auto fldX = model->MakeField<float>("x");

std::vector<RNTupleSourceSpec> ntuples = {
std::vector<RNTupleOpenSpec> ntuples = {
{"ntuple", fileGuard1.GetPath()}, {"ntuple", fileGuard2.GetPath()}, {"ntuple", fileGuard3.GetPath()}};

auto proc = RNTupleProcessor::CreateChain(ntuples, std::move(model));
Expand Down Expand Up @@ -246,11 +246,11 @@ TEST(RNTupleProcessor, EmptyNTuples)
auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard5.GetPath());
}

std::vector<RNTupleSourceSpec> ntuples = {{"ntuple", fileGuard1.GetPath()},
{"ntuple", fileGuard2.GetPath()},
{"ntuple", fileGuard3.GetPath()},
{"ntuple", fileGuard4.GetPath()},
{"ntuple", fileGuard5.GetPath()}};
std::vector<RNTupleOpenSpec> ntuples = {{"ntuple", fileGuard1.GetPath()},
{"ntuple", fileGuard2.GetPath()},
{"ntuple", fileGuard3.GetPath()},
{"ntuple", fileGuard4.GetPath()},
{"ntuple", fileGuard5.GetPath()}};

std::uint64_t nEntries = 0;

Expand Down Expand Up @@ -294,7 +294,7 @@ TEST(RNTupleProcessor, ChainUnalignedModels)
ntuple->Fill();
}

std::vector<RNTupleSourceSpec> ntuples = {{"ntuple", fileGuard1.GetPath()}, {"ntuple", fileGuard2.GetPath()}};
std::vector<RNTupleOpenSpec> ntuples = {{"ntuple", fileGuard1.GetPath()}, {"ntuple", fileGuard2.GetPath()}};

auto proc = RNTupleProcessor::CreateChain(ntuples);
auto entry = proc->begin();
Expand Down
2 changes: 1 addition & 1 deletion tree/ntuple/v7/test/ntuple_show.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ TEST(RNTupleShow, Friends)
writer->Fill();
}

std::vector<RNTupleReader::ROpenSpec> friends = {{"ntpl1", fileGuard1.GetPath()}, {"ntpl2", fileGuard2.GetPath()}};
std::vector<RNTupleOpenSpec> friends = {{"ntpl1", fileGuard1.GetPath()}, {"ntpl2", fileGuard2.GetPath()}};
auto ntuple = RNTupleReader::OpenFriends(friends);
std::ostringstream os;
ntuple->Show(0, os);
Expand Down
2 changes: 1 addition & 1 deletion tree/ntuple/v7/test/ntuple_test.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,11 @@ using RNTupleMerger = ROOT::Experimental::Internal::RNTupleMerger;
using RNTupleMergeOptions = ROOT::Experimental::Internal::RNTupleMergeOptions;
using ENTupleMergingMode = ROOT::Experimental::Internal::ENTupleMergingMode;
using RNTupleModel = ROOT::Experimental::RNTupleModel;
using RNTupleOpenSpec = ROOT::Experimental::RNTupleOpenSpec;
using RNTuplePlainCounter = ROOT::Experimental::Detail::RNTuplePlainCounter;
using RNTuplePlainTimer = ROOT::Experimental::Detail::RNTuplePlainTimer;
using RNTupleProcessor = ROOT::Experimental::RNTupleProcessor;
using RNTupleSerializer = ROOT::Experimental::Internal::RNTupleSerializer;
using RNTupleSourceSpec = ROOT::Experimental::RNTupleSourceSpec;
using RPage = ROOT::Experimental::Internal::RPage;
using RPageAllocatorHeap = ROOT::Experimental::Internal::RPageAllocatorHeap;
using RPagePool = ROOT::Experimental::Internal::RPagePool;
Expand Down
5 changes: 2 additions & 3 deletions tutorials/v7/ntuple/ntpl006_friends.C
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ constexpr char const* kNTupleMainFileName = "ntpl006_data.root";
constexpr char const* kNTupleFriendFileName = "ntpl006_reco.root";

using RNTupleModel = ROOT::Experimental::RNTupleModel;
using RNTupleOpenSpec = ROOT::Experimental::RNTupleOpenSpec;
using RNTupleReader = ROOT::Experimental::RNTupleReader;
using RNTupleWriter = ROOT::Experimental::RNTupleWriter;

Expand Down Expand Up @@ -69,9 +70,7 @@ void ntpl006_friends()
{
Generate();

std::vector<RNTupleReader::ROpenSpec> friends{
{"data", kNTupleMainFileName},
{"reco", kNTupleFriendFileName} };
std::vector<RNTupleOpenSpec> friends{{"data", kNTupleMainFileName}, {"reco", kNTupleFriendFileName}};
auto ntuple = RNTupleReader::OpenFriends(friends);

auto c = new TCanvas("c", "", 200, 10, 700, 500);
Expand Down
13 changes: 7 additions & 6 deletions tutorials/v7/ntuple/ntpl012_processor.C
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@

// Import classes from the `Experimental` namespace for the time being.
using ROOT::Experimental::RNTupleModel;
using ROOT::Experimental::RNTupleOpenSpec;
using ROOT::Experimental::RNTupleProcessor;
using ROOT::Experimental::RNTupleSourceSpec;
using ROOT::Experimental::RNTupleWriter;

// Number of events to generate for each ntuple.
Expand Down Expand Up @@ -61,7 +61,7 @@ void Write(std::string_view ntupleName, std::string_view ntupleFileName)
}
}

void Read(const std::vector<RNTupleSourceSpec> &ntuples)
void Read(const std::vector<RNTupleOpenSpec> &ntuples)
{
auto c = new TCanvas("c", "RNTupleProcessor Example", 200, 10, 700, 500);
TH1F hPx("h", "This is the px distribution", 100, -4, 4);
Expand All @@ -80,7 +80,7 @@ void Read(const std::vector<RNTupleSourceSpec> &ntuples)
// The RNTupleProcessor provides some additional bookkeeping information. The local entry number is reset each
// time a new ntuple in the chain is opened for processing.
if (processor->GetLocalEntryNumber() == 0) {
std::cout << "Processing " << ntuples.at(processor->GetCurrentNTupleNumber()).fName << " ("
std::cout << "Processing " << ntuples.at(processor->GetCurrentNTupleNumber()).fNTupleName << " ("
<< processor->GetNEntriesProcessed() << " total entries processed so far)" << std::endl;
}

Expand All @@ -96,12 +96,13 @@ void Read(const std::vector<RNTupleSourceSpec> &ntuples)

void ntpl012_processor()
{
// The ntuples to generate (for the purpose of this tutorial) and subsequently process.
std::vector<RNTupleSourceSpec> ntuples = {
// The ntuples to generate and subsequently process. The model of the first ntuple will be used to construct the
// entry used by the processor.
std::vector<RNTupleOpenSpec> ntuples = {
{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}, {"ntuple3", "ntuple3.root"}};

for (const auto &ntuple : ntuples) {
Write(ntuple.fName, ntuple.fLocation);
Write(ntuple.fNTupleName, ntuple.fStorage);
}

Read(ntuples);
Expand Down
Loading