diff --git a/roofit/RDataFrameHelpers/inc/RooAbsDataHelper.h b/roofit/RDataFrameHelpers/inc/RooAbsDataHelper.h index b62b30fa6207f..87bcf71122dba 100644 --- a/roofit/RDataFrameHelpers/inc/RooAbsDataHelper.h +++ b/roofit/RDataFrameHelpers/inc/RooAbsDataHelper.h @@ -1,41 +1,24 @@ -/***************************************************************************** - * Project: RooFit * - * Package: RooFitCore * - * Authors: * - * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu * - * DK, David Kirkby, UC Irvine, dkirkby@uci.edu * - * * - * Copyright (c) 2000-2021, Regents of the University of California * - * and Stanford University. All rights reserved. * - * * - * Redistribution and use in source and binary forms, * - * with or without modification, are permitted according to the terms * - * listed in LICENSE (http://roofit.sourceforge.net/license.txt) * - *****************************************************************************/ -/// Create RooDataSet/RooDataHist from RDataFrame. -/// \date Mar 2021 -/// \author Stephan Hageboeck (CERN) +/* + * Project: RooFit + * Authors: + * Stephan Hageboeck, CERN 2021 + * + * Copyright (c) 2024, CERN + * + * Redistribution and use in source and binary forms, + * with or without modification, are permitted according to the terms + * listed in LICENSE (http://roofit.sourceforge.net/license.txt) + */ + #ifndef ROOABSDATAHELPER #define ROOABSDATAHELPER -#include -#include -#include -#include -#include +#include #include #include -#include -#include -#include #include -#include -#include -#include - -class TTreeReader; /// This is a helper for an RDataFrame action, which fills RooFit data classes. /// @@ -45,8 +28,10 @@ class TTreeReader; /// - Construct one of the two action helpers RooDataSetHelper or RooDataHistHelper. Pass constructor arguments /// to RooAbsDataHelper::RooAbsDataHelper() as for the original classes. /// The arguments are forwarded to the actual data classes without any changes. 
-/// - Book the helper as an RDataFrame action. Here, the RDataFrame column types have to be passed as template parameters. +/// - Book the helper as an RDataFrame action. Here, the RDataFrame column types have to be passed as template +/// parameters. /// - Pass the column names to the Book action. These are matched by position to the variables of the dataset. +/// If there is one more column name than variables in the dataset, the last column's values will be used as weights. /// /// All arguments passed to are forwarded to RooDataSet::RooDataSet() / RooDataHist::RooDataHist(). /// @@ -58,150 +43,50 @@ class TTreeReader; /// RooDataSetHelper{"dataset", // Name (directly forwarded to RooDataSet::RooDataSet()) /// "Title of dataset", // Title ( ~ " ~ ) /// RooArgSet(x, y) }, // Variables to create in dataset -/// {"x", "y"} // Column names from RDataFrame +/// {"x", "y", "weight"} // Column names from RDataFrame +/// // (the additional column is the weight; a RooDataSet must be created with a weight variable, e.g. RooFit::WeightVar(), to accept it) /// ); /// /// ``` /// \warning Variables in the dataset and columns in RDataFrame are **matched by position, not by name**. /// This enables the easy exchanging of columns that should be filled into the dataset. -template -class RooAbsDataHelper : public ROOT::Detail::RDF::RActionImpl> { -public: - using Result_t = DataSet_t; - -private: - std::shared_ptr _dataset; - std::mutex _mutex_dataset; - std::size_t _numInvalid = 0; - - std::vector> _events; // One vector of values per data-processing slot - const std::size_t _eventSize; // Number of variables in dataset - +template +class RooAbsDataHelper : public RooFit::Detail::RooAbsDataFiller, + public ROOT::Detail::RDF::RActionImpl> { public: + using Result_t = DataSet_t; + + /// Construct a helper to create RooDataSet/RooDataHist. + /// \tparam Args_t Parameter pack of arguments. + /// \param args Constructor arguments for RooDataSet::RooDataSet() or RooDataHist::RooDataHist(). + /// All arguments will be forwarded as they are.
+ template + RooAbsDataHelper(Args_t &&...args) : _dataset{new DataSet_t(std::forward(args)...)} + { + } + + /// Return internal dataset/hist. + std::shared_ptr GetResultPtr() const { return _dataset; } + + /// Method that RDataFrame calls to pass a new event. + /// + /// \param slot When IMT is used, this is a number in the range [0, nSlots) to fill lock free. + /// \param values x, y, z, ... coordinates of the event. + template + void Exec(unsigned int slot, ColumnTypes... values) + { + auto &vector = _events[slot]; + for (auto &&val : {values...}) { + vector.push_back(val); + } - /// Construct a helper to create RooDataSet/RooDataHist. - /// \tparam Args_t Parameter pack of arguments. - /// \param args Constructor arguments for RooDataSet::RooDataSet() or RooDataHist::RooDataHist(). - /// All arguments will be forwarded as they are. - template - RooAbsDataHelper(Args_t&&... args) : - _dataset{ new DataSet_t(std::forward(args)...) }, - _eventSize{ _dataset->get()->size() } - { - const auto nSlots = ROOT::IsImplicitMTEnabled() ? ROOT::GetThreadPoolSize() : 1; - _events.resize(nSlots); - } - - - /// Move constructor. It transfers ownership of the internal RooAbsData object. - RooAbsDataHelper(RooAbsDataHelper&& other) : - _dataset{ std::move(other._dataset) }, - _events{ std::move(other._events) }, - _eventSize{ other._eventSize } - { - - } - - /// Copy is discouraged. - /// Use `rdataframe.Book<...>(std::move(absDataHelper), ...)` instead. - RooAbsDataHelper(const RooAbsDataHelper&) = delete; - /// Return internal dataset/hist. - std::shared_ptr GetResultPtr() const { return _dataset; } - /// RDataFrame interface method. Nothing has to be initialised. - void Initialize() {} - /// RDataFrame interface method. No tasks. - void InitTask(TTreeReader *, unsigned int) {} - /// RDataFrame interface method. - std::string GetActionName() { return "RooDataSetHelper"; } - - /// Method that RDataFrame calls to pass a new event. 
- /// - /// \param slot When IMT is used, this is a number in the range [0, nSlots) to fill lock free. - /// \param values x, y, z, ... coordinates of the event. - template - void Exec(unsigned int slot, ColumnTypes... values) - { - if (sizeof...(values) != _eventSize) { - throw std::invalid_argument(std::string("RooDataSet can hold ") - + std::to_string(_eventSize) - + " variables per event, but RDataFrame passed " - + std::to_string(sizeof...(values)) - + " columns."); - } - - auto& vector = _events[slot]; - for (auto&& val : {values...}) { - vector.push_back(val); - } - - if (vector.size() > 1024 && _mutex_dataset.try_lock()) { - const std::lock_guard guard(_mutex_dataset, std::adopt_lock_t()); - FillDataSet(vector, _eventSize); - vector.clear(); - } - } - - /// Empty all buffers into the dataset/hist to finish processing. - void Finalize() { - for (auto& vector : _events) { - FillDataSet(vector, _eventSize); - vector.clear(); - } - - if (_numInvalid>0) { - const auto prefix = std::string(_dataset->ClassName()) + "Helper::Finalize(" + _dataset->GetName() + ") "; - oocoutW(nullptr, DataHandling) << prefix << "Ignored " << _numInvalid << " out-of-range events\n"; - } - } + ExecImpl(sizeof...(values), vector); + } + RooAbsData &GetAbsData() override { return *_dataset; } private: - /// Append all `events` to the internal RooDataSet or increment the bins of a RooDataHist at the given locations. - /// - /// \param events Events to fill into `data`. The layout is assumed to be `(x, y, z, ...) (x, y, z, ...), (...)`. - /// \note The order of the variables inside `events` must be consistent with the order given in the constructor. - /// No matching by name is performed. - /// \param eventSize Size of a single event. 
- void FillDataSet(const std::vector& events, unsigned int eventSize) { - if (events.empty()) - return; - - const RooArgSet& argSet = *_dataset->get(); - - for (std::size_t i = 0; i < events.size(); i += eventSize) { - - // Creating a RooDataSet from an RDataFrame should be consistent with the - // creation from a TTree. The construction from a TTree discards entries - // outside the variable definition range, so we have to do that too (see - // also RooTreeDataStore::loadValues). - - bool allOK = true; - for (std::size_t j=0; j < eventSize; ++j) { - auto * destArg = static_cast(argSet[j]); - double sourceVal = events[i+j]; - - if (!destArg->inRange(sourceVal, nullptr)) { - _numInvalid++ ; - allOK = false; - const auto prefix = std::string(_dataset->ClassName()) + "Helper::FillDataSet(" + _dataset->GetName() + ") "; - if (_numInvalid < 5) { - // Unlike in the TreeVectorStore case, we don't log the event - // number here because we don't know it anyway, because of - // RDataFrame slots and multithreading. - oocoutI(nullptr, DataHandling) << prefix << "Skipping event because " << destArg->GetName() - << " cannot accommodate the value " << sourceVal << "\n"; - } else if (_numInvalid == 5) { - oocoutI(nullptr, DataHandling) << prefix << "Skipping ...\n"; - } - break ; - } - destArg->setVal(sourceVal); - } - if(allOK) { - _dataset->add(argSet); - } - } - } + std::shared_ptr _dataset; }; /// Helper for creating a RooDataSet inside RDataFrame. 
\see RooAbsDataHelper diff --git a/roofit/RDataFrameHelpers/test/testActionHelpers.cxx b/roofit/RDataFrameHelpers/test/testActionHelpers.cxx index 02089f0f26245..96fd796abadc8 100644 --- a/roofit/RDataFrameHelpers/test/testActionHelpers.cxx +++ b/roofit/RDataFrameHelpers/test/testActionHelpers.cxx @@ -14,116 +14,177 @@ #include "gtest/gtest.h" -TEST(RooAbsDataHelper, MTConstruction) +namespace { + +constexpr std::size_t nEvent = 200000; + +constexpr double targetXMean = 0.; +constexpr double targetYMean = 1.; +constexpr double targetXVar = 100. / 12.; +constexpr double targetYVar = 4. / 12.; + +auto makeDataFrame() { #ifdef R__USE_IMT - ROOT::EnableImplicitMT(4); + ROOT::EnableImplicitMT(4); #endif - // We create an RDataFrame with two columns filled with 2 million random numbers. - constexpr std::size_t nEvent = 200000; - ROOT::RDataFrame d(nEvent); - auto dd = - d.DefineSlot("x", [=](unsigned int /*slot*/, ULong64_t entry) { return -5. + 10. * ((double)entry) / nEvent; }, + return ROOT::RDataFrame{nEvent} + .DefineSlot("x", [=](unsigned int /*slot*/, ULong64_t entry) { return -5. + 10. * ((double)entry) / nEvent; }, {"rdfentry_"}) - .DefineSlot("y", [=](unsigned int /*slot*/, ULong64_t entry) { return 0. + 2. * ((double)entry) / nEvent; }, - {"rdfentry_"}); - auto meanX = dd.Mean("x"); - auto meanY = dd.Mean("y"); - - constexpr double targetXMean = 0.; - constexpr double targetYMean = 1.; - constexpr double targetXVar = 100./12.; - constexpr double targetYVar = 4./12.; - - // We create RooFit variables that will represent the dataset. - RooRealVar x("x", "x", -5., 5.); - RooRealVar y("y", "y", -50., 50.); - x.setBins(10); - y.setBins(100); - - - auto rooDataSet = dd.Book( - RooDataSetHelper("dataset", // Name - "Title of dataset", // Title - RooArgSet(x, y) // Variables in this dataset - ), - {"x", "y"} // Column names in RDataFrame. 
- ); - - RooDataHistHelper rdhMaker{"datahist", // Name - "Title of data hist", // Title - RooArgSet(x, y) // Variables in this dataset - }; - - // Then, we move it into the RDataFrame action: - auto rooDataHist = dd.Book(std::move(rdhMaker), {"x", "y"}); - - - - // Run it and inspect the results - // ------------------------------- - EXPECT_NEAR(meanX.GetValue(), targetXMean, 1.E-4); - EXPECT_NEAR(meanY.GetValue(), targetYMean, 1.E-4); - - ASSERT_EQ(rooDataSet->numEntries(), nEvent); - EXPECT_NEAR(rooDataSet->sumEntries(), nEvent, nEvent * 1.E-9); - EXPECT_NEAR(rooDataSet->mean(x), targetXMean, 1.E-4); - EXPECT_NEAR(rooDataSet->moment(x, 2.), targetXVar, targetXVar * 1.E-4); - EXPECT_NEAR(rooDataSet->mean(y), targetYMean, 1.E-4); - EXPECT_NEAR(rooDataSet->moment(y, 2.), targetYVar, targetYVar * 1.E-4); - - EXPECT_NEAR(rooDataHist->sumEntries(), nEvent, nEvent * 1.E-9); - EXPECT_NEAR(rooDataHist->mean(x), targetXMean, 1.E-4); - EXPECT_NEAR(rooDataHist->moment(x, 2.), 8.25, 1.E-2); // Variance is affected in a binned distribution - EXPECT_NEAR(rooDataHist->mean(y), targetYMean, 1.E-4); - EXPECT_NEAR(rooDataHist->moment(y, 2.), 0.25, 1.E-2); // Variance is affected in a binned distribution + .DefineSlot("y", [=](unsigned int /*slot*/, ULong64_t entry) { return 0. + 2. * ((double)entry) / nEvent; }, + {"rdfentry_"}) + .DefineSlot("w", [=](unsigned int /*slot*/, ULong64_t /*entry*/) { return 0.5; }, {"rdfentry_"}); +} + +RooArgSet makeVariablesSet() +{ + // We create RooFit variables that will represent the dataset. 
+ auto x = std::make_unique("x", "x", -5., 5.); + auto y = std::make_unique("y", "y", -50., 50.); + x->setBins(10); + y->setBins(100); + + RooArgSet out; + out.addOwned(std::move(x)); + out.addOwned(std::move(y)); + return out; +} + +} // namespace + +TEST(RooAbsDataHelper, RooDataSet) +{ + auto dd = makeDataFrame(); + RooArgSet vars = makeVariablesSet(); + RooRealVar &x = static_cast(vars["x"]); + RooRealVar &y = static_cast(vars["y"]); + + RooDataSetHelper rdsMaker{"dataset", "Title of dataset", vars}; + + auto rooDataSet = dd.Book(std::move(rdsMaker), {"x", "y"}); + + double sumEntries = nEvent; + + ASSERT_EQ(rooDataSet->numEntries(), nEvent); + EXPECT_NEAR(rooDataSet->sumEntries(), sumEntries, sumEntries * 1.E-9); + EXPECT_NEAR(rooDataSet->mean(x), targetXMean, 1.E-4); + EXPECT_NEAR(rooDataSet->moment(x, 2.), targetXVar, targetXVar * 1.E-4); + EXPECT_NEAR(rooDataSet->mean(y), targetYMean, 1.E-4); + EXPECT_NEAR(rooDataSet->moment(y, 2.), targetYVar, targetYVar * 1.E-4); +} + +TEST(RooAbsDataHelper, RooDataSetWeighted) +{ + auto dd = makeDataFrame(); + RooArgSet vars = makeVariablesSet(); + RooRealVar &x = static_cast(vars["x"]); + RooRealVar &y = static_cast(vars["y"]); + + RooDataSetHelper rdsMaker{"dataset", "Title of dataset", vars, RooFit::WeightVar("w")}; + + auto rooDataSet = dd.Book(std::move(rdsMaker), {"x", "y", "w"}); + + auto sumEntriesResult = dd.Sum("w"); + + double sumEntries = sumEntriesResult.GetValue(); + + ASSERT_EQ(rooDataSet->numEntries(), nEvent); + EXPECT_NEAR(rooDataSet->sumEntries(), sumEntries, sumEntries * 1.E-9); + EXPECT_NEAR(rooDataSet->mean(x), targetXMean, 1.E-4); + EXPECT_NEAR(rooDataSet->moment(x, 2.), targetXVar, targetXVar * 1.E-4); + EXPECT_NEAR(rooDataSet->mean(y), targetYMean, 1.E-4); + EXPECT_NEAR(rooDataSet->moment(y, 2.), targetYVar, targetYVar * 1.E-4); } +TEST(RooAbsDataHelper, RooDataHist) +{ + auto dd = makeDataFrame(); + RooArgSet vars = makeVariablesSet(); + RooRealVar &x = static_cast(vars["x"]); + RooRealVar &y = 
static_cast(vars["y"]); + + RooDataHistHelper rdhMaker{"datahist", "Title of data hist", vars}; + + // Then, we move it into the RDataFrame action: + auto rooDataHist = dd.Book(std::move(rdhMaker), {"x", "y"}); + + double sumEntries = nEvent; + + EXPECT_NEAR(rooDataHist->sumEntries(), sumEntries, sumEntries * 1.E-9); + EXPECT_NEAR(rooDataHist->mean(x), targetXMean, 1.E-4); + EXPECT_NEAR(rooDataHist->moment(x, 2.), 8.25, 1.E-2); // Variance is affected in a binned distribution + EXPECT_NEAR(rooDataHist->mean(y), targetYMean, 1.E-4); + EXPECT_NEAR(rooDataHist->moment(y, 2.), 0.25, 1.E-2); // Variance is affected in a binned distribution +} + +TEST(RooAbsDataHelper, RooDataHistWeighted) +{ + auto dd = makeDataFrame(); + RooArgSet vars = makeVariablesSet(); + RooRealVar &x = static_cast(vars["x"]); + RooRealVar &y = static_cast(vars["y"]); + + RooDataHistHelper rdhMaker{"datahist", "Title of data hist", vars}; + + // Then, we move it into the RDataFrame action: + auto rooDataHist = dd.Book(std::move(rdhMaker), {"x", "y", "w"}); + + auto sumEntriesResult = dd.Sum("w"); + + double sumEntries = sumEntriesResult.GetValue(); + + EXPECT_NEAR(rooDataHist->sumEntries(), sumEntries, sumEntries * 1.E-9); + EXPECT_NEAR(rooDataHist->mean(x), targetXMean, 1.E-4); + EXPECT_NEAR(rooDataHist->moment(x, 2.), 8.25, 1.E-2); // Variance is affected in a binned distribution + EXPECT_NEAR(rooDataHist->mean(y), targetYMean, 1.E-4); + EXPECT_NEAR(rooDataHist->moment(y, 2.), 0.25, 1.E-2); // Variance is affected in a binned distribution +} /// This test verifies that out-of-range events are correctly skipped, /// consistent with the construction of a RooDataSet from a TTree. 
-TEST(RooAbsDataHelper, SkipEventsOutOfRange) { +TEST(RooAbsDataHelper, SkipEventsOutOfRange) +{ - RooMsgService::instance().getStream(0).removeTopic(RooFit::DataHandling); - RooMsgService::instance().getStream(1).removeTopic(RooFit::DataHandling); + RooMsgService::instance().getStream(0).removeTopic(RooFit::DataHandling); + RooMsgService::instance().getStream(1).removeTopic(RooFit::DataHandling); - std::size_t nEvents = 100; - const char * filename = "testRooAbsDataHelper_SkipEventsOutOfRange_tree.root"; - const char * treename = "tree"; + std::size_t nEvents = 100; + const char *filename = "testRooAbsDataHelper_SkipEventsOutOfRange_tree.root"; + const char *treename = "tree"; - { - // Create the ROOT file with the dataset - ROOT::RDataFrame rdf(nEvents); - auto rdf_x = rdf.Define("x", [](){ return gRandom->Gaus(0.0, 1.0); }); - rdf_x.Snapshot(treename, filename); - // We can't reuse the same RDataFrame now, because when we rerun the event - // loop it would generate new random values. So this scope ends here and we - // open a new RDF from the file later. - } + { + // Create the ROOT file with the dataset + ROOT::RDataFrame rdf(nEvents); + auto rdf_x = rdf.Define("x", []() { return gRandom->Gaus(0.0, 1.0); }); + rdf_x.Snapshot(treename, filename); + // We can't reuse the same RDataFrame now, because when we rerun the event + // loop it would generate new random values. So this scope ends here and we + // open a new RDF from the file later. 
+ } - // Open dataset with RDataFrame and TTree - std::unique_ptr file{TFile::Open(filename, "READ")}; - auto tree = file->Get(treename); - ROOT::RDataFrame rdf(treename, filename); + // Open dataset with RDataFrame and TTree + std::unique_ptr file{TFile::Open(filename, "READ")}; + auto tree = file->Get(treename); + ROOT::RDataFrame rdf(treename, filename); - // Create RooFit variable - RooRealVar x{"x", "x", 0.0, -2.0, 2.0}; + // Create RooFit variable + RooRealVar x{"x", "x", 0.0, -2.0, 2.0}; - // Create a RooDataset from the TTree, and one from the RDataFrame - RooDataSet dataSetTree{"dataSetTree", "dataSetTree", x, RooFit::Import(*tree)}; - auto dataSetRDF = rdf.Book(RooDataSetHelper("dataSetRDF", "dataSetRDF", RooArgSet(x)), {"x"}); + // Create a RooDataset from the TTree, and one from the RDataFrame + RooDataSet dataSetTree{"dataSetTree", "dataSetTree", x, RooFit::Import(*tree)}; + auto dataSetRDF = rdf.Book(RooDataSetHelper("dataSetRDF", "dataSetRDF", RooArgSet(x)), {"x"}); - // Check if in the creation of the datasets, the entries outside the - // variable range were successfully discarded. - double nPassing = *rdf.Filter("x >= -2 && x <= 2.0").Count(); + // Check if in the creation of the datasets, the entries outside the + // variable range were successfully discarded. 
+ double nPassing = *rdf.Filter("x >= -2 && x <= 2.0").Count(); - EXPECT_EQ(dataSetRDF->numEntries(), nPassing); - EXPECT_EQ(dataSetTree.numEntries(), nPassing); + EXPECT_EQ(dataSetRDF->numEntries(), nPassing); + EXPECT_EQ(dataSetTree.numEntries(), nPassing); - file.reset(); // Close file - gSystem->Unlink(filename); // delete temporary file + file.reset(); // Close file + gSystem->Unlink(filename); // delete temporary file - RooMsgService::instance().getStream(0).addTopic(RooFit::DataHandling); - RooMsgService::instance().getStream(1).addTopic(RooFit::DataHandling); + RooMsgService::instance().getStream(0).addTopic(RooFit::DataHandling); + RooMsgService::instance().getStream(1).addTopic(RooFit::DataHandling); } diff --git a/roofit/roofitcore/CMakeLists.txt b/roofit/roofitcore/CMakeLists.txt index 683acfddae433..1a9b77da0f9f6 100644 --- a/roofit/roofitcore/CMakeLists.txt +++ b/roofit/roofitcore/CMakeLists.txt @@ -57,6 +57,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(RooFitCore RooAbsCategoryLValue.h RooAbsCollection.h RooAbsData.h + RooAbsDataFiller.h RooAbsDataStore.h RooAbsFunc.h RooAbsGenContext.h @@ -264,6 +265,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(RooFitCore src/RooAbsCategoryLValue.cxx src/RooAbsCollection.cxx src/RooAbsData.cxx + src/RooAbsDataFiller.cxx src/RooAbsDataStore.cxx src/RooAbsFunc.cxx src/RooAbsGenContext.cxx diff --git a/roofit/roofitcore/inc/RooAbsDataFiller.h b/roofit/roofitcore/inc/RooAbsDataFiller.h new file mode 100644 index 0000000000000..274549b78a5e2 --- /dev/null +++ b/roofit/roofitcore/inc/RooAbsDataFiller.h @@ -0,0 +1,71 @@ +/* + * Project: RooFit + * Authors: + * Stephan Hageboeck, CERN 2021 + * + * Copyright (c) 2024, CERN + * + * Redistribution and use in source and binary forms, + * with or without modification, are permitted according to the terms + * listed in LICENSE (http://roofit.sourceforge.net/license.txt) + */ + +#ifndef RooFit_RooFitCore_RooAbsDataFiller_h +#define RooFit_RooFitCore_RooAbsDataFiller_h + +#include +#include +#include 
+#include + +#include +#include +#include +#include + +class TTreeReader; + +namespace RooFit { +namespace Detail { + +class RooAbsDataFiller { +public: + RooAbsDataFiller(); + + /// Move constructor. It transfers the buffered events; the RooAbsData object itself is not owned by this class. + RooAbsDataFiller(RooAbsDataFiller &&other) : _events{std::move(other._events)}, _eventSize{other._eventSize} {} + + /// Copy is discouraged. + /// Use `rdataframe.Book<...>(std::move(absDataHelper), ...)` instead. + RooAbsDataFiller(const RooAbsDataFiller &) = delete; + /// RDataFrame interface method. + void Initialize(); + /// RDataFrame interface method. No tasks. + void InitTask(TTreeReader *, unsigned int) {} + /// RDataFrame interface method. + std::string GetActionName() { return "RooDataSetHelper"; } + + void ExecImpl(std::size_t nValues, std::vector& vector); + void Finalize(); + + virtual RooAbsData &GetAbsData() = 0; + +protected: + void FillAbsData(const std::vector &events, unsigned int eventSize); + + std::mutex _mutexDataset; + std::size_t _numInvalid = 0; + + std::vector> _events; // One vector of values per data-processing slot + std::size_t _eventSize; // Number of variables in dataset + std::size_t _nValues; // Number of variables in dataframe + + bool _isWeighted = false; + bool _isDataHist = false; +}; + +} // namespace Detail +} // namespace RooFit + + +#endif diff --git a/roofit/roofitcore/src/RooAbsDataFiller.cxx b/roofit/roofitcore/src/RooAbsDataFiller.cxx new file mode 100644 index 0000000000000..d32fc1e0f8bc6 --- /dev/null +++ b/roofit/roofitcore/src/RooAbsDataFiller.cxx @@ -0,0 +1,138 @@ +/* + * Project: RooFit + * Authors: + * Stephan Hageboeck, CERN 2021 + * + * Copyright (c) 2024, CERN + * + * Redistribution and use in source and binary forms, + * with or without modification, are permitted according to the terms + * listed in LICENSE (http://roofit.sourceforge.net/license.txt) + */ + +#include + +#include + +#include + +#include + +namespace RooFit { +namespace Detail { +
+RooAbsDataFiller::RooAbsDataFiller() +{ + const auto nSlots = ROOT::IsImplicitMTEnabled() ? ROOT::GetThreadPoolSize() : 1; + _events.resize(nSlots); +} + +void RooAbsDataFiller::Initialize() +{ + RooAbsData &absData = GetAbsData(); + + _eventSize = absData.get()->size(); + _isWeighted = absData.isWeighted(); + _isDataHist = std::string{absData.ClassName()} != "RooDataSet"; +} + +/// Append all `events` to the internal RooDataSet or increment the bins of a RooDataHist at the given locations. +/// +/// \param events Events to fill into `data`. The layout is assumed to be `(x, y, z, ...) (x, y, z, ...), (...)`. +/// \note The order of the variables inside `events` must be consistent with the order given in the constructor. +/// No matching by name is performed. +/// \param eventSize Size of a single event. +void RooAbsDataFiller::FillAbsData(const std::vector &events, unsigned int eventSize) +{ + if (events.empty()) + return; + + RooAbsData &absData = GetAbsData(); + const RooArgSet &argSet = *absData.get(); + + // Relevant for weighted RooDataSet + RooRealVar *weightVar = !_isDataHist && _isWeighted ? static_cast(absData).weightVar() : nullptr; + + for (std::size_t i = 0; i < events.size(); i += eventSize) { + + // The RooDataHist has no dedicated RooRealVar for the weight. So we just + // use a double. + double weightVal = 1.0; + + // Creating a RooDataSet from an RDataFrame should be consistent with the + // creation from a TTree. The construction from a TTree discards entries + // outside the variable definition range, so we have to do that too (see + // also RooTreeDataStore::loadValues). 
+ + bool allOK = true; + for (std::size_t j = 0; j < eventSize; ++j) { + RooAbsRealLValue *destArg = nullptr; + if (j < argSet.size()) { + destArg = static_cast(argSet[j]); + } else { + destArg = weightVar; + } + double sourceVal = events[i + j]; + + if (destArg && !destArg->inRange(sourceVal, nullptr)) { + _numInvalid++; + allOK = false; + const auto prefix = std::string(absData.ClassName()) + "Helper::FillAbsData(" + absData.GetName() + ") "; + if (_numInvalid < 5) { + // Unlike in the TreeVectorStore case, we don't log the event + // number here because we don't know it anyway, because of + // RDataFrame slots and multithreading. + oocoutI(nullptr, DataHandling) << prefix << "Skipping event because " << destArg->GetName() + << " cannot accommodate the value " << sourceVal << "\n"; + } else if (_numInvalid == 5) { + oocoutI(nullptr, DataHandling) << prefix << "Skipping ...\n"; + } + break; + } + if (destArg) { + destArg->setVal(sourceVal); + } else { + weightVal = sourceVal; + } + } + if (allOK) { + absData.add(argSet, weightVar ? weightVar->getVal() : weightVal); + } + } +} + +/// Empty all buffers into the dataset/hist to finish processing. 
+void RooAbsDataFiller::Finalize() +{ + RooAbsData &absData = GetAbsData(); + + for (auto &vector : _events) { + FillAbsData(vector, _nValues); + vector.clear(); + } + + if (_numInvalid > 0) { + const auto prefix = std::string(absData.ClassName()) + "Helper::Finalize(" + absData.GetName() + ") "; + oocoutW(nullptr, DataHandling) << prefix << "Ignored " << _numInvalid << " out-of-range events\n"; + } +} + +void RooAbsDataFiller::ExecImpl(std::size_t nValues, std::vector &vector) +{ + if (nValues != _eventSize && !(_isWeighted && nValues == _eventSize + 1)) { + throw std::invalid_argument(std::string("RooAbsData can hold ") + std::to_string(_eventSize) + + " variables per event (plus an optional weight in case of weighted data), but RDataFrame passed " + + std::to_string(nValues) + " columns."); + } + + _nValues = nValues; + + if (vector.size() > 1024 && _mutexDataset.try_lock()) { + const std::lock_guard guard(_mutexDataset, std::adopt_lock_t()); + FillAbsData(vector, _nValues); + vector.clear(); + } +} + +} // namespace Detail +} // namespace RooFit