diff --git a/roofit/RDataFrameHelpers/inc/RooAbsDataHelper.h b/roofit/RDataFrameHelpers/inc/RooAbsDataHelper.h index a9a8530ba6871..237410a1e0d3b 100644 --- a/roofit/RDataFrameHelpers/inc/RooAbsDataHelper.h +++ b/roofit/RDataFrameHelpers/inc/RooAbsDataHelper.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -71,6 +72,7 @@ class RooAbsDataHelper : public ROOT::Detail::RDF::RActionImpl _dataset; std::mutex _mutex_dataset; + std::size_t _numInvalid = 0; std::vector> _events; // One vector of values per data-processing slot const std::size_t _eventSize; // Number of variables in dataset @@ -146,6 +148,11 @@ class RooAbsDataHelper : public ROOT::Detail::RDF::RActionImpl0) { + const auto prefix = std::string(_dataset->ClassName()) + "Helper::Finalize(" + _dataset->GetName() + ") "; + oocoutW(nullptr, DataHandling) << prefix << "Ignored " << _numInvalid << " out-of-range events\n"; + } } @@ -163,10 +170,37 @@ class RooAbsDataHelper : public ROOT::Detail::RDF::RActionImplget(); for (std::size_t i = 0; i < events.size(); i += eventSize) { + + // Creating a RooDataSet from an RDataFrame should be consistent with the + // creation from a TTree. The construction from a TTree discards entries + // outside the variable definition range, so we have to do that too (see + // also RooTreeDataStore::loadValues). + + bool allOK = true; for (std::size_t j=0; j < eventSize; ++j) { - static_cast(argSet[j])->setVal(events[i+j]); + auto * destArg = static_cast(argSet[j]); + double sourceVal = events[i+j]; + + if (!destArg->inRange(sourceVal, nullptr)) { + _numInvalid++ ; + allOK = false; + const auto prefix = std::string(_dataset->ClassName()) + "Helper::FillDataSet(" + _dataset->GetName() + ") "; + if (_numInvalid < 5) { + // Unlike in the TreeVectorStore case, we don't log the event + // number here because we don't know it anyway, because of + // RDataFrame slots and multithreading. + oocoutI(nullptr, DataHandling) << prefix << "Skipping event because " << destArg->GetName() + << " cannot accommodate the value " << sourceVal << "\n"; + } else if (_numInvalid == 5) { + oocoutI(nullptr, DataHandling) << prefix << "Skipping ...\n"; + } + break ; + } + destArg->setVal(sourceVal); + } + if(allOK) { + _dataset->add(argSet); } - _dataset->add(argSet); } } }; diff --git a/roofit/RDataFrameHelpers/test/testActionHelpers.cxx b/roofit/RDataFrameHelpers/test/testActionHelpers.cxx index fc4db76b78e5d..62153b5de583d 100644 --- a/roofit/RDataFrameHelpers/test/testActionHelpers.cxx +++ b/roofit/RDataFrameHelpers/test/testActionHelpers.cxx @@ -4,8 +4,12 @@ /// \author Stephan Hageboeck (CERN) #include +#include +#include #include +#include +#include #include #include "gtest/gtest.h" @@ -76,3 +80,50 @@ TEST(RooAbsDataHelper, MTConstruction) EXPECT_NEAR(rooDataHist->moment(y, 2.), 0.25, 1.E-2); // Variance is affected in a binned distribution } + +/// This test verifies that out-of-range events are correctly skipped, +/// consistent with the construction of a RooDataSet from a TTree. +TEST(RooAbsDataHelper, SkipEventsOutOfRange) { + + RooMsgService::instance().getStream(0).removeTopic(RooFit::DataHandling); + RooMsgService::instance().getStream(1).removeTopic(RooFit::DataHandling); + + std::size_t nEvents = 100; + const char * filename = "testRooAbsDataHelper_SkipEventsOutOfRange_tree.root"; + const char * treename = "tree"; + + { + // Create the ROOT file with the dataset + ROOT::RDataFrame rdf(nEvents); + auto rdf_x = rdf.Define("x", [](){ return gRandom->Gaus(0.0, 1.0); }); + rdf_x.Snapshot(treename, filename); + // We can't reuse the same RDataFrame now, because when we rerun the event + // loop it would generate new random values. So this scope ends here and we + // open a new RDF from the file later. + } + + // Open dataset with RDataFrame and TTree + std::unique_ptr file{TFile::Open(filename, "READ")}; + auto tree = file->Get(treename); + ROOT::RDataFrame rdf(treename, filename); + + // Create RooFit variable + RooRealVar x{"x", "x", 0.0, -2.0, 2.0}; + + // Create a RooDataset from the TTree, and one from the RDataFrame + RooDataSet dataSetTree{"dataSetTree", "dataSetTree", tree, x}; + auto dataSetRDF = rdf.Book(RooDataSetHelper("dataSetRDF", "dataSetRDF", RooArgSet(x)), {"x"}); + + // Check if in the creation of the datasets, the entries outside the + // variable range were sucessfully discarded. + double nPassing = *rdf.Filter("x >= -2 && x <= 2.0").Count(); + + EXPECT_EQ(dataSetRDF->numEntries(), nPassing); + EXPECT_EQ(dataSetTree.numEntries(), nPassing); + + file.reset(); // Close file + gSystem->Unlink(filename); // delete temporary file + + RooMsgService::instance().getStream(0).addTopic(RooFit::DataHandling); + RooMsgService::instance().getStream(1).addTopic(RooFit::DataHandling); +}