Skip to content

Commit

Permalink
[RF] Support weighted filling from RDF to RooFit in RooAbsDataHelper
Browse files Browse the repository at this point in the history
Support weighted filling from RDF to RooFit in RooAbsDataHelper and also
implement a test for it.

Closes #7223.
  • Loading branch information
guitargeek committed Jul 23, 2024
1 parent 455abc5 commit 3cc28dc
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 107 deletions.
4 changes: 3 additions & 1 deletion roofit/RDataFrameHelpers/inc/RooAbsDataHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
/// - Book the helper as an RDataFrame action. Here, the RDataFrame column types have to be passed as template
/// parameters.
/// - Pass the column names to the Book action. These are matched by position to the variables of the dataset.
/// If there is one more column name than variables in the dataset, the last column's values will be used as weights.
///
/// All arguments passed to the helper are forwarded to RooDataSet::RooDataSet() / RooDataHist::RooDataHist().
///
Expand All @@ -42,7 +43,8 @@
/// RooDataSetHelper{"dataset", // Name (directly forwarded to RooDataSet::RooDataSet())
/// "Title of dataset", // Title ( ~ " ~ )
/// RooArgSet(x, y) }, // Variables to create in dataset
/// {"x", "y"} // Column names from RDataFrame
/// {"x", "y", "weight"} // Column names from RDataFrame
/// // (this example uses an additional column for the weight)
/// );
///
/// ```
Expand Down
245 changes: 153 additions & 92 deletions roofit/RDataFrameHelpers/test/testActionHelpers.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -14,116 +14,177 @@

#include "gtest/gtest.h"

TEST(RooAbsDataHelper, MTConstruction)
namespace {

constexpr std::size_t nEvent = 200000;

constexpr double targetXMean = 0.;
constexpr double targetYMean = 1.;
constexpr double targetXVar = 100. / 12.;
constexpr double targetYVar = 4. / 12.;

auto makeDataFrame()
{
#ifdef R__USE_IMT
ROOT::EnableImplicitMT(4);
ROOT::EnableImplicitMT(4);
#endif

// We create an RDataFrame with two columns, each filled with 200000 evenly spaced values computed from the entry number.
constexpr std::size_t nEvent = 200000;
ROOT::RDataFrame d(nEvent);
auto dd =
d.DefineSlot("x", [=](unsigned int /*slot*/, ULong64_t entry) { return -5. + 10. * ((double)entry) / nEvent; },
return ROOT::RDataFrame{nEvent}
.DefineSlot("x", [=](unsigned int /*slot*/, ULong64_t entry) { return -5. + 10. * ((double)entry) / nEvent; },
{"rdfentry_"})
.DefineSlot("y", [=](unsigned int /*slot*/, ULong64_t entry) { return 0. + 2. * ((double)entry) / nEvent; },
{"rdfentry_"});
auto meanX = dd.Mean("x");
auto meanY = dd.Mean("y");

constexpr double targetXMean = 0.;
constexpr double targetYMean = 1.;
constexpr double targetXVar = 100./12.;
constexpr double targetYVar = 4./12.;

// We create RooFit variables that will represent the dataset.
RooRealVar x("x", "x", -5., 5.);
RooRealVar y("y", "y", -50., 50.);
x.setBins(10);
y.setBins(100);


auto rooDataSet = dd.Book<double, double>(
RooDataSetHelper("dataset", // Name
"Title of dataset", // Title
RooArgSet(x, y) // Variables in this dataset
),
{"x", "y"} // Column names in RDataFrame.
);

RooDataHistHelper rdhMaker{"datahist", // Name
"Title of data hist", // Title
RooArgSet(x, y) // Variables in this dataset
};

// Then, we move it into the RDataFrame action:
auto rooDataHist = dd.Book<double, double>(std::move(rdhMaker), {"x", "y"});



// Run it and inspect the results
// -------------------------------
EXPECT_NEAR(meanX.GetValue(), targetXMean, 1.E-4);
EXPECT_NEAR(meanY.GetValue(), targetYMean, 1.E-4);

ASSERT_EQ(rooDataSet->numEntries(), nEvent);
EXPECT_NEAR(rooDataSet->sumEntries(), nEvent, nEvent * 1.E-9);
EXPECT_NEAR(rooDataSet->mean(x), targetXMean, 1.E-4);
EXPECT_NEAR(rooDataSet->moment(x, 2.), targetXVar, targetXVar * 1.E-4);
EXPECT_NEAR(rooDataSet->mean(y), targetYMean, 1.E-4);
EXPECT_NEAR(rooDataSet->moment(y, 2.), targetYVar, targetYVar * 1.E-4);

EXPECT_NEAR(rooDataHist->sumEntries(), nEvent, nEvent * 1.E-9);
EXPECT_NEAR(rooDataHist->mean(x), targetXMean, 1.E-4);
EXPECT_NEAR(rooDataHist->moment(x, 2.), 8.25, 1.E-2); // Variance is affected in a binned distribution
EXPECT_NEAR(rooDataHist->mean(y), targetYMean, 1.E-4);
EXPECT_NEAR(rooDataHist->moment(y, 2.), 0.25, 1.E-2); // Variance is affected in a binned distribution
.DefineSlot("y", [=](unsigned int /*slot*/, ULong64_t entry) { return 0. + 2. * ((double)entry) / nEvent; },
{"rdfentry_"})
.DefineSlot("w", [=](unsigned int /*slot*/, ULong64_t /*entry*/) { return 0.5; }, {"rdfentry_"});
}

/// Build the set of RooFit observables shared by the tests in this file.
///
/// The set contains two real-valued variables, "x" in [-5, 5] with 10 bins and
/// "y" in [-50, 50] with 100 bins. Both are handed over with addOwned(), so
/// callers only need to keep the returned set.
RooArgSet makeVariablesSet()
{
   // Configure each observable completely before handing it over to the set.
   auto xVar = std::make_unique<RooRealVar>("x", "x", -5., 5.);
   xVar->setBins(10);

   auto yVar = std::make_unique<RooRealVar>("y", "y", -50., 50.);
   yVar->setBins(100);

   RooArgSet variables;
   variables.addOwned(std::move(xVar));
   variables.addOwned(std::move(yVar));
   return variables;
}

} // namespace

/// Fill an unweighted RooDataSet from the data frame and compare its entry
/// count, sum of entries, means and second moments with the target values.
TEST(RooAbsDataHelper, RooDataSet)
{
   // Without a weight column, the expected sum of entries is the number of events.
   const double expectedSum = nEvent;

   auto frame = makeDataFrame();
   RooArgSet observables = makeVariablesSet();
   auto &x = static_cast<RooRealVar &>(observables["x"]);
   auto &y = static_cast<RooRealVar &>(observables["y"]);

   // Book the helper with exactly one data frame column per observable.
   RooDataSetHelper helper{"dataset", "Title of dataset", observables};
   auto dataset = frame.Book<double, double>(std::move(helper), {"x", "y"});

   ASSERT_EQ(dataset->numEntries(), nEvent);
   EXPECT_NEAR(dataset->sumEntries(), expectedSum, expectedSum * 1.E-9);
   EXPECT_NEAR(dataset->mean(x), targetXMean, 1.E-4);
   EXPECT_NEAR(dataset->moment(x, 2.), targetXVar, targetXVar * 1.E-4);
   EXPECT_NEAR(dataset->mean(y), targetYMean, 1.E-4);
   EXPECT_NEAR(dataset->moment(y, 2.), targetYVar, targetYVar * 1.E-4);
}

/// Fill a weighted RooDataSet from the data frame, passing "w" as an extra
/// trailing column, and compare the result with the target values.
TEST(RooAbsDataHelper, RooDataSetWeighted)
{
   auto frame = makeDataFrame();
   RooArgSet observables = makeVariablesSet();
   auto &x = static_cast<RooRealVar &>(observables["x"]);
   auto &y = static_cast<RooRealVar &>(observables["y"]);

   // The helper is told about the weight variable "w", and the matching
   // column is passed as a third column after the two observables.
   RooDataSetHelper helper{"dataset", "Title of dataset", observables, RooFit::WeightVar("w")};
   auto dataset = frame.Book<double, double, double>(std::move(helper), {"x", "y", "w"});

   // Reference value: the sum of the weight column as computed by the data frame.
   auto weightSum = frame.Sum("w");
   const double expectedSum = weightSum.GetValue();

   ASSERT_EQ(dataset->numEntries(), nEvent);
   EXPECT_NEAR(dataset->sumEntries(), expectedSum, expectedSum * 1.E-9);
   EXPECT_NEAR(dataset->mean(x), targetXMean, 1.E-4);
   EXPECT_NEAR(dataset->moment(x, 2.), targetXVar, targetXVar * 1.E-4);
   EXPECT_NEAR(dataset->mean(y), targetYMean, 1.E-4);
   EXPECT_NEAR(dataset->moment(y, 2.), targetYVar, targetYVar * 1.E-4);
}

/// Fill an unweighted RooDataHist from the data frame and compare its sum of
/// entries, means and second moments with the expected values.
TEST(RooAbsDataHelper, RooDataHist)
{
   // Without a weight column, the expected sum of entries is the number of events.
   const double expectedSum = nEvent;

   auto frame = makeDataFrame();
   RooArgSet observables = makeVariablesSet();
   auto &x = static_cast<RooRealVar &>(observables["x"]);
   auto &y = static_cast<RooRealVar &>(observables["y"]);

   // Construct the helper first, then hand it over to the RDataFrame action.
   RooDataHistHelper helper{"datahist", "Title of data hist", observables};
   auto dataHist = frame.Book<double, double>(std::move(helper), {"x", "y"});

   EXPECT_NEAR(dataHist->sumEntries(), expectedSum, expectedSum * 1.E-9);
   EXPECT_NEAR(dataHist->mean(x), targetXMean, 1.E-4);
   EXPECT_NEAR(dataHist->moment(x, 2.), 8.25, 1.E-2); // Variance is affected in a binned distribution
   EXPECT_NEAR(dataHist->mean(y), targetYMean, 1.E-4);
   EXPECT_NEAR(dataHist->moment(y, 2.), 0.25, 1.E-2); // Variance is affected in a binned distribution
}

/// Fill a RooDataHist from the data frame with "w" passed as an extra trailing
/// column, and compare the result with the expected values.
TEST(RooAbsDataHelper, RooDataHistWeighted)
{
   auto frame = makeDataFrame();
   RooArgSet observables = makeVariablesSet();
   auto &x = static_cast<RooRealVar &>(observables["x"]);
   auto &y = static_cast<RooRealVar &>(observables["y"]);

   // The helper is built exactly as in the unweighted case; only the booking
   // differs, with "w" passed as a third column after the two observables.
   RooDataHistHelper helper{"datahist", "Title of data hist", observables};
   auto dataHist = frame.Book<double, double, double>(std::move(helper), {"x", "y", "w"});

   // Reference value: the sum of the weight column as computed by the data frame.
   auto weightSum = frame.Sum("w");
   const double expectedSum = weightSum.GetValue();

   EXPECT_NEAR(dataHist->sumEntries(), expectedSum, expectedSum * 1.E-9);
   EXPECT_NEAR(dataHist->mean(x), targetXMean, 1.E-4);
   EXPECT_NEAR(dataHist->moment(x, 2.), 8.25, 1.E-2); // Variance is affected in a binned distribution
   EXPECT_NEAR(dataHist->mean(y), targetYMean, 1.E-4);
   EXPECT_NEAR(dataHist->moment(y, 2.), 0.25, 1.E-2); // Variance is affected in a binned distribution
}

/// This test verifies that out-of-range events are correctly skipped,
/// consistent with the construction of a RooDataSet from a TTree.
TEST(RooAbsDataHelper, SkipEventsOutOfRange) {
TEST(RooAbsDataHelper, SkipEventsOutOfRange)
{

RooMsgService::instance().getStream(0).removeTopic(RooFit::DataHandling);
RooMsgService::instance().getStream(1).removeTopic(RooFit::DataHandling);
RooMsgService::instance().getStream(0).removeTopic(RooFit::DataHandling);
RooMsgService::instance().getStream(1).removeTopic(RooFit::DataHandling);

std::size_t nEvents = 100;
const char * filename = "testRooAbsDataHelper_SkipEventsOutOfRange_tree.root";
const char * treename = "tree";
std::size_t nEvents = 100;
const char *filename = "testRooAbsDataHelper_SkipEventsOutOfRange_tree.root";
const char *treename = "tree";

{
// Create the ROOT file with the dataset
ROOT::RDataFrame rdf(nEvents);
auto rdf_x = rdf.Define("x", [](){ return gRandom->Gaus(0.0, 1.0); });
rdf_x.Snapshot(treename, filename);
// We can't reuse the same RDataFrame now, because when we rerun the event
// loop it would generate new random values. So this scope ends here and we
// open a new RDF from the file later.
}
{
// Create the ROOT file with the dataset
ROOT::RDataFrame rdf(nEvents);
auto rdf_x = rdf.Define("x", []() { return gRandom->Gaus(0.0, 1.0); });
rdf_x.Snapshot(treename, filename);
// We can't reuse the same RDataFrame now, because when we rerun the event
// loop it would generate new random values. So this scope ends here and we
// open a new RDF from the file later.
}

// Open dataset with RDataFrame and TTree
std::unique_ptr<TFile> file{TFile::Open(filename, "READ")};
auto tree = file->Get<TTree>(treename);
ROOT::RDataFrame rdf(treename, filename);
// Open dataset with RDataFrame and TTree
std::unique_ptr<TFile> file{TFile::Open(filename, "READ")};
auto tree = file->Get<TTree>(treename);
ROOT::RDataFrame rdf(treename, filename);

// Create RooFit variable
RooRealVar x{"x", "x", 0.0, -2.0, 2.0};
// Create RooFit variable
RooRealVar x{"x", "x", 0.0, -2.0, 2.0};

// Create a RooDataset from the TTree, and one from the RDataFrame
RooDataSet dataSetTree{"dataSetTree", "dataSetTree", x, RooFit::Import(*tree)};
auto dataSetRDF = rdf.Book<double>(RooDataSetHelper("dataSetRDF", "dataSetRDF", RooArgSet(x)), {"x"});
// Create a RooDataset from the TTree, and one from the RDataFrame
RooDataSet dataSetTree{"dataSetTree", "dataSetTree", x, RooFit::Import(*tree)};
auto dataSetRDF = rdf.Book<double>(RooDataSetHelper("dataSetRDF", "dataSetRDF", RooArgSet(x)), {"x"});

// Check if in the creation of the datasets, the entries outside the
// variable range were successfully discarded.
double nPassing = *rdf.Filter("x >= -2 && x <= 2.0").Count();
// Check if in the creation of the datasets, the entries outside the
// variable range were successfully discarded.
double nPassing = *rdf.Filter("x >= -2 && x <= 2.0").Count();

EXPECT_EQ(dataSetRDF->numEntries(), nPassing);
EXPECT_EQ(dataSetTree.numEntries(), nPassing);
EXPECT_EQ(dataSetRDF->numEntries(), nPassing);
EXPECT_EQ(dataSetTree.numEntries(), nPassing);

file.reset(); // Close file
gSystem->Unlink(filename); // delete temporary file
file.reset(); // Close file
gSystem->Unlink(filename); // delete temporary file

RooMsgService::instance().getStream(0).addTopic(RooFit::DataHandling);
RooMsgService::instance().getStream(1).addTopic(RooFit::DataHandling);
RooMsgService::instance().getStream(0).addTopic(RooFit::DataHandling);
RooMsgService::instance().getStream(1).addTopic(RooFit::DataHandling);
}
6 changes: 5 additions & 1 deletion roofit/roofitcore/inc/RooAbsDataFiller.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,15 @@ class RooAbsDataFiller {
protected:
void FillAbsData(const std::vector<double> &events, unsigned int eventSize);

std::mutex _mutex_dataset;
std::mutex _mutexDataset;
std::size_t _numInvalid = 0;

std::vector<std::vector<double>> _events; // One vector of values per data-processing slot
std::size_t _eventSize; // Number of variables in dataset
std::size_t _nValues; // Number of variables in dataframe

bool _isWeighted = false;
bool _isDataHist = false;
};

} // namespace Detail
Expand Down
Loading

0 comments on commit 3cc28dc

Please sign in to comment.