Skip to content

Commit

Permalink
[RF] Add RDataFrame action helper for creating RooFit datasets.
Browse files Browse the repository at this point in the history
To facilitate the creation of RooFit datasets from RDataFrame, an
ActionHelper is added to RooFit.
It lives in its own micro library, since it depends on RDataFrame, which
RooFit does not.

Fix root-project#7223.

Co-authored by Enrico Guiraud.
  • Loading branch information
hageboeck committed Mar 4, 2021
1 parent 0273582 commit 804516c
Show file tree
Hide file tree
Showing 4 changed files with 215 additions and 0 deletions.
2 changes: 2 additions & 0 deletions roofit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ add_subdirectory(roostats)
# histfactory is only added when `xml` is set.
if(xml)
add_subdirectory(histfactory)
endif()
# Added unconditionally; roofit/RDataFrameHelpers/CMakeLists.txt returns early on its own
# when `dataframe` is not set.
add_subdirectory(RDataFrameHelpers)

28 changes: 28 additions & 0 deletions roofit/RDataFrameHelpers/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright (C) 1995-2021, Rene Brun and Fons Rademakers.
# All rights reserved.
#
# For the licensing terms see $ROOTSYS/LICENSE.
# For the list of contributors see $ROOTSYS/README/CREDITS.

############################################################################
# CMakeLists.txt for registering a RooFit RDataFrameHelper
############################################################################

# This library enables compatibility of RooFit and RDataFrame.
# Since these two packages don't know about each other, we connect them here.

# Stop processing this directory when `dataframe` is not set
# (presumably the ROOT build option for RDataFrame -- confirm), so no library is registered.
if(NOT dataframe)
return()
endif()

# Register the RooFitRDataFrameHelpers library. Only one header and a LinkDef are listed
# and NO_SOURCES is passed; the library is declared to depend on both ROOTDataFrame and
# RooFitCore.
ROOT_STANDARD_LIBRARY_PACKAGE(RooFitRDataFrameHelpers
HEADERS
RooAbsDataHelper.h
NO_SOURCES
LINKDEF
LinkDef.h
DEPENDENCIES
ROOTDataFrame
RooFitCore
)

6 changes: 6 additions & 0 deletions roofit/RDataFrameHelpers/inc/LinkDef.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Dictionary selection file for the RDataFrame helper library.
// The pragmas below switch linking off for all globals and all classes;
// nothing is switched back on in this file.
#ifdef __CINT__

#pragma link off all globals;
#pragma link off all classes;

#endif
179 changes: 179 additions & 0 deletions roofit/RDataFrameHelpers/inc/RooAbsDataHelper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
/*****************************************************************************
* Project: RooFit *
* Package: RooFitCore *
* Authors: *
* WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
* DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
* *
* Copyright (c) 2000-2021, Regents of the University of California *
* and Stanford University. All rights reserved. *
* *
* Redistribution and use in source and binary forms, *
* with or without modification, are permitted according to the terms *
* listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
*****************************************************************************/
/// Create RooDataSet/RooDataHist from RDataFrame.
/// \date Mar 2021
/// \author Stephan Hageboeck (CERN)
#ifndef ROOABSDATAHELPER
#define ROOABSDATAHELPER

#include <RooRealVar.h>
#include <RooArgSet.h>
#include <RooDataSet.h>
#include <RooDataHist.h>

#include <ROOT/RDataFrame.hxx>
#include <ROOT/RDF/ActionHelpers.hxx>
#include <TROOT.h>

#include <vector>
#include <mutex>
#include <memory>
#include <cstddef>
#include <string>
#include <stdexcept>

class TTreeReader;

/// This is a helper for an RDataFrame action, which fills RooFit data classes.
///
/// \tparam DataSet_t Either RooDataSet or RooDataHist.
///
/// To construct RooDataSet / RooDataHist within RDataFrame
/// - Construct one of the two action helpers RooDataSetHelper or RooDataHistHelper. Pass constructor arguments
/// to RooAbsDataHelper::RooAbsDataHelper() as for the original classes.
/// The arguments are forwarded to the actual data classes without any changes.
/// - Book the helper as an RDataFrame action. Here, the RDataFrame column types have to be passed as template parameters.
/// - Pass the column names to the Book action. These are matched by position to the variables of the dataset.
///
/// All arguments passed to the helper's constructor are forwarded to RooDataSet::RooDataSet() / RooDataHist::RooDataHist().
///
/// #### Usage example:
/// ```
/// RooRealVar x("x", "x", -5., 5.);
/// RooRealVar y("y", "y", -50., 50.);
/// auto myDataSet = rdataframe.Book<double, double>(
/// RooDataSetHelper{"dataset", // Name (directly forwarded to RooDataSet::RooDataSet())
/// "Title of dataset", // Title ( ~ " ~ )
/// RooArgSet(x, y) }, // Variables to create in dataset
/// {"x", "y"} // Column names from RDataFrame
/// );
///
/// ```
/// \warning Variables in the dataset and columns in RDataFrame are **matched by position, not by name**.
/// This makes it easy to swap which columns are filled into the dataset.
template<class DataSet_t>
class RooAbsDataHelper : public ROOT::Detail::RDF::RActionImpl<RooAbsDataHelper<DataSet_t>> {
public:
  /// Type of the object that is filled by this helper and returned by GetResultPtr().
  using Result_t = DataSet_t;

private:
  std::shared_ptr<DataSet_t> _dataset; // Dataset/hist being filled; shared with the caller via GetResultPtr()
  std::mutex _mutex_dataset;           // Guards `_dataset` while Exec() flushes a slot buffer into it

  std::vector<std::vector<double>> _events; // One vector of values per data-processing slot
  const std::size_t _eventSize; // Number of variables in dataset

public:

  /// Construct a helper to create RooDataSet/RooDataHist.
  ///
  /// The dataset/hist is created immediately from the forwarded arguments, and one event
  /// buffer is allocated per processing slot: ROOT::GetThreadPoolSize() buffers if implicit
  /// multi-threading is enabled at construction time, a single buffer otherwise.
  ///
  /// \tparam Args_t Parameter pack of arguments.
  /// \param args Constructor arguments for RooDataSet::RooDataSet() or RooDataHist::RooDataHist().
  /// All arguments will be forwarded as they are.
  /// \note NOTE(review): the number of slots is fixed here; enabling implicit MT only after
  /// constructing the helper would presumably yield slot indices without a buffer -- confirm.
  template<typename... Args_t>
  RooAbsDataHelper(Args_t&&... args) :
    _dataset{ new DataSet_t(std::forward<Args_t>(args)...) },
    _eventSize{ _dataset->get()->size() }
  {
    const auto nSlots = ROOT::IsImplicitMTEnabled() ? ROOT::GetThreadPoolSize() : 1;
    _events.resize(nSlots);
  }


  /// Move constructor. It transfers ownership of the internal RooAbsData object.
  /// The per-slot buffers are moved as well; the mutex cannot be moved, so the new
  /// object starts with a fresh, unlocked one.
  RooAbsDataHelper(RooAbsDataHelper&& other) :
    _dataset{ std::move(other._dataset) },
    _mutex_dataset(),
    _events{ std::move(other._events) },
    _eventSize{ other._eventSize }
  {

  }

  /// Copy is discouraged.
  /// Use `rdataframe.Book<...>(std::move(absDataHelper), ...)` instead.
  RooAbsDataHelper(const RooAbsDataHelper&) = delete;
  /// Return internal dataset/hist.
  std::shared_ptr<DataSet_t> GetResultPtr() const { return _dataset; }
  /// RDataFrame interface method. Nothing has to be initialised.
  void Initialize() {}
  /// RDataFrame interface method. No tasks.
  void InitTask(TTreeReader *, unsigned int) {}
  /// RDataFrame interface method.
  /// \note The same name is returned for every instantiation, including RooDataHist.
  std::string GetActionName() { return "RooDataSetHelper"; }

  /// Method that RDataFrame calls to pass a new event.
  ///
  /// The values are appended to the buffer of the given slot. Once that buffer holds more
  /// than 1024 values, it is flushed into the dataset, but only if the dataset mutex can be
  /// acquired without waiting; otherwise the slot simply keeps buffering.
  ///
  /// \tparam ColumnTypes Types of the RDataFrame columns. They may differ from each other;
  /// every value is converted to `double` before it is stored.
  /// \param slot When IMT is used, this is a number in the range [0, nSlots) to fill lock free.
  /// \param values x, y, z, ... coordinates of the event.
  /// \throws std::invalid_argument if the number of columns differs from the number of
  /// variables in the dataset.
  template <typename... ColumnTypes>
  void Exec(unsigned int slot, ColumnTypes... values)
  {
    if (sizeof...(values) != _eventSize) {
      throw std::invalid_argument(std::string("RooDataSet can hold ")
          + std::to_string(_eventSize)
          + " variables per event, but RDataFrame passed "
          + std::to_string(sizeof...(values))
          + " columns.");
    }

    auto& vector = _events[slot];
    // Convert explicitly: a braced list of the raw values only deduces an initializer_list
    // if all column types are identical, so mixed columns (e.g. int and double) would not compile.
    for (auto&& val : {static_cast<double>(values)...}) {
      vector.push_back(val);
    }

    if (vector.size() > 1024 && _mutex_dataset.try_lock()) {
      // try_lock() already acquired the mutex; the guard only adopts it to release it on scope exit.
      const std::lock_guard<std::mutex> guard(_mutex_dataset, std::adopt_lock_t());
      FillDataSet(vector, _eventSize);
      vector.clear();
    }
  }

  /// Empty all buffers into the dataset/hist to finish processing.
  /// \note No locking is done here. NOTE(review): presumably RDataFrame calls this only
  /// after all slots have finished processing -- confirm.
  void Finalize() {
    for (auto& vector : _events) {
      FillDataSet(vector, _eventSize);
      vector.clear();
    }
  }


private:
  /// Append all `events` to the internal RooDataSet or increment the bins of a RooDataHist at the given locations.
  ///
  /// \param events Events to fill into `data`. The layout is assumed to be `(x, y, z, ...) (x, y, z, ...), (...)`.
  /// \note The order of the variables inside `events` must be consistent with the order given in the constructor.
  /// No matching by name is performed.
  /// \param eventSize Size of a single event.
  /// \note The size of `events` is expected to be a multiple of `eventSize`; this is not checked.
  void FillDataSet(const std::vector<double>& events, unsigned int eventSize) {
    if (events.size() == 0)
      return;

    const RooArgSet& argSet = *_dataset->get();

    for (std::size_t i = 0; i < events.size(); i += eventSize) {
      for (std::size_t j=0; j < eventSize; ++j) {
        // Unchecked cast: every variable of the dataset is assumed to be a RooAbsRealLValue.
        static_cast<RooAbsRealLValue*>(argSet[j])->setVal(events[i+j]);
      }
      // The values were written into the dataset's own variables, which are now added as one entry.
      _dataset->add(argSet);
    }
  }
};

/// Helper for creating a RooDataSet inside RDataFrame.
/// This is RooAbsDataHelper instantiated with RooDataSet. \see RooAbsDataHelper
using RooDataSetHelper = RooAbsDataHelper<RooDataSet>;
/// Helper for creating a RooDataHist inside RDataFrame.
/// This is RooAbsDataHelper instantiated with RooDataHist. \see RooAbsDataHelper
using RooDataHistHelper = RooAbsDataHelper<RooDataHist>;

#endif

0 comments on commit 804516c

Please sign in to comment.