Skip to content

Commit

Permalink
[RF] Consistent treatment of empty datasets with new CPU backend
Browse files Browse the repository at this point in the history
As we discovered in a CMSSW ROOT `master` sync PR, the new RooFit CPU
backend treats empty datasets differently from the legacy NLL evaluation
backend:
cms-sw/cmsdist#9025

This commit is fixing this, in particular removing the assumption that
datasets used for fits with the new CPU backend are never empty.

A unit test that validates the behavior for empty data objects is also
added.
  • Loading branch information
guitargeek committed Feb 26, 2024
1 parent 50ed2ee commit bef99a8
Show file tree
Hide file tree
Showing 8 changed files with 120 additions and 60 deletions.
16 changes: 13 additions & 3 deletions roofit/batchcompute/src/RooBatchCompute.cu
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,19 @@ void fillBatches(Batches &batches, RestrictArr output, size_t nEvents, std::size
batches._output = output;
}

void fillArrays(Batch *arrays, const VarVector &vars, double *buffer, double *bufferDevice)
void fillArrays(Batch *arrays, const VarVector &vars, double *buffer, double *bufferDevice, std::size_t nEvents)
{
for (int i = 0; i < vars.size(); i++) {
const std::span<const double> &span = vars[i];
if (span.size() == 1) {
if (!span.empty() && span.size() < nEvents) {
// In the scalar case, the value is not on the GPU yet, so we have to
// copy the value to the GPU buffer.
buffer[i] = span[0];
arrays[i].set(bufferDevice + i, false);
} else {
// In the vector input cases, they are already on the GPU, so we can
// fill be buffer with some dummy value and set the input span
// directly.
buffer[i] = 0.0;
arrays[i].set(span.data(), true);
}
Expand Down Expand Up @@ -132,7 +137,7 @@ public:
auto extraArgsDevice = reinterpret_cast<double *>(scalarBufferDevice + vars.size());

fillBatches(*batches, output, nEvents, vars.size(), extraArgs.size());
fillArrays(arrays, vars, scalarBuffer, scalarBufferDevice);
fillArrays(arrays, vars, scalarBuffer, scalarBufferDevice, nEvents);
batches->_arrays = arraysDevice;

if (!extraArgs.empty()) {
Expand Down Expand Up @@ -255,6 +260,8 @@ __global__ void nllSumKernel(const double *__restrict__ probas, const double *__

double RooBatchComputeClass::reduceSum(RooBatchCompute::Config const &cfg, InputArr input, size_t n)
{
if (n == 0)
return 0.0;
const int gridSize = getGridSize(n);
cudaStream_t stream = *cfg.cudaStream();
CudaInterface::DeviceArray<double> devOut(2 * gridSize);
Expand All @@ -270,6 +277,9 @@ ReduceNLLOutput RooBatchComputeClass::reduceNLL(RooBatchCompute::Config const &c
std::span<const double> weights, std::span<const double> offsetProbas)
{
ReduceNLLOutput out;
if (probas.empty()) {
return out;
}
const int gridSize = getGridSize(probas.size());
CudaInterface::DeviceArray<double> devOut(2 * gridSize);
cudaStream_t stream = *cfg.cudaStream();
Expand Down
18 changes: 8 additions & 10 deletions roofit/batchcompute/src/RooBatchCompute.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,19 @@ void fillBatches(Batches &batches, RestrictArr output, size_t nEvents, std::size
batches._output = output;
}

void fillArrays(std::vector<Batch> &arrays, const VarVector &vars, double *buffer)
void fillArrays(std::vector<Batch> &arrays, const VarVector &vars, double *buffer, std::size_t nEvents)
{

arrays.resize(vars.size());
for (size_t i = 0; i < vars.size(); i++) {
const std::span<const double> &span = vars[i];
if (span.empty()) {
std::stringstream ss;
ss << "The span number " << i << " passed to Batches::Batches() is empty!";
throw std::runtime_error(ss.str());
} else if (span.size() > 1) {
arrays[i].set(span.data(), true);
} else {
if (!span.empty() && span.size() < nEvents) {
// In the scalar case, copy the value to each element of vector input
// buffer.
std::fill_n(&buffer[i * bufferSize], bufferSize, span.data()[0]);
arrays[i].set(&buffer[i * bufferSize], false);
} else {
arrays[i].set(span.data(), true);
}
}
}
Expand Down Expand Up @@ -131,7 +129,7 @@ class RooBatchComputeClass : public RooBatchComputeInterface {
Batches batches;
std::vector<Batch> arrays;
fillBatches(batches, output, nEventsPerThread, vars.size(), extraArgs);
fillArrays(arrays, vars, buffer.data());
fillArrays(arrays, vars, buffer.data(), nEvents);
batches._arrays = arrays.data();
batches.advance(batches.getNEvents() * idx);

Expand Down Expand Up @@ -163,7 +161,7 @@ class RooBatchComputeClass : public RooBatchComputeInterface {
Batches batches;
std::vector<Batch> arrays;
fillBatches(batches, output, nEvents, vars.size(), extraArgs);
fillArrays(arrays, vars, buffer.data());
fillArrays(arrays, vars, buffer.data(), nEvents);
batches._arrays = arrays.data();

std::size_t events = batches.getNEvents();
Expand Down
3 changes: 1 addition & 2 deletions roofit/roofitcore/inc/RooFit/Detail/BatchModeDataHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@ getDataSpans(RooAbsData const &data, std::string const &rangeName, RooSimultaneo
bool takeGlobalObservablesFromData, std::stack<std::vector<double>> &buffers);

std::map<RooFit::Detail::DataKey, std::size_t>
determineOutputSizes(RooAbsArg const &topNode,
std::function<std::size_t(RooFit::Detail::DataKey)> const &inputSizeFunc);
determineOutputSizes(RooAbsArg const &topNode, std::function<int(RooFit::Detail::DataKey)> const &inputSizeFunc);

} // namespace BatchModeDataHelpers
} // namespace Detail
Expand Down
23 changes: 13 additions & 10 deletions roofit/roofitcore/src/BatchModeDataHelpers.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,6 @@ getSingleDataSpans(RooAbsData const &data, std::string_view rangeName, std::stri

std::size_t nEvents = static_cast<size_t>(data.numEntries());

// We also want to support empty datasets: in this case the
// RooFitDriver::Dataset is not filled with anything.
if (nEvents == 0) {
return dataSpans;
}

auto weight = data.getWeightBatch(0, nEvents, /*sumW2=*/false);
auto weightSumW2 = data.getWeightBatch(0, nEvents, /*sumW2=*/true);

Expand Down Expand Up @@ -274,16 +268,18 @@ RooFit::Detail::BatchModeDataHelpers::getDataSpans(RooAbsData const &data, std::
/// \param[in] topNode The top node of the computation graph.
/// \param[in] inputSizeFunc A function to get the input sizes.
std::map<RooFit::Detail::DataKey, std::size_t> RooFit::Detail::BatchModeDataHelpers::determineOutputSizes(
RooAbsArg const &topNode, std::function<std::size_t(RooFit::Detail::DataKey)> const &inputSizeFunc)
RooAbsArg const &topNode, std::function<int(RooFit::Detail::DataKey)> const &inputSizeFunc)
{
std::map<RooFit::Detail::DataKey, std::size_t> output;

RooArgSet serverSet;
RooHelpers::getSortedComputationGraph(topNode, serverSet);

for (RooAbsArg *arg : serverSet) {
std::size_t inputSize = inputSizeFunc(arg);
if (inputSize > 0) {
int inputSize = inputSizeFunc(arg);
// The size == -1 encodes that the input doesn't come from an array
// input.
if (inputSize != -1) {
output[arg] = inputSize;
}
}
Expand All @@ -296,7 +292,14 @@ std::map<RooFit::Detail::DataKey, std::size_t> RooFit::Detail::BatchModeDataHelp
if (!arg->isReducerNode()) {
for (RooAbsArg *server : arg->servers()) {
if (server->isValueServer(*arg)) {
size = std::max(output.at(server), size);
std::size_t inputSize = output.at(server);
if (inputSize != 1) {
// If the input if from an external array, the output will
// adopt its size and we can stop the checking of other
// servers.
size = inputSize;
break;
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion roofit/roofitcore/src/RooDataSet.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -919,7 +919,7 @@ std::span<const double> RooDataSet::getWeightBatch(std::size_t first, std::size_

std::size_t nEntries = this->numEntries(); // for the casting to std::size_t

if(first >= nEntries || (first + len) > nEntries) {
if(first + len > nEntries) {
throw std::runtime_error("RooDataSet::getWeightBatch(): requested range not valid for dataset.");
}

Expand Down
39 changes: 18 additions & 21 deletions roofit/roofitcore/src/RooFit/Evaluator.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -262,30 +262,27 @@ void Evaluator::setInput(std::string const &name, std::span<const double> inputA
info.outputSize = inputArray.size();
if (_useGPU) {
#ifdef ROOFIT_CUDA
if (info.outputSize == 1) {
// Scalar observables from the data don't need to be copied to the GPU
if (info.outputSize <= 1) {
// Empty or scalar observables from the data don't need to be
// copied to the GPU.
_dataMapCPU.set(info.absArg, inputArray);
_dataMapCUDA.set(info.absArg, inputArray);
} else {
if (_useGPU) {
// For simplicity, we put the data on both host and device for
// now. This could be optimized by inspecting the clients of the
// variable.
if (isOnDevice) {
_dataMapCUDA.set(info.absArg, inputArray);
auto gpuSpan = _dataMapCUDA.at(info.absArg);
info.buffer = _bufferManager->makeCpuBuffer(gpuSpan.size());
CudaInterface::copyDeviceToHost(gpuSpan.data(), info.buffer->cpuWritePtr(), gpuSpan.size());
_dataMapCPU.set(info.absArg, {info.buffer->cpuReadPtr(), gpuSpan.size()});
} else {
_dataMapCPU.set(info.absArg, inputArray);
auto cpuSpan = _dataMapCPU.at(info.absArg);
info.buffer = _bufferManager->makeGpuBuffer(cpuSpan.size());
CudaInterface::copyHostToDevice(cpuSpan.data(), info.buffer->gpuWritePtr(), cpuSpan.size());
_dataMapCUDA.set(info.absArg, {info.buffer->gpuReadPtr(), cpuSpan.size()});
}
// For simplicity, we put the data on both host and device for
// now. This could be optimized by inspecting the clients of the
// variable.
if (isOnDevice) {
_dataMapCUDA.set(info.absArg, inputArray);
auto gpuSpan = _dataMapCUDA.at(info.absArg);
info.buffer = _bufferManager->makeCpuBuffer(gpuSpan.size());
CudaInterface::copyDeviceToHost(gpuSpan.data(), info.buffer->cpuWritePtr(), gpuSpan.size());
_dataMapCPU.set(info.absArg, {info.buffer->cpuReadPtr(), gpuSpan.size()});
} else {
_dataMapCPU.set(info.absArg, inputArray);
auto cpuSpan = _dataMapCPU.at(info.absArg);
info.buffer = _bufferManager->makeGpuBuffer(cpuSpan.size());
CudaInterface::copyHostToDevice(cpuSpan.data(), info.buffer->gpuWritePtr(), cpuSpan.size());
_dataMapCUDA.set(info.absArg, {info.buffer->gpuReadPtr(), cpuSpan.size()});
}
}
#endif
Expand Down Expand Up @@ -313,9 +310,9 @@ void Evaluator::updateOutputSizes()
}

auto outputSizeMap =
RooFit::Detail::BatchModeDataHelpers::determineOutputSizes(_topNode, [&](RooFit::Detail::DataKey key) {
RooFit::Detail::BatchModeDataHelpers::determineOutputSizes(_topNode, [&](RooFit::Detail::DataKey key) -> int {
auto found = sizeMap.find(key);
return found != sizeMap.end() ? found->second : 0;
return found != sizeMap.end() ? found->second : -1;
});

for (auto &info : _nodes) {
Expand Down
6 changes: 3 additions & 3 deletions roofit/roofitcore/src/RooFuncWrapper.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,10 @@ void RooFuncWrapper::loadParamsAndData(RooAbsArg const *head, RooArgSet const &p
_gradientVarBuffer.resize(_params.size());

if (head) {
_nodeOutputSizes =
RooFit::Detail::BatchModeDataHelpers::determineOutputSizes(*head, [&spans](RooFit::Detail::DataKey key) {
_nodeOutputSizes = RooFit::Detail::BatchModeDataHelpers::determineOutputSizes(
*head, [&spans](RooFit::Detail::DataKey key) -> int {
auto found = spans.find(key);
return found != spans.end() ? found->second.size() : 0;
return found != spans.end() ? found->second.size() : -1;
});
}
}
Expand Down
73 changes: 63 additions & 10 deletions roofit/roofitcore/test/testTestStatistics.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
#include <RooSimultaneous.h>
#include <RooWorkspace.h>

#include <ROOT/TestSupport.hxx>

#include "gtest_wrapper.h"

#include <cmath>
Expand Down Expand Up @@ -269,6 +271,57 @@ TEST_P(TestStatisticTest, IntegrateBins_RooDataHist)
<< "Expect chi2/ndf at least 10% better.";
}

// Verify that fitting an empty RooDataSet or a RooDataHist with only empty
// bins does not do anything to the parameters. The point of this test is to
// validate that the new CPU backend behaves the same as the legacy evaluation
// backend for empty data objects.
TEST_P(TestStatisticTest, EmptyData)
{
RooWorkspace ws;
ws.factory("Gaussian::model(x[0, 10], mean[6, 0, 10], sigma[2.0, 0.01, 10.0])");

RooRealVar &x = *ws.var("x");
RooRealVar &mean = *ws.var("mean");
RooRealVar &sigma = *ws.var("sigma");
RooAbsPdf &model = *ws.pdf("model");

const double meanOrigVal = mean.getVal();
const double sigmaOrigVal = sigma.getVal();

std::unique_ptr<RooDataSet> data{model.generate(x, 0)};
std::unique_ptr<RooDataHist> dataHist{data->binnedClone()};

{
// We expect errors in the Hessian calculation because the likelihood is
// constant.
ROOT::TestSupport::CheckDiagsRAII checkDiag;
checkDiag.requiredDiag(kWarning, "ROOT::Math::Fitter::CalculateHessErrors", "Error when calculating Hessian");

model.fitTo(*data, _evalBackend, RooFit::PrintLevel(-1));
}

EXPECT_EQ(mean.getVal(), meanOrigVal) << "Fitting an empty RooDataSet changed \"mean\" value!";
EXPECT_EQ(sigma.getVal(), sigmaOrigVal) << "Fitting an empty RooDataSet changed \"sigma\" value!";

// Reset the parameters for the check with the RooDataHist
mean.setVal(meanOrigVal);
sigma.setVal(sigmaOrigVal);
mean.setError(0.0);
sigma.setError(0.0);

{
// We expect errors in the Hessian calculation because the likelihood is
// constant (same as above for the RooDataSet).
ROOT::TestSupport::CheckDiagsRAII checkDiag;
checkDiag.requiredDiag(kWarning, "ROOT::Math::Fitter::CalculateHessErrors", "Error when calculating Hessian");

model.fitTo(*dataHist, _evalBackend, RooFit::PrintLevel(-1));
}

EXPECT_EQ(mean.getVal(), meanOrigVal) << "Fitting an empty RooDataSet changed \"mean\" value!";
EXPECT_EQ(sigma.getVal(), sigmaOrigVal) << "Fitting an empty RooDataSet changed \"sigma\" value!";
}

TEST(RooChi2Var, IntegrateBins)
{
RooHelpers::LocalChangeMsgLevel changeMsgLvl(RooFit::WARNING);
Expand Down Expand Up @@ -587,11 +640,11 @@ INSTANTIATE_TEST_SUITE_P(RooNLLVar, TestStatisticTest, testing::Values(ROOFIT_EV

INSTANTIATE_TEST_SUITE_P(RooNLLVar, OffsetBinTest,
testing::Combine(testing::Values(ROOFIT_EVAL_BACKENDS), // EvalBackend
testing::Values(false, true), // unbinned or binned
testing::Values(false, true), // extended fit
testing::Values(false, true), // use sumW2
testing::Values(false, true), // wrap in a RooSimultaneous
testing::Values(false) // binned likelihood code path
testing::Values(false, true), // unbinned or binned
testing::Values(false, true), // extended fit
testing::Values(false, true), // use sumW2
testing::Values(false, true), // wrap in a RooSimultaneous
testing::Values(false) // binned likelihood code path
),
[](testing::TestParamInfo<OffsetBinTest::ParamType> const &paramInfo) {
std::stringstream ss;
Expand All @@ -606,11 +659,11 @@ INSTANTIATE_TEST_SUITE_P(RooNLLVar, OffsetBinTest,

INSTANTIATE_TEST_SUITE_P(RooNLLVarBinnedL, OffsetBinTest,
testing::Combine(testing::Values(ROOFIT_EVAL_BACKENDS), // EvalBackend
testing::Values(true), // unbinned or binned
testing::Values(false), // extended fit
testing::Values(false), // use sumW2
testing::Values(false, true), // wrap in a RooSimultaneous
testing::Values(true) // binned likelihood code path
testing::Values(true), // unbinned or binned
testing::Values(false), // extended fit
testing::Values(false), // use sumW2
testing::Values(false, true), // wrap in a RooSimultaneous
testing::Values(true) // binned likelihood code path
),
[](testing::TestParamInfo<OffsetBinTest::ParamType> const &paramInfo) {
std::stringstream ss;
Expand Down

0 comments on commit bef99a8

Please sign in to comment.