Skip to content

Commit

Permalink
[RF] Implement Offset("bin") also for RooDataSet fits
Browse files Browse the repository at this point in the history
Fully implement the `Offset("bin")` feature also for RooDataSet, both
with the CPU/CUDA BatchMode and with the legacy test statistics.

This is done now by introducing a new element in the computation graph:
an "offset pdf" that is created as a RooHistPdf from the observed data,
and it is used to get the counterterm in each bin.

It was validated with the `rf614` tutorial that this binwise offsetting
indeed fixes the convergence problems that the simple offsetting by
the initial NLL value can't fix.

Closes root-project#11965.
  • Loading branch information
guitargeek committed Sep 19, 2023
1 parent 6426d90 commit daa8a63
Show file tree
Hide file tree
Showing 14 changed files with 173 additions and 126 deletions.
11 changes: 5 additions & 6 deletions roofit/batchcompute/inc/RooBatchCompute.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,8 @@ class RooBatchComputeInterface {
}

virtual double reduceSum(Config const &cfg, InputArr input, size_t n) = 0;
virtual ReduceNLLOutput reduceNLL(Config const &cfg, std::span<const double> probas,
std::span<const double> weightSpan, std::span<const double> weights,
double weightSum, std::span<const double> binVolumes) = 0;
virtual ReduceNLLOutput reduceNLL(Config const &cfg, std::span<const double> probas, std::span<const double> weights,
std::span<const double> offsetProbas) = 0;

virtual Architecture architecture() const = 0;
virtual std::string architectureName() const = 0;
Expand Down Expand Up @@ -207,12 +206,12 @@ inline double reduceSum(Config cfg, InputArr input, size_t n)
return dispatch->reduceSum(cfg, input, n);
}

inline ReduceNLLOutput reduceNLL(Config cfg, std::span<const double> probas, std::span<const double> weightSpan,
std::span<const double> weights, double weightSum, std::span<const double> binVolumes)
inline ReduceNLLOutput reduceNLL(Config cfg, std::span<const double> probas, std::span<const double> weights,
std::span<const double> offsetProbas)
{
init();
auto dispatch = cfg.useCuda() ? dispatchCUDA : dispatchCPU;
return dispatch->reduceNLL(cfg, probas, weightSpan, weights, weightSum, binVolumes);
return dispatch->reduceNLL(cfg, probas, weights, offsetProbas);
}

} // End namespace RooBatchCompute
Expand Down
31 changes: 15 additions & 16 deletions roofit/batchcompute/src/RooBatchCompute.cu
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,7 @@ public:
/// Return the sum of an input array
double reduceSum(RooBatchCompute::Config const &cfg, InputArr input, size_t n) override;
ReduceNLLOutput reduceNLL(RooBatchCompute::Config const &cfg, std::span<const double> probas,
std::span<const double> weightSpan, std::span<const double> weights, double weightSum,
std::span<const double> binVolumes) override;
std::span<const double> weights, std::span<const double> offsetProbas) override;
}; // End class RooBatchComputeClass

inline __device__ void kahanSumUpdate(double &sum, double &carry, double a, double otherCarry)
Expand Down Expand Up @@ -220,8 +219,8 @@ __global__ void kahanSum(const double *__restrict__ input, const double *__restr
kahanSumReduction(shared, n, result, carry_index);
}

__global__ void kahanSumWeighted(const double *__restrict__ input, const double *__restrict__ weights, size_t n,
double *__restrict__ result)
__global__ void nllSumKernel(const double *__restrict__ probas, const double *__restrict__ weights,
const double *__restrict__ offsetProbas, size_t n, double *__restrict__ result)
{
int thIdx = threadIdx.x;
int gthIdx = thIdx + blockIdx.x * blockSize;
Expand All @@ -237,7 +236,11 @@ __global__ void kahanSumWeighted(const double *__restrict__ input, const double
for (int i = gthIdx; i < n; i += nThreadsTotal) {
// Note: it does not make sense to use the nll option and provide at the
// same time external carries.
double val = -std::log(input[i]) * weights[i];
double val = -std::log(probas[i]);
if (offsetProbas)
val += std::log(offsetProbas[i]);
if (weights)
val = weights[i] * val;
kahanSumUpdate(sum, carry, val, 0.0);
}

Expand All @@ -264,21 +267,17 @@ double RooBatchComputeClass::reduceSum(RooBatchCompute::Config const &cfg, Input
}

ReduceNLLOutput RooBatchComputeClass::reduceNLL(RooBatchCompute::Config const &cfg, std::span<const double> probas,
std::span<const double> weightSpan, std::span<const double> /*weights*/,
double /*weightSum*/, std::span<const double> binVolumes)
std::span<const double> weights, std::span<const double> offsetProbas)
{
ReduceNLLOutput out;
const int gridSize = getGridSize(probas.size());
CudaInterface::DeviceArray<double> devOut(2 * gridSize);
cudaStream_t stream = *cfg.cudaStream();
constexpr int shMemSize = 2 * blockSize * sizeof(double);

if (weightSpan.size() == 1) {
kahanSum<<<gridSize, blockSize, shMemSize, stream>>>(probas.data(), nullptr, probas.size(), devOut.data(), 1);
} else {
kahanSumWeighted<<<gridSize, blockSize, shMemSize, stream>>>(probas.data(), weightSpan.data(), probas.size(),
devOut.data());
}
nllSumKernel<<<gridSize, blockSize, shMemSize, stream>>>(
probas.data(), weights.size() == 1 ? nullptr : weights.data(),
offsetProbas.empty() ? nullptr : offsetProbas.data(), probas.size(), devOut.data());

kahanSum<<<1, blockSize, shMemSize, stream>>>(devOut.data(), devOut.data() + gridSize, gridSize, devOut.data(), 0);

Expand All @@ -287,9 +286,9 @@ ReduceNLLOutput RooBatchComputeClass::reduceNLL(RooBatchCompute::Config const &c
CudaInterface::copyDeviceToHost(devOut.data(), &tmpSum, 1, cfg.cudaStream());
CudaInterface::copyDeviceToHost(devOut.data() + 1, &tmpCarry, 1, cfg.cudaStream());

if (weightSpan.size() == 1) {
tmpSum *= weightSpan[0];
tmpCarry *= weightSpan[0];
if (weights.size() == 1) {
tmpSum *= weights[0];
tmpCarry *= weights[0];
}

out.nllSum = ROOT::Math::KahanSum<double>{tmpSum, tmpCarry};
Expand Down
14 changes: 6 additions & 8 deletions roofit/batchcompute/src/RooBatchCompute.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,8 @@ class RooBatchComputeClass : public RooBatchComputeInterface {
}
/// Return the sum of an input array
double reduceSum(Config const &, InputArr input, size_t n) override;
ReduceNLLOutput reduceNLL(Config const &, std::span<const double> probas, std::span<const double> weightSpan,
std::span<const double> weights, double weightSum,
std::span<const double> binVolumes) override;
ReduceNLLOutput reduceNLL(Config const &, std::span<const double> probas, std::span<const double> weights,
std::span<const double> offsetProbas) override;
}; // End class RooBatchComputeClass

namespace {
Expand Down Expand Up @@ -213,16 +212,15 @@ double RooBatchComputeClass::reduceSum(Config const &, InputArr input, size_t n)
}

ReduceNLLOutput RooBatchComputeClass::reduceNLL(Config const &, std::span<const double> probas,
std::span<const double> weightSpan, std::span<const double> weights,
double weightSum, std::span<const double> binVolumes)
std::span<const double> weights, std::span<const double> offsetProbas)
{
ReduceNLLOutput out;

double badness = 0.0;

for (std::size_t i = 0; i < probas.size(); ++i) {

const double eventWeight = weightSpan.size() > 1 ? weightSpan[i] : weightSpan[0];
const double eventWeight = weights.size() > 1 ? weights[i] : weights[0];

if (0. == eventWeight)
continue;
Expand All @@ -231,8 +229,8 @@ ReduceNLLOutput RooBatchComputeClass::reduceNLL(Config const &, std::span<const
double term = logOut.first;
badness += logOut.second;

if (!binVolumes.empty()) {
term -= std::log(weights[i]) - std::log(binVolumes[i]) - std::log(weightSum);
if (!offsetProbas.empty()) {
term -= std::log(offsetProbas[i]);
}

term *= -eventWeight;
Expand Down
13 changes: 6 additions & 7 deletions roofit/roofitcore/inc/RooNLLVar.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@
#include <vector>
#include <utility>

class RooRealSumPdf ;

class RooNLLVar : public RooAbsOptTestStatistic {
public:

Expand Down Expand Up @@ -53,15 +51,15 @@ class RooNLLVar : public RooAbsOptTestStatistic {

double defaultErrorLevel() const override { return 0.5 ; }

void enableBinOffsetting(bool on = true) {
_doBinOffset = on;
}
void enableBinOffsetting(bool on = true);

using ComputeResult = std::pair<ROOT::Math::KahanSum<double>, double>;

static RooNLLVar::ComputeResult computeScalarFunc(const RooAbsPdf *pdfClone, RooAbsData *dataClone, RooArgSet *normSet,
bool weightSq, std::size_t stepSize, std::size_t firstEvent,
std::size_t lastEvent, bool doBinOffset=false);
std::size_t lastEvent, RooAbsPdf const* offsetPdf = nullptr);

bool setData(RooAbsData& data, bool cloneData=true) override;

protected:

Expand All @@ -80,7 +78,8 @@ class RooNLLVar : public RooAbsOptTestStatistic {
ROOT::Math::KahanSum<double> _offsetSaveW2{0.0}; ///<!

mutable std::vector<double> _binw ; ///<!
mutable RooRealSumPdf* _binnedPdf{nullptr}; ///<!
mutable RooAbsPdf* _binnedPdf{nullptr}; ///<!
std::unique_ptr<RooAbsPdf> _offsetPdf; ///<! An optional per-bin likelihood offset

ClassDefOverride(RooNLLVar,0) // Function representing (extended) -log(L) of p.d.f and dataset
};
Expand Down
21 changes: 1 addition & 20 deletions roofit/roofitcore/src/BatchModeDataHelpers.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

#include "RooFit/BatchModeDataHelpers.h"
#include <RooAbsData.h>
#include <RooDataHist.h>
#include <RooHelpers.h>
#include "RooNLLVarNew.h"
#include <RooRealVar.h>
Expand Down Expand Up @@ -96,26 +95,10 @@ getSingleDataSpans(RooAbsData const &data, std::string_view rangeName, std::stri
assignSpan(weight, {buffer.data(), nNonZeroWeight});
assignSpan(weightSumW2, {bufferSumW2.data(), nNonZeroWeight});
}
using namespace ROOT::Experimental;
insert(RooNLLVarNew::weightVarName, weight);
insert(RooNLLVarNew::weightVarNameSumW2, weightSumW2);
}

// Add also bin volume information if we are dealing with a RooDataHist
if (auto dataHist = dynamic_cast<RooDataHist const *>(&data)) {
buffers.emplace();
auto &buffer = buffers.top();
buffer.reserve(nNonZeroWeight);

for (std::size_t i = 0; i < nEvents; ++i) {
if (!hasZeroWeight[i]) {
buffer.push_back(dataHist->binVolume(i));
}
}

insert("_bin_volume", {buffer.data(), buffer.size()});
}

// Get the real-valued batches and also cast them to double branches to put
// in the data map
for (auto const &item : data.getBatches(0, nEvents)) {
Expand Down Expand Up @@ -203,9 +186,7 @@ getSingleDataSpans(RooAbsData const &data, std::string_view rangeName, std::stri
/// Spans with the weights and squared weights will be also stored in the map,
/// keyed with the names `_weight` and the `_weight_sumW2`. If the dataset is
/// unweighted, these weight spans will only contain the single value `1.0`.
/// Entries with zero weight will be skipped. If the input dataset is a
/// RooDataHist, the output map will also contain an item for the key
/// `_bin_volume` with the bin volumes.
/// Entries with zero weight will be skipped.
///
/// \return A `std::map` with spans keyed to name pointers.
/// \param[in] data The input dataset.
Expand Down
2 changes: 0 additions & 2 deletions roofit/roofitcore/src/BatchModeHelpers.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@

#include <string>

using ROOT::Experimental::RooNLLVarNew;

namespace {

std::unique_ptr<RooAbsArg> createSimultaneousNLL(RooSimultaneous const &simPdf, bool isExtended,
Expand Down
6 changes: 0 additions & 6 deletions roofit/roofitcore/src/RooAbsPdf.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1076,12 +1076,6 @@ RooFit::OwningPtr<RooAbsReal> RooAbsPdf::createNLL(RooAbsData& data, const RooLi
Int_t cloneData = pc.getInt("cloneData") ;
auto offset = static_cast<RooFit::OffsetMode>(pc.getInt("doOffset"));

if(offset == RooFit::OffsetMode::Bin && dynamic_cast<RooDataSet*>(&data)) {
coutE(Minimization) << "The Offset(\"bin\") option doesn't support fits to RooDataSet yet, only to RooDataHist."
" Falling back to no offsetting." << endl;
offset = RooFit::OffsetMode::None;
}

// If no explicit cloneData command is specified, cloneData is set to true if optimization is activated
if (cloneData==2) {
cloneData = optConst ;
Expand Down
2 changes: 1 addition & 1 deletion roofit/roofitcore/src/RooAddition.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ double RooAddition::defaultErrorLevel() const

std::unique_ptr<RooArgSet> comps{getComponents()};
for(RooAbsArg * arg : *comps) {
if (dynamic_cast<RooNLLVar*>(arg) || dynamic_cast<ROOT::Experimental::RooNLLVarNew*>(arg)) {
if (dynamic_cast<RooNLLVar*>(arg) || dynamic_cast<RooNLLVarNew*>(arg)) {
nllArg = (RooAbsReal*)arg ;
}
if (dynamic_cast<RooChi2Var*>(arg)) {
Expand Down
64 changes: 43 additions & 21 deletions roofit/roofitcore/src/RooNLLVar.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,20 @@ In extended mode, a
\f$ N_\mathrm{expect} - N_\mathrm{observed}*log(N_\mathrm{expect}) \f$ term is added.
**/

#include "RooNLLVar.h"

#include "RooAbsData.h"
#include "RooAbsPdf.h"
#include "RooCmdConfig.h"
#include "RooMsgService.h"
#include "RooAbsDataStore.h"
#include "RooRealMPFE.h"
#include "RooRealSumPdf.h"
#include "RooRealVar.h"
#include "RooProdPdf.h"
#include "RooNaNPacker.h"
#include "RooDataHist.h"
#include <RooNLLVar.h>

#include <RooAbsData.h>
#include <RooAbsDataStore.h>
#include <RooAbsPdf.h>
#include <RooCmdConfig.h>
#include <RooDataHist.h>
#include <RooHistPdf.h>
#include <RooMsgService.h>
#include <RooNaNPacker.h>
#include <RooProdPdf.h>
#include <RooRealMPFE.h>
#include <RooRealSumPdf.h>
#include <RooRealVar.h>

#ifdef ROOFIT_CHECK_CACHED_VALUES
#include <iomanip>
Expand Down Expand Up @@ -316,34 +317,30 @@ double RooNLLVar::evaluatePartition(std::size_t firstEvent, std::size_t lastEven

RooNLLVar::ComputeResult RooNLLVar::computeScalar(std::size_t stepSize, std::size_t firstEvent, std::size_t lastEvent) const {
auto pdfClone = static_cast<const RooAbsPdf*>(_funcClone);
return computeScalarFunc(pdfClone, _dataClone, _normSet, _weightSq, stepSize, firstEvent, lastEvent, _doBinOffset);
return computeScalarFunc(pdfClone, _dataClone, _normSet, _weightSq, stepSize, firstEvent, lastEvent, _offsetPdf.get());
}

// static function, also used from TestStatistics::RooUnbinnedL
RooNLLVar::ComputeResult RooNLLVar::computeScalarFunc(const RooAbsPdf *pdfClone, RooAbsData *dataClone,
RooArgSet *normSet, bool weightSq, std::size_t stepSize,
std::size_t firstEvent, std::size_t lastEvent, bool doBinOffset)
std::size_t firstEvent, std::size_t lastEvent, RooAbsPdf const* offsetPdf)
{
ROOT::Math::KahanSum<double> kahanWeight;
ROOT::Math::KahanSum<double> kahanProb;
RooNaNPacker packedNaN(0.f);
const double logSumW = std::log(dataClone->sumEntries());

auto* dataHist = doBinOffset ? static_cast<RooDataHist*>(dataClone) : nullptr;

for (auto i=firstEvent; i<lastEvent; i+=stepSize) {
dataClone->get(i) ;

double weight = dataClone->weight(); //FIXME
const double ni = weight;

if (0. == weight * weight) continue ;
if (weightSq) weight = dataClone->weightSquared() ;

double logProba = pdfClone->getLogVal(normSet);

if(doBinOffset) {
logProba -= std::log(ni) - std::log(dataHist->binVolume(i)) - logSumW;
if(offsetPdf) {
logProba -= offsetPdf->getLogVal(normSet);
}

const double term = -weight * logProba;
Expand All @@ -360,3 +357,28 @@ RooNLLVar::ComputeResult RooNLLVar::computeScalarFunc(const RooAbsPdf *pdfClone,

return {kahanProb, kahanWeight.Sum()};
}

/// Replace the dataset of this test statistic.
///
/// The actual data swap is delegated to RooAbsOptTestStatistic::setData(),
/// whose return value is passed through unchanged. Afterwards the cached
/// offset pdf is dropped, because it was built from the previous dataset.
///
/// \param[in] data The new dataset, forwarded to the base class.
/// \param[in] cloneData Forwarded unchanged to the base class.
/// \return The value returned by RooAbsOptTestStatistic::setData().
bool RooNLLVar::setData(RooAbsData &data, bool cloneData)
{
bool ret = RooAbsOptTestStatistic::setData(data, cloneData);
// Discard the offset pdf of the old data. The call below re-creates it from
// the new data clone, but only if bin offsetting is currently switched on.
_offsetPdf.reset();
enableBinOffsetting(_doBinOffset);
return ret;
}

void RooNLLVar::enableBinOffsetting(bool on)
{
if (on && !_offsetPdf) {
std::string name = std::string{GetName()} + "_offsetPdf";
std::unique_ptr<RooDataHist> dataTemplate;
if (auto dh = dynamic_cast<RooDataHist *>(_dataClone)) {
dataTemplate = std::make_unique<RooDataHist>(*dh);
} else {
dataTemplate = std::unique_ptr<RooDataHist>(static_cast<RooDataSet const &>(*_dataClone).binnedClone());
}
_offsetPdf = std::make_unique<RooHistPdf>(name.c_str(), name.c_str(), *_funcObsSet, std::move(dataTemplate));
_offsetPdf->setOperMode(ADirty);
}
_doBinOffset = on;
}
Loading

0 comments on commit daa8a63

Please sign in to comment.