Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RF] Consider relative number of events in sub-ranges for multirange fit #7719

Merged
merged 4 commits into from
Apr 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README/ReleaseNotes/v626/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,14 @@ RooFit now contains two RDataFrame action helpers, `RooDataSetHelper` and `RooDa
```
For more details, consult the tutorial [rf408_RDataFrameToRooFit](https://root.cern/doc/v626/rf408__RDataFrameToRooFit_8C.html).

### Changes in `RooAbsPdf::fitTo` behaviour for multi-range fits

The `RooAbsPdf::fitTo` and `RooAbsPdf::createNLL` functions accept a command argument to specify the fit range.
One can also fit in multiple ranges simultaneously.
The definition of such multi-range likelihoods for non-extended fits changes in this release.
Previously, the individual likelihoods were normalized separately in each range, which meant that the relative number of events in each sub-range was not used to estimate the PDF parameters.
From now on, the likelihoods are normalized by the sum of integrals in each range. This implies that the likelihood takes into account all inter-range and intra-range information.

## 2D Graphics Libraries


Expand Down
59 changes: 58 additions & 1 deletion roofit/roofitcore/src/RooAbsPdf.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ called for each data event.
#include "RooNaNPacker.h"
#include "RooHelpers.h"
#include "RooBatchCompute.h"
#include "RooFormulaVar.h"

#include "TClass.h"
#include "TMath.h"
Expand Down Expand Up @@ -915,7 +916,47 @@ RooAbsReal* RooAbsPdf::createNLL(RooAbsData& data, const RooCmdArg& arg1, const
return createNLL(data,l) ;
}

namespace {

RooAbsReal *createMultiRangeNLLCorrectionTerm(RooAbsPdf const &pdf, RooAbsData const &data, std::string const &baseName,
std::string const &rangeNames)
{
double sumEntriesTotal = 0.0;

RooArgList termList;
RooArgList integralList;

for (const auto &currentRangeName : RooHelpers::tokenise(rangeNames, ",")) {
const std::string currentName = baseName + "_" + currentRangeName;

auto sumEntriesCurrent = data.sumEntries("1", currentRangeName.c_str());
sumEntriesTotal += sumEntriesCurrent;

auto pdfIntegralCurrent =
pdf.createIntegral(*pdf.getObservables(data), nullptr, nullptr, currentRangeName.c_str());

auto term = new RooFormulaVar((currentName + "_correctionTerm").c_str(),
(std::string("-(") + std::to_string(sumEntriesCurrent) + " * log(x[0]))").c_str(),
RooArgList(*pdfIntegralCurrent));

termList.add(*term);
integralList.add(*pdfIntegralCurrent);
}

auto integralFull = new RooAddition((baseName + "_correctionFullIntegralTerm").c_str(),
"integral",
integralList,
true);

auto fullRangeTerm = new RooFormulaVar((baseName + "_foobar").c_str(),
(std::string("(") + std::to_string(sumEntriesTotal) + " * log(x[0]))").c_str(),
RooArgList(*integralFull));

termList.add(*fullRangeTerm);
return new RooAddition((baseName + "_correction").c_str(), "correction", termList, true);
}

} // namespace


////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -1095,7 +1136,19 @@ RooAbsReal* RooAbsPdf::createNLL(RooAbsData& data, const RooLinkedList& cmdList)
nllComp->batchMode(pc.getInt("BatchMode"));
nllList.add(*nllComp) ;
}
nll = new RooAddition(baseName.c_str(),"-log(likelihood)",nllList,kTRUE) ;

if (!ext) {
// Each RooNLLVar was created with the normalization set corresponding to
// the subrange, not the union range like it should be. We have to add an
// extra term to cancel this normalization problem. However, this is
// only necessarry for the non-extended case, because adding an extension
// term to the individual NLLs as done here is mathematicall equivalent
// to adding the normalization correction terms plus a global extension
// term.
nllList.add(*createMultiRangeNLLCorrectionTerm(*this, data, baseName, rangeName));
}

nll = new RooAddition(baseName.c_str(),"-log(likelihood)",nllList,true) ;
}
RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::PrintErrors) ;

Expand Down Expand Up @@ -1185,6 +1238,10 @@ RooAbsReal* RooAbsPdf::createNLL(RooAbsData& data, const RooLinkedList& cmdList)
/// <tr><td> `ConditionalObservables(const RooArgSet& set)` <td> Do not normalize PDF over listed observables
/// <tr><td> `Extended(Bool_t flag)` <td> Add extended likelihood term, off by default
/// <tr><td> `Range(const char* name)` <td> Fit only data inside range with given name. Multiple comma-separated range names can be specified.
/// In this case, the unnormalized PDF \f$f(x)\f$ is normalized by the integral over all ranges \f$r_i\f$:
/// \f[
/// p(x) = \frac{f(x)}{\sum_i \int_{r_i} f(x) dx}.
/// \f]
/// <tr><td> `Range(Double_t lo, Double_t hi)` <td> Fit only data inside given range. A range named "fit" is created on the fly on all observables.
/// <tr><td> `SumCoefRange(const char* name)` <td> Set the range in which to interpret the coefficients of RooAddPdf components
/// <tr><td> `NumCPU(int num, int strat)` <td> Parallelize NLL calculation on `num` CPUs
Expand Down
64 changes: 63 additions & 1 deletion roofit/roofitcore/test/testRooAbsPdf.cxx
Original file line number Diff line number Diff line change
@@ -1,23 +1,29 @@
// Tests for RooAbsPdf
// Author: Stephan Hageboeck, CERN 04/2020
// Authors: Stephan Hageboeck, CERN 04/2020
// Jonas Rembser, CERN 04/2021

#include "RooRealVar.h"
#include "RooGenericPdf.h"
#include "RooFormulaVar.h"
#include "RooDataSet.h"
#include "RooDataHist.h"
#include "RooFitResult.h"
#include "RooAddPdf.h"
#include "RooProduct.h"
#include "RooHelpers.h"
#include "RooGaussian.h"
#include "RooPoisson.h"

#include "TClass.h"
#include "TRandom.h"

#include <ROOT/RMakeUnique.hxx>

#include "gtest/gtest.h"

#include <memory>


// ROOT-10668: Asympt. correct errors don't work when title and name differ
TEST(RooAbsPdf, AsymptoticallyCorrectErrors)
{
Expand Down Expand Up @@ -47,6 +53,7 @@ TEST(RooAbsPdf, AsymptoticallyCorrectErrors)
EXPECT_GT(aError, a.getError()*2.) << "Asymptotically correct errors should be significantly larger.";
}


// Test a conditional fit with batch mode
//
// In a conditional fit, it happens that the value normalization integrals can
Expand Down Expand Up @@ -121,5 +128,60 @@ TEST(RooAbsPdf, ConditionalFitBatchMode)
EXPECT_TRUE(hijack.str().empty() != expectFastEvaluationsWarnings[iMean]);
++iMean;
}
}

// ROOT-9530: RooFit side-band fit inconsistent with fit to full range
TEST(RooAbsPdf, MultiRangeFit)
{
using namespace RooFit;

RooRealVar x("x","x",-10,10);

double cut = -5;
x.setRange("full", -10, 10);
x.setRange("low", -10, cut);
x.setRange("high", cut, 10);

RooRealVar mean("mean", "mean",-1, -10, 10);
RooRealVar width("width", "width", 3., 0.1, 10);
RooGaussian modelSimple("model_simple","model_simple",x,mean,width);

std::size_t nEvents = 100;

// model for extended fit
RooRealVar nsig("nsig","nsig",nEvents,0.,2000) ;
RooAddPdf modelExtended("model_extended","model_simple+a",RooArgList(modelSimple),RooArgList(nsig)) ;

auto resetValues = [&](){
mean.setVal(-1);
width.setVal(3);
};

// loop over non-extended and extended fit
for (auto* model : {static_cast<RooAbsPdf*>(&modelSimple),
static_cast<RooAbsPdf*>(&modelExtended)}) {

std::unique_ptr<RooDataSet> dataSet{model->generate(x, nEvents)};
std::unique_ptr<RooDataHist> dataHist{dataSet->binnedClone()};

// loop over binned fit and unbinned fit
for (auto* data : {static_cast<RooAbsData*>(dataSet.get()),
static_cast<RooAbsData*>(dataHist.get())}) {
// full range
resetValues();
std::unique_ptr<RooFitResult> fitResultFull{
model->fitTo(*data, Range("full"), Save(), PrintLevel(-1))
};

// part (side band fit, but the union of the side bands is the full range)
resetValues();
std::unique_ptr<RooFitResult> fitResultPart{
model->fitTo(*data, Range("low,high"), Save(), PrintLevel(-1))
};

EXPECT_TRUE(fitResultPart->isIdentical(*fitResultFull))
<< "Results of fitting " << model->GetName() << " to a "
<< data->IsA()->GetName() << " should be very similar.";
}
}
}
Binary file modified test/stressRooFit_ref.root
Binary file not shown.