-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
RooAbsOptTestStatistic.cxx
775 lines (606 loc) · 30.4 KB
/
RooAbsOptTestStatistic.cxx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
/*****************************************************************************
* Project: RooFit *
* Package: RooFitCore *
* @(#)root/roofitcore:$Id$
* Authors: *
* WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
* DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
* *
* Copyright (c) 2000-2005, Regents of the University of California *
* and Stanford University. All rights reserved. *
* *
* Redistribution and use in source and binary forms, *
* with or without modification, are permitted according to the terms *
* listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
*****************************************************************************/
/**
\file RooAbsOptTestStatistic.cxx
\class RooAbsOptTestStatistic
\ingroup Roofitcore
Abstract base class for test
statistics objects that evaluate a function or PDF at each point of a given
dataset. This class provides generic optimizations, such as
caching and precalculation of constant terms that can be made for
all such quantities.
Implementations should define evaluatePartition(), which calculates the
value of a (sub)range of the dataset and optionally combinedValue(),
which combines the values calculated for each partition. If combinedValue()
is not overloaded, the default implementation will add the partition results
to obtain the combined result.
Support for calculation in partitions is needed to allow multi-core
parallelized calculation of test statistics.
**/
#include "RooAbsOptTestStatistic.h"
#include "Riostream.h"
#include "TClass.h"
#include <cstring>
#include "RooAbsData.h"
#include "RooAbsDataStore.h"
#include "RooAbsPdf.h"
#include "RooAddPdf.h"
#include "RooArgSet.h"
#include "RooBinSamplingPdf.h"
#include "RooBinning.h"
#include "RooCategory.h"
#include "RooDataHist.h"
#include "RooDataSet.h"
#include "RooErrorHandler.h"
#include "RooFitImplHelpers.h"
#include "RooGlobalFunc.h"
#include "RooMsgService.h"
#include "RooProdPdf.h"
#include "RooProduct.h"
#include "RooRealSumPdf.h"
#include "RooRealVar.h"
#include "RooTrace.h"
#include "RooVectorDataStore.h"
#include "ROOT/StringUtils.hxx"
using std::endl, std::ostream;
////////////////////////////////////////////////////////////////////////////////
/// Construct a test statistic and, for slave-mode instances, prepare the
/// optimised internal clones used for its evaluation.
/// \param[in] name Name of the instance.
/// \param[in] title Title (for e.g. plotting).
/// \param[in] real Function to evaluate.
/// \param[in] indata Dataset for which to compute the test statistic.
/// \param[in] projDeps A set of projected observables.
/// \param[in] cfg The statistic configuration.
///
/// Fields of `cfg`:
/// - rangeName If not null, only events in the dataset inside the range will be used in the test
///   statistic calculation.
/// - addCoefRangeName If not null, all RooAddPdf components of `real` will be
///   instructed to fix their fraction definitions to the given named range.
/// - nCPU If > 1, the test statistic calculation will be parallelised over multiple processes. By default, the data
///   is split with 'bulk' partitioning (each process calculates a contiguous block of fraction 1/nCPU
///   of the data). For binned data, this approach may be suboptimal as the number of bins with >0 entries
///   in each processing block may vary greatly; thereby distributing the workload rather unevenly.
/// - interleave Strategy how to distribute events among workers. If an interleave partitioning strategy is used
///   where each partition i takes all bins for which (ibin % ncpu == i), an even distribution of work is more likely.
/// - splitCutRange If true, a different rangeName constructed as `rangeName_{catName}` will be used
///   as range definition for each index state of a RooSimultaneous.
/// - cloneInputData Not used. Data is always cloned.
/// - integrateOverBinsPrecision If > 0, PDF in binned fits are integrated over the bins. This sets the precision.
///   If = 0, only unbinned PDFs fit to RooDataHist are integrated. If < 0, PDFs are never integrated.
RooAbsOptTestStatistic::RooAbsOptTestStatistic(const char *name, const char *title, RooAbsReal &real,
                                               RooAbsData &indata, const RooArgSet &projDeps,
                                               RooAbsTestStatistic::Configuration const &cfg)
   : RooAbsTestStatistic(name, title, real, indata, projDeps, cfg),
     _integrateBinsPrecision(cfg.integrateOverBinsPrecision)
{
   // Only slave-mode instances set up clones; any other mode does nothing further here.
   if (operMode() == Slave) {
      initSlave(real, indata, projDeps, _rangeName.c_str(), _addCoefRangeName.c_str());
   }
}
////////////////////////////////////////////////////////////////////////////////
/// Copy constructor.
/// \param[in] other Instance to copy from.
/// \param[in] name  Name forwarded to the base-class copy constructor.
///
/// A slave-mode copy re-runs initSlave() on the function and data clones of
/// `other`. Any other mode only takes a snapshot of the normalisation set of
/// `other`, if it has one.
RooAbsOptTestStatistic::RooAbsOptTestStatistic(const RooAbsOptTestStatistic &other, const char *name)
   : RooAbsTestStatistic(other, name),
     _sealed(other._sealed),
     _sealNotice(other._sealNotice),
     _skipZeroWeights(other._skipZeroWeights),
     _integrateBinsPrecision(other._integrateBinsPrecision)
{
   if (operMode() == Slave) {
      // Fall back to an empty set when the source has no projected dependents.
      const RooArgSet projectedDeps = other._projDeps ? *other._projDeps : RooArgSet();
      initSlave(*other._funcClone, *other._dataClone, projectedDeps, other._rangeName.c_str(),
                other._addCoefRangeName.c_str());
      return;
   }

   // Non-slave modes: nothing to clone except the normalisation set.
   if (other._normSet) {
      _normSet = new RooArgSet;
      other._normSet->snapshot(*_normSet);
   }
}
////////////////////////////////////////////////////////////////////////////////
/// Initialise a slave-mode instance: clone the function and the dataset,
/// attach them to each other, apply range settings and activate the caching
/// optimisation.
/// \param[in] real             Function to clone and evaluate.
/// \param[in] indata           Dataset to clone (reduced to `rangeName` if that is non-empty).
/// \param[in] projDeps         Projected observables; they are removed from the normalisation set.
/// \param[in] rangeName        Optional (comma-separated) range name(s); may be null or empty.
/// \param[in] addCoefRangeName Optional range used to fix RooAddPdf coefficient interpretation;
///                             only considered when `rangeName` is non-empty.
///
/// If the range of a function observable exceeds that of the corresponding
/// dataset observable, an error is logged, RooErrorHandler::softAbort() is
/// called and the function returns before the data clone is created.
void RooAbsOptTestStatistic::initSlave(RooAbsReal& real, RooAbsData& indata, const RooArgSet& projDeps, const char* rangeName,
                   const char* addCoefRangeName) {
   const bool hasRange = rangeName && strlen(rangeName);

   // ******************************************************************
   // *** PART 1 *** Clone incoming pdf, attach to each other *
   // ******************************************************************

   // Clone FUNC
   _funcClone = RooHelpers::cloneTreeWithSameParameters(real, indata.get()).release();
   _funcCloneSet = nullptr ;

   // Attach FUNC to data set
   _funcObsSet = std::unique_ptr<RooArgSet>{_funcClone->getObservables(indata)}.release();

   if (_funcClone->getAttribute("BinnedLikelihood")) {
      _funcClone->setAttribute("BinnedLikelihoodActive") ;
   }

   // Mark all projected dependents as such
   if (!projDeps.empty()) {
      std::unique_ptr<RooArgSet> projDataDeps{_funcObsSet->selectCommon(projDeps)};
      projDataDeps->setAttribAll("projectedDependent") ;
   }

   // If PDF is a RooProdPdf (with possible constraint terms)
   // analyze pdf for actual parameters (i.e those in unconnected constraint terms should be
   // ignored as here so that the test statistic will not be recalculated if those
   // are changed
   RooProdPdf* pdfWithCons = dynamic_cast<RooProdPdf*>(_funcClone) ;
   if (pdfWithCons) {
      std::unique_ptr<RooArgSet> connPars{pdfWithCons->getConnectedParameters(*indata.get())};
      // Add connected parameters as servers
      _paramSet.add(*connPars) ;
   } else {
      // Add parameters as servers
      _funcClone->getParameters(indata.get(), _paramSet);
   }

   // Store normalization set
   _normSet = new RooArgSet;
   indata.get()->snapshot(*_normSet, false);

   // Expand list of observables with any observables used in parameterized ranges.
   // This NEEDS to be a counting loop since we are inserting during the loop.
   for (std::size_t i = 0; i < _funcObsSet->size(); ++i) {
      auto realDepRLV = dynamic_cast<const RooAbsRealLValue*>((*_funcObsSet)[i]);
      if (realDepRLV && realDepRLV->isDerived()) {
         RooArgSet tmp2;
         realDepRLV->leafNodeServerList(&tmp2, nullptr, true);
         _funcObsSet->add(tmp2,true);
      }
   }

   // ******************************************************************
   // *** PART 2 *** Clone and adjust incoming data, attach to PDF *
   // ******************************************************************

   // Check if the fit ranges of the dependents in the data and in the FUNC are consistent
   const RooArgSet* dataDepSet = indata.get() ;
   for (const auto arg : *_funcObsSet) {

      // Check that both dataset and function argument are of type RooRealVar
      RooRealVar* realReal = dynamic_cast<RooRealVar*>(arg) ;
      if (!realReal) continue ;
      RooRealVar* datReal = dynamic_cast<RooRealVar*>(dataDepSet->find(realReal->GetName())) ;
      if (!datReal) continue ;

      // Check that range of observables in pdf is equal or contained in range of observables in data
      if (!realReal->getBinning().lowBoundFunc() && realReal->getMin()<(datReal->getMin()-1e-6)) {
         coutE(InputArguments) << "RooAbsOptTestStatistic: ERROR minimum of FUNC observable " << arg->GetName()
                               << "(" << realReal->getMin() << ") is smaller than that of "
                               << arg->GetName() << " in the dataset (" << datReal->getMin() << ")" << endl ;
         RooErrorHandler::softAbort() ;
         return ;
      }

      if (!realReal->getBinning().highBoundFunc() && realReal->getMax()>(datReal->getMax()+1e-6)) {
         // Report the offending values, in the same format as the minimum check above.
         coutE(InputArguments) << "RooAbsOptTestStatistic: ERROR maximum of FUNC observable " << arg->GetName()
                               << "(" << realReal->getMax() << ") is larger than that of "
                               << arg->GetName() << " in the dataset (" << datReal->getMax() << ")" << endl ;
         RooErrorHandler::softAbort() ;
         return ;
      }
   }

   // Copy data and strip entries lost by adjusted fit range, _dataClone ranges will be copied from realDepSet ranges
   if (hasRange) {
      _dataClone = std::unique_ptr<RooAbsData>{indata.reduce(RooFit::SelectVars(*_funcObsSet),RooFit::CutRange(rangeName))}.release();
   } else {
      _dataClone = static_cast<RooAbsData*>(indata.Clone()) ;
   }
   _ownData = true ;

   // ******************************************************************
   // *** PART 3 *** Make adjustments for fit ranges, if specified *
   // ******************************************************************

   if (hasRange) {

      cxcoutI(Fitting) << "RooAbsOptTestStatistic::ctor(" << GetName() << ") constructing test statistic for sub-range named " << rangeName << endl ;

      if(auto pdfClone = dynamic_cast<RooAbsPdf*>(_funcClone)) {
         pdfClone->setNormRange(rangeName);
      }

      // The list of requested range names is the same for every observable: split it once.
      const auto tokens = ROOT::Split(rangeName, ",");

      // Print warnings if the requested ranges are not available for the observable
      for (const auto arg : *_funcObsSet) {
         if (auto realObs = dynamic_cast<RooRealVar*>(arg)) {
            for(std::string const& token : tokens) {
               if(!realObs->hasRange(token.c_str())) {
                  std::stringstream errMsg;
                  errMsg << "The observable \"" << realObs->GetName() << "\" doesn't define the requested range \""
                         << token << "\". Replacing it with the default range." << std::endl;
                  coutI(Fitting) << errMsg.str() << std::endl;
               }
            }
         }
      }
   }

   // ******************************************************************
   // *** PART 3.2 *** Binned fits *
   // ******************************************************************

   setUpBinSampling();

   // Fix RooAddPdf coefficients to original normalization range
   if (hasRange) {

      // WVE Remove projected dependents from normalization
      _funcClone->fixAddCoefNormalization(*_dataClone->get(),false) ;

      if (addCoefRangeName && strlen(addCoefRangeName)) {
         cxcoutI(Fitting) << "RooAbsOptTestStatistic::ctor(" << GetName()
                          << ") fixing interpretation of coefficients of any RooAddPdf component to range " << addCoefRangeName << endl ;
         _funcClone->fixAddCoefRange(addCoefRangeName,false) ;
      }
   }

   // This is deferred from part 2 - but must happen after part 3 - otherwise invalid bins cannot be properly marked in cacheValidEntries
   _dataClone->attachBuffers(*_funcObsSet) ;
   setEventCount(_dataClone->numEntries()) ;

   // *********************************************************************
   // *** PART 4 *** Adjust normalization range for projected observables *
   // *********************************************************************

   // Remove projected dependents from normalization set
   if (!projDeps.empty()) {

      _projDeps = new RooArgSet;
      projDeps.snapshot(*_projDeps, false) ;

      _normSet->remove(*_projDeps,true,true) ;

      // Mark all projected dependents as such
      RooArgSet projDataDeps;
      _funcObsSet->selectCommon(*_projDeps, projDataDeps);
      projDataDeps.setAttribAll("projectedDependent") ;
   }

   coutI(Optimization) << "RooAbsOptTestStatistic::ctor(" << GetName() << ") optimizing internal clone of p.d.f for likelihood evaluation. "
                       << "Lazy evaluation and associated change tracking will be disabled for all nodes that depend on observables" << endl ;

   // *********************************************************************
   // *** PART 5 *** Finalization and activation of optimization *
   // *********************************************************************

   // Redirect pointers of base class to clone
   _func = _funcClone ;
   _data = _dataClone ;

   _funcClone->getVal(_normSet) ;

   optimizeCaching() ;

   // It would be unusual if the global observables are used in the likelihood
   // outside of the constraint terms, but if they are we have to be consistent
   // and also redirect them to the snapshots in the dataset if appropriate.
   if(_takeGlobalObservablesFromData && _data->getGlobalObservables()) {
      recursiveRedirectServers(*_data->getGlobalObservables()) ;
   }
}
////////////////////////////////////////////////////////////////////////////////
/// Destructor. Slave-mode instances release the function clone, the
/// observable set, the projected-dependents snapshot and (when owned) the data
/// clone. The normalisation set is released in every mode.
RooAbsOptTestStatistic::~RooAbsOptTestStatistic()
{
   if (operMode() == Slave) {
      delete _funcClone;
      delete _funcObsSet;
      delete _projDeps; // deleting a null pointer is a no-op
      if (_ownData) {
         delete _dataClone;
      }
   }
   delete _normSet;
}
////////////////////////////////////////////////////////////////////////////////
/// Combine the test statistic results calculated in partitions into the
/// global result. This default implementation adds the partition return
/// values using compensated (Kahan-style) summation, also folding in the
/// carry term reported by each partition.
/// \param[in] array Partition objects; each element is treated as a RooAbsOptTestStatistic.
/// \param[in] n     Number of entries in `array`.
/// \return Sum of the partition values. The remaining carry is stored in `_evalCarry`.
double RooAbsOptTestStatistic::combinedValue(RooAbsReal** array, Int_t n) const
{
   // Default implementation returns sum of components
   double sum(0);
   double carry(0);
   for (Int_t i = 0; i < n; ++i) {
      double y = array[i]->getValV();
      // Downcast within the class hierarchy: static_cast is the appropriate named
      // cast here (reinterpret_cast performs no base-to-derived pointer adjustment).
      carry += static_cast<RooAbsOptTestStatistic*>(array[i])->getCarry();
      y -= carry;
      const double t = sum + y;
      carry = (t - sum) - y;
      sum = t;
   }
   _evalCarry = carry;
   return sum ;
}
////////////////////////////////////////////////////////////////////////////////
/// Intercept server redirection and pass it on to the internal function clone.
/// The RooAbsTestStatistic hook is always invoked first; its return value is
/// not used. Outside slave mode the function then returns false. In slave
/// mode it returns true if redirecting the clone reports true, otherwise the
/// result of RooAbsReal::redirectServersHook().
bool RooAbsOptTestStatistic::redirectServersHook(const RooAbsCollection& newServerList, bool mustReplaceAll, bool nameChange, bool isRecursive)
{
   RooAbsTestStatistic::redirectServersHook(newServerList, mustReplaceAll, nameChange, isRecursive);

   if (operMode() == Slave) {
      if (_funcClone->recursiveRedirectServers(newServerList, false, nameChange)) {
         return true;
      }
      return RooAbsReal::redirectServersHook(newServerList, mustReplaceAll, nameChange, isRecursive);
   }

   return false;
}
////////////////////////////////////////////////////////////////////////////////
/// Print hook: after the base-class output, slave-mode instances additionally
/// print the compact tree of the function clone (indent extended by "opt >>")
/// followed by the addresses of the data clone and of its first observable.
void RooAbsOptTestStatistic::printCompactTreeHook(ostream& os, const char* indent)
{
   RooAbsTestStatistic::printCompactTreeHook(os, indent);

   if (operMode() == Slave) {
      TString cloneIndent(indent);
      cloneIndent.Append("opt >>");

      _funcClone->printCompactTree(os, cloneIndent.Data());
      os << cloneIndent << " dataset clone = " << _dataClone << " first obs = " << _dataClone->get()->first() << endl;
   }
}
////////////////////////////////////////////////////////////////////////////////
/// Driver function that propagates constant-term optimisation requests.
/// - Activate: run the constant term optimisation (treated as ValueChange if
///   the data clone already has a filled cache).
/// - DeActivate: abandon any existing constant term optimisation.
/// - ConfigChange: redo the optimisation (deactivate, then activate).
/// - ValueChange: force an update of all cached nodes.
///
/// Nothing is done beyond the base-class call when not in slave mode, when the
/// dataset cache is owned by another object, or when allowFunctionCache()
/// returns false.
void RooAbsOptTestStatistic::constOptimizeTestStatistic(ConstOpCode opcode, bool doAlsoTrackingOpt)
{
   RooAbsTestStatistic::constOptimizeTestStatistic(opcode, doAlsoTrackingOpt);

   if (operMode() != Slave) {
      return;
   }

   const bool cacheFilled = _dataClone->hasFilledCache();

   // A cache filled by somebody else must not be touched.
   if (cacheFilled && _dataClone->store()->cacheOwner() != this) {
      if (opcode == Activate) {
         cxcoutW(Optimization) << "RooAbsOptTestStatistic::constOptimize(" << GetName()
                               << ") dataset cache is owned by another object, no constant term optimization can be applied" << endl;
      }
      return;
   }

   if (!allowFunctionCache()) {
      if (opcode == Activate) {
         cxcoutI(Optimization) << "RooAbsOptTestStatistic::constOptimize(" << GetName()
                               << ") function caching prohibited by test statistic, no constant term optimization is applied" << endl;
      }
      return;
   }

   // Activating on top of an already filled cache is handled as a value change.
   if (cacheFilled && opcode == Activate) {
      opcode = ValueChange;
   }

   switch (opcode) {
   case Activate: {
      cxcoutI(Optimization) << "RooAbsOptTestStatistic::constOptimize(" << GetName()
                            << ") optimizing evaluation of test statistic by finding all nodes in p.d.f that depend exclusively"
                            << " on observables and constant parameters and precalculating their values" << endl;
      optimizeConstantTerms(true, doAlsoTrackingOpt);
      break;
   }
   case DeActivate: {
      cxcoutI(Optimization) << "RooAbsOptTestStatistic::constOptimize(" << GetName()
                            << ") deactivating optimization of constant terms in test statistic" << endl;
      optimizeConstantTerms(false);
      break;
   }
   case ConfigChange: {
      cxcoutI(Optimization) << "RooAbsOptTestStatistic::constOptimize(" << GetName()
                            << ") one ore more parameter were changed from constant to floating or vice versa, "
                            << "re-evaluating constant term optimization" << endl;
      optimizeConstantTerms(false);
      optimizeConstantTerms(true, doAlsoTrackingOpt);
      break;
   }
   case ValueChange: {
      cxcoutI(Optimization) << "RooAbsOptTestStatistic::constOptimize(" << GetName()
                            << ") the value of one ore more constant parameter were changed re-evaluating constant term optimization" << endl;
      // Request a forcible cache update of all cached nodes
      _dataClone->store()->forceCacheUpdate();
      break;
   }
   }
}
////////////////////////////////////////////////////////////////////////////////
/// This method changes the value caching logic for all nodes that depends on any of the observables
/// as defined by the given dataset. When evaluating a test statistic constructed from the RooAbsReal
/// with a dataset the observables are guaranteed to change with every call, thus there is no point
/// in tracking these changes which result in a net overhead. Thus for observable-dependent nodes,
/// the evaluation mechanism is changed from being dependent on a 'valueDirty' flag to guaranteed evaluation.
/// On the dataset side, the observables objects are modified to no longer send valueDirty messages
/// to their client
void RooAbsOptTestStatistic::optimizeCaching()
{
// cout << "RooAbsOptTestStatistic::optimizeCaching(" << GetName() << "," << this << ")" << endl ;
// Trigger create of all object caches now in nodes that have deferred object creation
// so that cache contents can be processed immediately
_funcClone->getVal(_normSet) ;
// Set value caching mode for all nodes that depend on any of the observables to ADirty
_funcClone->optimizeCacheMode(*_funcObsSet) ;
// Disable propagation of dirty state flags for observables
_dataClone->setDirtyProp(false) ;
// Disable reading of observables that are not used
_dataClone->optimizeReadingWithCaching(*_funcClone, RooArgSet(),requiredExtraObservables()) ;
}
////////////////////////////////////////////////////////////////////////////////
/// Driver function to activate or deactivate global constant term optimization.
/// \param[in] activate         If true, constant terms are found and cached with the dataset;
///                             if false, the cache is dropped and all nodes are reset.
/// \param[in] applyTrackingOpt If true, additionally request cache-and-track mode for the nodes
///                             hinted by the branch nodes of the function clone. Ignored (with a
///                             warning) unless the data is stored in a RooVectorDataStore, and
///                             disabled by the attribute "NoOptimizeLevel2" on the function clone.
///
/// When activated, the operation mode of cached nodes is set to AClean meaning
/// that their getVal() call will never result in an evaluate call. Finally the
/// branches in the dataset that correspond to observables that are exclusively
/// used in constant terms are disabled as they serve no more purpose.
/// Activation is a no-op if the optimization is already active or if the
/// function clone carries the attribute "NoOptimizeLevel1".
void RooAbsOptTestStatistic::optimizeConstantTerms(bool activate, bool applyTrackingOpt)
{
   if(activate) {

      if (_optimized) {
         return ;
      }

      // Trigger create of all object caches now in nodes that have deferred object creation
      // so that cache contents can be processed immediately
      _funcClone->getVal(_normSet) ;

      // WVE - Patch to allow customization of optimization level per component pdf
      if (_funcClone->getAttribute("NoOptimizeLevel1")) {
         coutI(Minimization) << " Optimization customization: Level-1 constant-term optimization prohibited by attribute NoOptimizeLevel1 set on top-level pdf "
                             << _funcClone->ClassName() << "::" << _funcClone->GetName() << endl ;
         return ;
      }
      if (_funcClone->getAttribute("NoOptimizeLevel2")) {
         coutI(Minimization) << " Optimization customization: Level-2 constant-term optimization prohibited by attribute NoOptimizeLevel2 set on top-level pdf "
                             << _funcClone->ClassName() << "::" << _funcClone->GetName() << endl ;
         applyTrackingOpt=false ;
      }

      // Apply tracking optimization here. Default strategy is to track components
      // of RooAddPdfs and RooRealSumPdfs. If these components are a RooProdPdf
      // or a RooProduct respectively, track the components of these products instead
      // of the product term
      RooArgSet trackNodes ;

      // Add safety check here - applyTrackingOpt will only be applied if present
      // dataset is constructed in terms of a RooVectorDataStore
      if (applyTrackingOpt) {
         if (!dynamic_cast<RooVectorDataStore*>(_dataClone->store())) {
            coutW(Optimization) << "RooAbsOptTestStatistic::optimizeConstantTerms(" << GetName()
                                << ") WARNING Cache-and-track optimization (Optimize level 2) is only available for datasets"
                                << " implement in terms of RooVectorDataStore - ignoring this option for current dataset" << endl ;
            applyTrackingOpt = false ;
         }
      }

      if (applyTrackingOpt) {
         RooArgSet branches ;
         _funcClone->branchNodeServerList(&branches) ;
         for (auto arg : branches) {
            arg->setCacheAndTrackHints(trackNodes);
         }
         // Do not set CacheAndTrack on constant expressions
         trackNodes.remove(*std::unique_ptr<RooAbsCollection>{trackNodes.selectByAttrib("Constant",true)});

         // Set CacheAndTrack flag on all remaining nodes
         trackNodes.setAttribAll("CacheAndTrack",true) ;
      }

      // Find all nodes that depend exclusively on constant parameters
      _cachedNodes.removeAll() ;

      _funcClone->findConstantNodes(*_dataClone->get(),_cachedNodes) ;

      // Cache constant nodes with dataset - also cache entries corresponding to zero-weights in data when using BinnedLikelihood
      _dataClone->cacheArgs(this,_cachedNodes,_normSet, _skipZeroWeights);

      // Put all cached nodes in AClean value caching mode so that their evaluate() is never called
      for (auto cacheArg : _cachedNodes) {
         cacheArg->setOperMode(RooAbsArg::AClean) ;
      }

      // Split the cached nodes into constant expressions and cache-and-track nodes for reporting
      std::unique_ptr<RooAbsCollection> constNodes{_cachedNodes.selectByAttrib("ConstantExpressionCached",true)};
      RooArgSet actualTrackNodes(_cachedNodes) ;
      actualTrackNodes.remove(*constNodes) ;
      if (!constNodes->empty()) {
         if (constNodes->size()<20) {
            coutI(Minimization) << " The following expressions have been identified as constant and will be precalculated and cached: " << *constNodes << endl ;
         } else {
            coutI(Minimization) << " A total of " << constNodes->size() << " expressions have been identified as constant and will be precalculated and cached." << endl ;
         }
      }
      if (!actualTrackNodes.empty()) {
         if (actualTrackNodes.size()<20) {
            coutI(Minimization) << " The following expressions will be evaluated in cache-and-track mode: " << actualTrackNodes << endl ;
         } else {
            // Report the number of cache-and-track nodes, not the number of constant nodes
            coutI(Minimization) << " A total of " << actualTrackNodes.size() << " expressions will be evaluated in cache-and-track-mode." << endl ;
         }
      }

      // Disable reading of observables that are no longer used
      _dataClone->optimizeReadingWithCaching(*_funcClone, _cachedNodes,requiredExtraObservables()) ;

      _optimized = true ;

   } else {

      // Delete the cache
      _dataClone->resetCache() ;

      // Reactivate all tree branches
      _dataClone->setArgStatus(*_dataClone->get(),true) ;

      // Reset all nodes to ADirty
      optimizeCaching() ;

      // Disable propagation of dirty state flags for observables
      _dataClone->setDirtyProp(false) ;

      _cachedNodes.removeAll() ;

      _optimized = false ;
   }
}
////////////////////////////////////////////////////////////////////////////////
/// Change the dataset that is used to the given one.
/// \param[in] indata     New dataset.
/// \param[in] cloneData  If true, a (possibly range-reduced) clone of the input dataset is made
///                       and owned by this object. If the test statistic was constructed with a
///                       range specification on the data, this argument is ignored and the data
///                       is always cloned.
/// \param[in] ownNewData Only used when the data is not cloned: whether this object takes
///                       ownership of `indata`.
/// \return false if this object operates in SimMaster mode (nothing is changed), true otherwise.
bool RooAbsOptTestStatistic::setDataSlave(RooAbsData& indata, bool cloneData, bool ownNewData)
{
   if (operMode()==SimMaster) {
      return false ;
   }

   // If the current dataset is owned, transfer the ownership to unique pointer
   // that will get out of scope at the end of this function. We can't delete it
   // right now, because there might be global observables in the model that
   // first need to be redirected to the new dataset with a later call to
   // RooAbsArg::recursiveRedirectServers.
   std::unique_ptr<RooAbsData> oldOwnedData;
   if (_ownData) {
      oldOwnedData.reset(_dataClone);
      _dataClone = nullptr ;
   }

   if (!cloneData && !_rangeName.empty()) {
      coutW(InputArguments) << "RooAbsOptTestStatistic::setData(" << GetName() << ") WARNING: test statistic was constructed with range selection on data, "
                            << "ignoring request to _not_ clone the input dataset" << endl ;
      cloneData = true ;
   }

   if (cloneData) {
      // Cloning input dataset
      if (_rangeName.empty()) {
         _dataClone = std::unique_ptr<RooAbsData>{indata.reduce(*indata.get())}.release();
      } else {
         _dataClone = std::unique_ptr<RooAbsData>{indata.reduce(RooFit::SelectVars(*indata.get()),RooFit::CutRange(_rangeName.c_str()))}.release();
      }
      _ownData = true ;

   } else {

      // Taking input dataset
      _dataClone = &indata ;
      _ownData = ownNewData ;

   }

   // Attach function clone to dataset
   _dataClone->attachBuffers(*_funcObsSet) ;
   _dataClone->setDirtyProp(false) ;
   _data = _dataClone ;

   // ReCache constant nodes with dataset
   if (!_cachedNodes.empty()) {
      _dataClone->cacheArgs(this,_cachedNodes,_normSet, _skipZeroWeights);
   }

   // Adjust internal event count. Use the dataset that is actually evaluated:
   // with a range cut the clone can hold fewer entries than the input dataset.
   setEventCount(_dataClone->numEntries()) ;

   setValueDirty() ;

   // It would be unusual if the global observables are used in the likelihood
   // outside of the constraint terms, but if they are we have to be consistent
   // and also redirect them to the snapshots in the dataset if appropriate.
   if(_takeGlobalObservablesFromData && _data->getGlobalObservables()) {
      recursiveRedirectServers(*_data->getGlobalObservables()) ;
   }

   return true ;
}
////////////////////////////////////////////////////////////////////////////////
/// Access the data clone. If the object is sealed, a warning (including the
/// seal notice, if any) is logged and a static empty dummy dataset is returned
/// instead.
RooAbsData& RooAbsOptTestStatistic::data()
{
   if (!_sealed) {
      return *_dataClone;
   }

   const bool hasNotice = (sealNotice() && strlen(sealNotice()));
   coutW(ObjectHandling) << "RooAbsOptTestStatistic::data(" << GetName()
                         << ") WARNING: object sealed by creator - access to data is not permitted: "
                         << (hasNotice ? sealNotice() : "<no user notice>") << endl;

   static RooDataSet dummy("dummy", "dummy", RooArgSet());
   return dummy;
}
////////////////////////////////////////////////////////////////////////////////
/// Const access to the data clone. If the object is sealed, a warning
/// (including the seal notice, if any) is logged and a static empty dummy
/// dataset is returned instead.
const RooAbsData& RooAbsOptTestStatistic::data() const
{
   if (!_sealed) {
      return *_dataClone;
   }

   const bool hasNotice = (sealNotice() && strlen(sealNotice()));
   coutW(ObjectHandling) << "RooAbsOptTestStatistic::data(" << GetName()
                         << ") WARNING: object sealed by creator - access to data is not permitted: "
                         << (hasNotice ? sealNotice() : "<no user notice>") << endl;

   static RooDataSet dummy("dummy", "dummy", RooArgSet());
   return dummy;
}
////////////////////////////////////////////////////////////////////////////////
/// Inspect PDF to find out if we are doing a binned fit to a 1-dimensional unbinned PDF.
/// If this is the case, enable finer sampling of bins by wrapping PDF into a RooBinSamplingPdf.
/// The member _integrateBinsPrecision decides how we act:
/// - < 0: Don't do anything.
/// - = 0: Only enable feature if fitting unbinned PDF to RooDataHist.
/// - > 0: Enable as requested.
void RooAbsOptTestStatistic::setUpBinSampling() {
auto& pdf = static_cast<RooAbsPdf&>(*_funcClone);
if (auto newPdf = RooBinSamplingPdf::create(pdf, *_dataClone, _integrateBinsPrecision)) {
newPdf->addOwnedComponents(*_funcClone);
_funcClone = newPdf.release();
}
}
/// Build a suffix string from the unique id of the cloned input data object,
/// so that instances which don't share the same data clone get different
/// suffixes. The string is produced by Form().
const char* RooAbsOptTestStatistic::cacheUniqueSuffix() const {
   const auto dataId = _dataClone->uniqueId().value();
   return Form("_%lx", dataId);
}
/// Forward a cache recalculation request for the given event range and step
/// size to the data store of the data clone, passing along the projected
/// dependents and the skip-zero-weights setting.
void RooAbsOptTestStatistic::runRecalculateCache(std::size_t firstEvent, std::size_t lastEvent, std::size_t stepSize) const
{
   auto* dataStore = _dataClone->store();
   dataStore->recalculateCache(_projDeps, firstEvent, lastEvent, stepSize, _skipZeroWeights);
}