Skip to content

Commit

Permalink
Merge pull request #576 from gpetruc/102x-debug
Browse files Browse the repository at this point in the history
Options to speed up discrete minimization
  • Loading branch information
gpetruc authored Aug 26, 2019
2 parents a41d391 + c7c0290 commit a6950ba
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 12 deletions.
7 changes: 7 additions & 0 deletions interface/CachingNLL.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ class CachingSimNLL : public RooAbsReal {
void setAnalyticBarlowBeeston(bool flag);
void setHideRooCategories(bool flag) { hideRooCategories_ = flag; }
void setHideConstants(bool flag) { hideConstants_ = flag; }
void setMaskConstraints(bool flag) ;
void setMaskNonDiscreteChannels(bool mask) ;
friend class CachingAddNLL;
// trap this call, since we don't care about propagating it to the sub-components
virtual void constOptimizeTestStatistic(ConstOpCode opcode, Bool_t doAlsoTrackingOpt=kTRUE) { }
Expand Down Expand Up @@ -208,6 +210,11 @@ class CachingSimNLL : public RooAbsReal {
std::vector<double> constrainZeroPointsFast_;
std::vector<double> constrainZeroPointsFastPoisson_;
std::vector<RooAbsReal*> channelMasks_;
std::vector<bool> internalMasks_;
bool maskConstraints_;
RooArgSet activeParameters_, activeCatParameters_;
double maskingOffset_; // offset to ensure that interal or constraint masking doesn't change NLL value
double maskingOffsetZero_; // and associated zero point
};

}
Expand Down
2 changes: 1 addition & 1 deletion python/ModelTools.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ def doNuisances(self):
self.out.var(n).setRange(self.out.function('%s_BoundLo' % n), self.out.function('%s_BoundHi' % n))
else:
if len(args) == 3: # mean, sigma, range
sigma = double(args[2])
sigma = float(args[2])
if self.out.var(n):
bounds = [float(x) for x in args[2][1:-1].split(",")]
self.out.var(n).setConstant(False)
Expand Down
76 changes: 70 additions & 6 deletions src/CachingNLL.cc
Original file line number Diff line number Diff line change
Expand Up @@ -839,7 +839,7 @@ cacheutils::CachingSimNLL::CachingSimNLL(RooSimultaneous *pdf, RooAbsData *data,
nuis_(nuis),
params_("params","parameters",this),
catParams_("catParams","Category parameters",this),
hideRooCategories_(false), hideConstants_(false)
hideRooCategories_(false), hideConstants_(false), maskConstraints_(false), maskingOffset_(0), maskingOffsetZero_(0)
{
setup_();
}
Expand All @@ -851,7 +851,11 @@ cacheutils::CachingSimNLL::CachingSimNLL(const CachingSimNLL &other, const char
params_("params","parameters",this),
catParams_("catParams","Category parameters",this),
hideRooCategories_(other.hideRooCategories_),
hideConstants_(other.hideConstants_)
hideConstants_(other.hideConstants_),
internalMasks_(other.internalMasks_),
maskConstraints_(other.maskConstraints_),
maskingOffset_(other.maskingOffset_),
maskingOffsetZero_(other.maskingOffsetZero_)
{
setup_();
}
Expand Down Expand Up @@ -989,6 +993,7 @@ cacheutils::CachingSimNLL::setup_()
datasets_.resize(pdfs_.size(), 0);
splitWithWeights(*dataOriginal_, simpdf->indexCat(), true);
//std::cout << "Pdf " << simpdf->GetName() <<" is a SimPdf over category " << catClone->GetName() << ", with " << pdfs_.size() << " bins" << std::endl;
unsigned int nchannels = 0;
for (int ib = 0, nb = pdfs_.size(); ib < nb; ++ib) {
catClone->setBin(ib);
RooAbsPdf *pdf = simpdf->getPdf(catClone->getLabel());
Expand All @@ -1001,12 +1006,19 @@ cacheutils::CachingSimNLL::setup_()
pdfs_[ib] = new CachingAddNLL(catClone->getLabel(), "", pdf, data, includeZeroWeights);
params_.add(pdfs_[ib]->params(), /*silent=*/true);
catParams_.add(pdfs_[ib]->catParams(), /*silent=*/true);
++nchannels;
} else {
pdfs_[ib] = 0;
//std::cout << " bin " << ib << " (label " << catClone->getLabel() << ") has no pdf" << std::endl;
}
}

std::cout << "SimNLL created with " << nchannels << " channels, " <<
constrainPdfs_.size() << " generic constraints, " <<
constrainPdfsFast_.size() << " fast gaussian constraints, " <<
constrainPdfsFastPoisson_.size() << " fast poisson constraints, " <<
constrainPdfGroups_.size() << " fast group constraints, " <<
std::endl;
setValueDirty();
}

Expand All @@ -1026,18 +1038,21 @@ cacheutils::CachingSimNLL::evaluate() const
unsigned idx = 0;
for (std::vector<CachingAddNLL*>::const_iterator it = pdfs_.begin(), ed = pdfs_.end(); it != ed; ++it, ++idx) {
if (*it != 0) {
if (channelMasks_.size() > 0 && channelMasks_[idx]->getVal() != 0.) {
if (!channelMasks_.empty() && channelMasks_[idx]->getVal() != 0.) {
// std::cout << "Channel " << (*it)->GetName() << " will be masked as "
// << channelMasks_[idx]->GetName() << " evalutes to "
// << channelMasks_[idx]->getVal() << "\n";
continue;
}
if (!internalMasks_.empty() && !internalMasks_[idx]) {
continue;
}
double nllval = (*it)->getVal();
// what sanity check could I put here?
ret += nllval;
}
}
if (!constrainPdfs_.empty() || !constrainPdfsFast_.empty()) {
if (!maskConstraints_ && (!constrainPdfs_.empty() || !constrainPdfsFast_.empty() || !constrainPdfsFastPoisson_.empty() || !constrainPdfGroups_.empty())) {
DefaultAccumulator<double> ret2 = 0;
/// ============= GENERIC CONSTRAINTS =========
std::vector<double>::const_iterator itz = constrainZeroPoints_.begin();
Expand Down Expand Up @@ -1074,6 +1089,7 @@ cacheutils::CachingSimNLL::evaluate() const
}
ret -= ret2.sum();
}
ret += (maskingOffset_ - maskingOffsetZero_);
#ifdef TRACE_NLL_EVALS
static unsigned long _trace_ = 0; _trace_++;
if (_trace_ % 10 == 0) { putchar('.'); fflush(stdout); }
Expand Down Expand Up @@ -1174,6 +1190,7 @@ void cacheutils::CachingSimNLL::setZeroPoint() {
for (SimpleConstraintGroup & g : constrainPdfGroups_) {
g.setZeroPoint();
}
maskingOffsetZero_ = maskingOffset_;
setValueDirty();
}

Expand All @@ -1185,6 +1202,7 @@ void cacheutils::CachingSimNLL::clearZeroPoint() {
std::fill(constrainZeroPointsFast_.begin(), constrainZeroPointsFast_.end(), 0.0);
std::fill(constrainZeroPointsFastPoisson_.begin(), constrainZeroPointsFastPoisson_.end(), 0.0);
for (SimpleConstraintGroup & g : constrainPdfGroups_) g.clearZeroPoint();
maskingOffsetZero_ = 0;
setValueDirty();
}

Expand Down Expand Up @@ -1231,9 +1249,55 @@ cacheutils::CachingSimNLL::getObservables(const RooArgSet* depList, Bool_t value
RooArgSet*
cacheutils::CachingSimNLL::getParameters(const RooArgSet* depList, Bool_t stripDisconnected) const
{
RooArgSet *ret = new RooArgSet(params_);
if (!hideRooCategories_) ret->add(catParams_);
RooArgSet *ret;
if (internalMasks_.empty()) {
ret = new RooArgSet(params_);
if (!hideRooCategories_) ret->add(catParams_);
} else {
ret = new RooArgSet(activeParameters_);
if (!hideRooCategories_) ret->add(activeCatParameters_);
}
if (hideConstants_) RooStats::RemoveConstantParameters(ret);
return ret;
}

void cacheutils::CachingSimNLL::setMaskConstraints(bool flag) {
double nllBefore = evaluate();
maskConstraints_ = flag;
double nllAfter = evaluate();
maskingOffset_ += (nllBefore - nllAfter);
//printf("CachingSimNLL: setMaskConstraints(%d): nll before %.12g, nll after %.12g (diff %.12g), new maskingOffset %.12g, check = %.12g\n",
// int(flag), nllBefore, nllAfter, (nllBefore-nllAfter), maskingOffset_, evaluate() - nllBefore);
}

void cacheutils::CachingSimNLL::setMaskNonDiscreteChannels(bool mask) {
double nllBefore = evaluate();
internalMasks_.clear(); // reset
activeParameters_.removeAll();
activeCatParameters_.removeAll();
if (mask) {
internalMasks_.resize(pdfs_.size(), false);
unsigned int idx = 0;
for (std::vector<CachingAddNLL*>::const_iterator it = pdfs_.begin(), ed = pdfs_.end(); it != ed; ++it, ++idx) {
if ((*it) == 0) continue;
RooLinkedListIter iter = (*it)->catParams().iterator();
for (RooAbsArg *P = (RooAbsArg *) iter.Next(); P != 0; P = (RooAbsArg *) iter.Next()) {
RooCategory *cat = dynamic_cast<RooCategory *>(P);
if (!cat) continue;
if (cat && !cat->isConstant()) {
internalMasks_[idx] = true;
activeParameters_.add((*it)->params(), /*silent=*/true);
activeCatParameters_.add((*it)->catParams(), /*silent=*/true);
std::cout << "Enabling channel " << (*it)->GetName() << " that depends on non-const category " << cat->GetName() << std::endl;
break;
}
}
}
}
double nllAfter = evaluate();
maskingOffset_ += (nllBefore - nllAfter);
//printf("CachingSimNLL: setMaskNonDiscreteChannels(%d): nll before %.12g, nll after %.12g (diff %.12g), new maskingOffset %.12g, check = %.12g\n",
// int(mask), nllBefore, nllAfter, (nllBefore-nllAfter), maskingOffset_, evaluate() - nllBefore);

}

48 changes: 43 additions & 5 deletions src/CascadeMinimizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ bool CascadeMinimizer::improve(int verbose, bool cascade, bool forceResetMinimiz
simnllbb->setAnalyticBarlowBeeston(true);
forceResetMinimizer = true;
}
if (forceResetMinimizer) remakeMinimizer();
if (forceResetMinimizer || !minimizer_.get()) remakeMinimizer();
minimizer_->setPrintLevel(verbose-1);

strategy_ = ROOT::Math::MinimizerOptions::DefaultStrategy(); // re-configure
Expand Down Expand Up @@ -171,6 +171,7 @@ bool CascadeMinimizer::improveOnce(int verbose, bool noHesse)
bool outcome = false;
double tol = ROOT::Math::MinimizerOptions::DefaultTolerance();
static int maxcalls = runtimedef::get("MINIMIZER_MaxCalls");
if (!minimizer_.get()) remakeMinimizer();
if (maxcalls) {
minimizer_->setMaxFunctionCalls(maxcalls);
minimizer_->setMaxIterations(maxcalls);
Expand Down Expand Up @@ -230,6 +231,7 @@ bool CascadeMinimizer::minos(const RooArgSet & params , int verbose ) {
utils::setAllConstant(toFreeze, false);
remakeMinimizer();
}
if (!minimizer_.get()) remakeMinimizer();
minimizer_->setPrintLevel(verbose-1); // for debugging
std::string myType(ROOT::Math::MinimizerOptions::DefaultMinimizerType());
std::string myAlgo(ROOT::Math::MinimizerOptions::DefaultMinimizerAlgo());
Expand Down Expand Up @@ -272,6 +274,7 @@ bool CascadeMinimizer::hesse(int verbose ) {
minimizer_->setStrategy(strategy_);
improveOnce(verbose - 1);
}
if (!minimizer_.get()) remakeMinimizer();
minimizer_->setPrintLevel(verbose-1); // for debugging
std::string myType(ROOT::Math::MinimizerOptions::DefaultMinimizerType());
std::string myAlgo(ROOT::Math::MinimizerOptions::DefaultMinimizerAlgo());
Expand Down Expand Up @@ -342,6 +345,7 @@ bool CascadeMinimizer::iterativeMinimize(double &minimumNLL,int verbose, bool ca

//if (simnll) simnll->clearZeroPoint();

TStopwatch tw; tw.Start();
utils::setAllConstant(frozen,false);

// Run one last fully floating fit to maintain RooFitResult
Expand All @@ -351,6 +355,8 @@ bool CascadeMinimizer::iterativeMinimize(double &minimumNLL,int verbose, bool ca
// unfreeze from *
freezeDiscParams(false);

tw.Stop(); if (verbose > 2) std::cout << "Done the full fit in " << tw.RealTime() << std::endl;

return ret;
}

Expand All @@ -369,7 +375,7 @@ bool CascadeMinimizer::minimize(int verbose, bool cascade)
bool doMultipleMini = (CascadeMinimizerGlobalConfigs::O().pdfCategories.getSize()>0);
if (runtimedef::get(std::string("MINIMIZER_skipDiscreteIterations"))) doMultipleMini=false;
// if ( doMultipleMini ) preFit_ = 1;

if (!minimizer_.get()) remakeMinimizer();
minimizer_->setPrintLevel(verbose-2);
minimizer_->setStrategy(strategy_);

Expand Down Expand Up @@ -462,6 +468,8 @@ bool CascadeMinimizer::minimize(int verbose, bool cascade)
bool CascadeMinimizer::multipleMinimize(const RooArgSet &reallyCleanParameters, bool& ret, double& minimumNLL, int verbose, bool cascade,int mode, std::vector<std::vector<bool> >&contributingIndeces){
static bool freezeDisassParams = runtimedef::get(std::string("MINIMIZER_freezeDisassociatedParams"));
static bool hideConstants = freezeDisassParams && runtimedef::get(std::string("MINIMIZER_multiMin_hideConstants"));
static bool maskConstraints = freezeDisassParams && runtimedef::get(std::string("MINIMIZER_multiMin_maskConstraints"));
static int maskChannels = freezeDisassParams ? runtimedef::get(std::string("MINIMIZER_multiMin_maskChannels")) : 0;
cacheutils::CachingSimNLL *simnll = dynamic_cast<cacheutils::CachingSimNLL *>(&nll_);

//RooTrace::active(true);
Expand Down Expand Up @@ -524,9 +532,13 @@ bool CascadeMinimizer::multipleMinimize(const RooArgSet &reallyCleanParameters,
RooArgSet snap;
params->snapshot(snap);

if (maskChannels && simnll) {
simnll->setMaskNonDiscreteChannels(true);
}
if (hideConstants && simnll) {
simnll->setHideConstants(true);
remakeMinimizer();
if (maskConstraints) simnll->setMaskConstraints(true);
minimizer_.reset(); // will be recreated when needed by whoever needs it
}

std::vector<std::vector<int> > myCombos;
Expand Down Expand Up @@ -555,13 +567,14 @@ bool CascadeMinimizer::multipleMinimize(const RooArgSet &reallyCleanParameters,
std::vector<std::vector<int> >::iterator my_it = myCombos.begin();
if (mode!=0) my_it++; // already did the best fit case

TStopwatch tw; tw.Start();

int fitCounter = 0;
for (;my_it!=myCombos.end(); my_it++){

bool isValidCombo = true;

int pdfIndex=0;
int pdfIndex=0, changedIndex = -1;
// Set the current indeces;
std::vector<int> cit = *my_it;
for (std::vector<int>::iterator it = cit.begin();
Expand All @@ -571,6 +584,7 @@ bool CascadeMinimizer::multipleMinimize(const RooArgSet &reallyCleanParameters,
if (!isValidCombo ) /*&& runShortCombinations)*/ continue;

fPdf = (RooCategory*) pdfCategoryIndeces.at(pdfIndex);
if (fPdf->getIndex() != *it) changedIndex = pdfIndex;
fPdf->setIndex(*it);
pdfIndex++;
}
Expand All @@ -587,6 +601,10 @@ bool CascadeMinimizer::multipleMinimize(const RooArgSet &reallyCleanParameters,

if (fitCounter>0) params->assignValueOnly(reallyCleanParameters); // no need to reset from 0'th fit

if (maskChannels == 2 && simnll) {
for (int id=0;id<numIndeces;id++) ((RooCategory*)(pdfCategoryIndeces.at(id)))->setConstant(id != changedIndex && changedIndex != -1);
simnll->setMaskNonDiscreteChannels(true);
}
// Remove parameters which are not associated to the current PDF (only works if using --X-rtd MINIMIZER_freezeDisassociatedParams)
freezeDiscParams(true);

Expand All @@ -597,13 +615,21 @@ bool CascadeMinimizer::multipleMinimize(const RooArgSet &reallyCleanParameters,

ret = improve(verbose, cascade, freezeDisassParams);

if (maskChannels == 2 && simnll) {
for (int id=0;id<numIndeces;id++) ((RooCategory*)(pdfCategoryIndeces.at(id)))->setConstant(false);
simnll->setMaskNonDiscreteChannels(false);
}
freezeDiscParams(false);


fitCounter++;
double thisNllValue = nll_.getVal();

if ( thisNllValue < minimumNLL ){
// Now we insert the correction !
if (verbose>2) {
std::cout << " .... Found a better fit: new NLL = " << thisNllValue << " (improvement: " << (thisNllValue-minimumNLL) << std::endl;
}
minimumNLL = thisNllValue;
//std::cout << " .... Found a better fit! hoorah! " << minimumNLL << std::endl;
snap.assignValueOnly(*params);
Expand All @@ -612,6 +638,11 @@ bool CascadeMinimizer::multipleMinimize(const RooArgSet &reallyCleanParameters,
if (bestIndeces[id] != ((RooCategory*)(pdfCategoryIndeces.at(id)))->getIndex() ) newDiscreteMinimum = true;
bestIndeces[id]=((RooCategory*)(pdfCategoryIndeces.at(id)))->getIndex();
}
if (verbose>2 && newDiscreteMinimum) {
std::cout << " .... Better fit corresponds to a new set of indices :=" ;
for (int id=0;id<numIndeces;id++) { std::cout << " " << bestIndeces[id]; }
std::cout << std::endl;
}
}

// FIXME this should be made configurable!
Expand Down Expand Up @@ -654,11 +685,18 @@ bool CascadeMinimizer::multipleMinimize(const RooArgSet &reallyCleanParameters,
runtimedef::set("MINIMIZER_analytic", currentBarlowBeeston);
ROOT::Math::MinimizerOptions::SetDefaultStrategy(backupStrategy);

tw.Stop(); if (verbose > 2) std::cout << "Done " << myCombos.size() << " combinations in " << tw.RealTime() << " s. New discrete minimum? " << newDiscreteMinimum << std::endl;

if (maskChannels && simnll) {
simnll->setMaskNonDiscreteChannels(false);
}
if (hideConstants && simnll) {
simnll->setHideConstants(false);
remakeMinimizer();
if (maskConstraints) simnll->setMaskConstraints(false);
minimizer_.reset(); // will be recreated when needed by whoever needs it
}


return newDiscreteMinimum;
}

Expand Down

0 comments on commit a6950ba

Please sign in to comment.