From 2fd2c1140561e2b11320466462a13976a0f19d7e Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Fri, 9 Jun 2023 17:31:57 -0500 Subject: [PATCH 01/24] Working CMSInterferenceFunc --- Makefile | 5 +- interface/CMSInterferenceFunc.h | 66 +++++++++++++++++ src/CMSInterferenceFunc.cxx | 123 ++++++++++++++++++++++++++++++++ src/classes.h | 1 + src/classes_def.xml | 4 ++ 5 files changed, 197 insertions(+), 2 deletions(-) create mode 100644 interface/CMSInterferenceFunc.h create mode 100644 src/CMSInterferenceFunc.cxx diff --git a/Makefile b/Makefile index c5d9a120597..51ce762aabb 100644 --- a/Makefile +++ b/Makefile @@ -53,11 +53,12 @@ DICTNAME=$(LIBNAME)_xr # Linker and flags ------------------------------------------------------------- LD = $(shell root-config --ld) ROOTLDFLAGS = $(shell root-config --ldflags) +ROOTLIBDIR = $(shell root-config --libdir) # OS x specific linkage DARWIN := $(shell uname|grep Darwin) ifdef DARWIN -LDFLAGS = $(ROOTLDFLAGS) -shared -install_name @rpath/$(SONAME) -fPIC -EXELDFLAGS = -Wl,-rpath,'@executable_path/../lib' +LDFLAGS = $(ROOTLDFLAGS) -g -shared -install_name @rpath/$(SONAME) -fPIC +EXELDFLAGS = -Wl,-rpath,'@executable_path/../lib' -Wl,-rpath,$(ROOTLIBDIR) else LDFLAGS = $(ROOTLDFLAGS) -shared -Wl,-soname,$(SONAME) -Wl,-E -Wl,-z,defs -fPIC EXELDFLAGS = diff --git a/interface/CMSInterferenceFunc.h b/interface/CMSInterferenceFunc.h new file mode 100644 index 00000000000..02391a680f2 --- /dev/null +++ b/interface/CMSInterferenceFunc.h @@ -0,0 +1,66 @@ +#ifndef CMSInterferenceFunc_h +#define CMSInterferenceFunc_h +#include + +#include "RooAbsReal.h" +#include "RooRealVar.h" +#include "RooRealProxy.h" +#include "RooListProxy.h" +#include "RooChangeTracker.h" + +#include "SimpleCacheSentry.h" + +class _InterferenceEval; + +class CMSInterferenceFunc : public RooAbsReal { + public: + CMSInterferenceFunc(); + CMSInterferenceFunc(CMSInterferenceFunc const& other, const char* name = 0); + /* + * For a coefficients list of length n and edges array of length b+1, + * the binscaling nested vector should have b entries (for b bins) with + * each being of length n*(n+1)/2, corresponding to the upper triangular + * elements of the scaling matrix, i.e. (m_00, m_01, m_02, ..., m_11, m_12, ...) + */ + CMSInterferenceFunc( + const char* name, + const char* title, + RooRealVar& x, + const RooArgList& coefficients, + const std::vector& edges, + const std::vector> binscaling + ); + virtual ~CMSInterferenceFunc(); + + virtual TObject* clone(const char* newname) const override { + return new CMSInterferenceFunc(*this, newname); + }; + + void printMultiline( + std::ostream& os, Int_t contents, Bool_t verbose, TString indent + ) const override; + + // public because it is called on deserializing, in classes_def.xml + void initialize() const; + + // batch accessor for CMSHistFunc / CMSHistSum + const std::vector& batchGetBinValues() const; + + protected: + RooRealProxy x_; + RooListProxy coefficients_; + std::vector edges_; + std::vector> binscaling_; + + mutable RooChangeTracker sentry_; //! + mutable std::unique_ptr<_InterferenceEval> evaluator_; //! + + double evaluate() const override; + + private: + void updateCache() const; + + ClassDefOverride(CMSInterferenceFunc, 1) +}; + +#endif // CMSInterferenceFunc_h diff --git a/src/CMSInterferenceFunc.cxx b/src/CMSInterferenceFunc.cxx new file mode 100644 index 00000000000..945089e7a8e --- /dev/null +++ b/src/CMSInterferenceFunc.cxx @@ -0,0 +1,123 @@ +#include "../interface/CMSInterferenceFunc.h" +#include "TBuffer.h" +#include + +class _InterferenceEval { + public: + _InterferenceEval(std::vector> scaling_in, size_t ncoef) { + binscaling_.reserve(scaling_in.size()); + for(const auto& bin : scaling_in) { + Eigen::MatrixXd mat(ncoef, ncoef); + size_t k=0; + for(size_t i=0; i& getValues() const { return values_; }; + + private: + std::vector binscaling_; + Eigen::VectorXd coefficients_; + std::vector values_; +}; + + +CMSInterferenceFunc::CMSInterferenceFunc() {}; + +CMSInterferenceFunc::CMSInterferenceFunc( + CMSInterferenceFunc const& other, const char* name + ) : + RooAbsReal(other, name), x_("x", this, other.x_), + coefficients_("coefficients", this, other.coefficients_), + edges_(other.edges_), binscaling_(other.binscaling_), + sentry_(other.sentry_, name) +{ + initialize(); +} + +CMSInterferenceFunc::CMSInterferenceFunc( + const char* name, const char* title, RooRealVar& x, + RooArgList const& coefficients, const std::vector& edges, + const std::vector> binscaling + ) : + RooAbsReal(name, title), x_("x", "", this, x), + coefficients_("coefficients", "", this), + edges_(edges), binscaling_(binscaling), + sentry_(TString(name) + "_sentry", "", coefficients) +{ + coefficients_.add(coefficients); + initialize(); +} + +CMSInterferenceFunc::~CMSInterferenceFunc() = default; + +void CMSInterferenceFunc::printMultiline( + std::ostream& os, Int_t contents, Bool_t verbose, TString indent + ) const { + RooAbsReal::printMultiline(os, contents, verbose, indent); + os << ">> Sentry: " << sentry_.hasChanged(false) << "\n"; + sentry_.Print("v"); +} + +void CMSInterferenceFunc::initialize() const { + // take the opportunity to validate persistent data + size_t nbins = edges_.size() - 1; + size_t ncoef = coefficients_.getSize(); + + if ( binscaling_.size() != nbins ) { + throw std::invalid_argument( + "Number of bins as determined from bin edges (" + + std::to_string(nbins) + ") does not match bin" + " scaling array (" + std::to_string(binscaling_.size()) + ")" + ); + } + for (size_t i=0; i < nbins; ++i) { + if ( binscaling_[i].size() != ncoef*(ncoef+1)/2 ) { + throw std::invalid_argument( + "Length of bin scaling matrix upper triangle for bin " + std::to_string(i) + + " (" + std::to_string(binscaling_[i].size() ) + ") is not consistent" + + " with the length of the coefficient array (" + std::to_string(ncoef) + ")" + ); + } + } + + evaluator_ = std::make_unique<_InterferenceEval>(binscaling_, ncoef); +} + +void CMSInterferenceFunc::updateCache() const { + for (int i=0; i < coefficients_.getSize(); ++i) { + evaluator_->setCoefficient(i, dynamic_cast(coefficients_.at(i))->getVal()); + } + evaluator_->computeValues(); +} + +double CMSInterferenceFunc::evaluate() const { + if ( sentry_.hasChanged(true) ) updateCache(); + + auto it = std::upper_bound(std::begin(edges_), std::end(edges_), x_->getVal()); + if ( (it == std::begin(edges_)) or (it == std::end(edges_)) ) { + return 0.0; + } + size_t idx = std::distance(std::begin(edges_), it) - 1; + return evaluator_->getValues()[idx]; +} + +const std::vector& CMSInterferenceFunc::batchGetBinValues() const { + // we don't really expect the cache to be valid, as upstream callers are + // managing their own and calling this only when dirty, but let's check anyway + if ( not sentry_.hasChanged(true) ) updateCache(); + return evaluator_->getValues(); +} + diff --git a/src/classes.h b/src/classes.h index fc5ece7e62a..41ad96a356a 100644 --- a/src/classes.h +++ b/src/classes.h @@ -66,3 +66,4 @@ #include "HiggsAnalysis/CombinedLimit/interface/RooCheapProduct.h" #include "HiggsAnalysis/CombinedLimit/interface/CMSHggFormula.h" #include "HiggsAnalysis/CombinedLimit/interface/SimpleProdPdf.h" +#include "HiggsAnalysis/CombinedLimit/interface/CMSInterferenceFunc.h" diff --git a/src/classes_def.xml b/src/classes_def.xml index ae303590427..43a07cebb72 100644 --- a/src/classes_def.xml +++ b/src/classes_def.xml @@ -221,4 +221,8 @@ + + + initialize();]]> + From 8c494e800be80c78994b672c36c26271564eaebc Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Tue, 13 Jun 2023 10:25:50 -0500 Subject: [PATCH 02/24] Give up on RooChangeTracker, add hook in CMSHistFunc --- interface/CMSHistFunc.h | 10 +++++++-- interface/CMSInterferenceFunc.h | 9 +++----- src/CMSHistFunc.cc | 37 ++++++++++++++++++++++++++++++--- src/CMSInterferenceFunc.cxx | 22 +++++++++++++------- src/classes_def.xml | 3 --- 5 files changed, 59 insertions(+), 22 deletions(-) diff --git a/interface/CMSHistFunc.h b/interface/CMSHistFunc.h index 53c09837458..2c8ee8b509a 100644 --- a/interface/CMSHistFunc.h +++ b/interface/CMSHistFunc.h @@ -15,6 +15,7 @@ #include "CMSHistV.h" #include "FastTemplate_Old.h" #include "SimpleCacheSentry.h" +#include "CMSInterferenceFunc.h" class CMSHistFuncWrapper; @@ -85,7 +86,7 @@ class CMSHistFunc : public RooAbsReal { virtual TObject* clone(const char* newname) const { return new CMSHistFunc(*this, newname); } - virtual ~CMSHistFunc() {} + virtual ~CMSHistFunc(); void addHorizontalMorph(RooAbsReal& hvar, TVectorD hpoints); @@ -148,6 +149,8 @@ class CMSHistFunc : public RooAbsReal { friend class CMSHistV; friend class CMSHistSum; + // TODO: allow any class that implements hasChanged() and batchGetBinValues() + void injectExternalMorph(CMSInterferenceFunc& morph); /* – RooAbsArg::setVerboseEval(Int_t level) • Level 0 – No messages @@ -190,6 +193,9 @@ class CMSHistFunc : public RooAbsReal { static bool enable_fast_vertical_; //! not to be serialized + // This is an "optional proxy", i.e. a list with either zero or one entry + RooListProxy external_morph_; + private: void initialize() const; void setGlobalCache() const; @@ -214,7 +220,7 @@ class CMSHistFunc : public RooAbsReal { void applyRebin() const; - ClassDef(CMSHistFunc, 1) + ClassDef(CMSHistFunc, 2) }; diff --git a/interface/CMSInterferenceFunc.h b/interface/CMSInterferenceFunc.h index 02391a680f2..4d8525b5027 100644 --- a/interface/CMSInterferenceFunc.h +++ b/interface/CMSInterferenceFunc.h @@ -6,8 +6,6 @@ #include "RooRealVar.h" #include "RooRealProxy.h" #include "RooListProxy.h" -#include "RooChangeTracker.h" - #include "SimpleCacheSentry.h" class _InterferenceEval; @@ -40,10 +38,8 @@ class CMSInterferenceFunc : public RooAbsReal { std::ostream& os, Int_t contents, Bool_t verbose, TString indent ) const override; - // public because it is called on deserializing, in classes_def.xml - void initialize() const; - // batch accessor for CMSHistFunc / CMSHistSum + bool hasChanged() const { return !sentry_.good(); }; const std::vector& batchGetBinValues() const; protected: @@ -52,12 +48,13 @@ class CMSInterferenceFunc : public RooAbsReal { std::vector edges_; std::vector> binscaling_; - mutable RooChangeTracker sentry_; //! + mutable SimpleCacheSentry sentry_; //! mutable std::unique_ptr<_InterferenceEval> evaluator_; //! double evaluate() const override; private: + void initialize() const; void updateCache() const; ClassDefOverride(CMSInterferenceFunc, 1) diff --git a/src/CMSHistFunc.cc b/src/CMSHistFunc.cc index 6c7463d7943..a05a5c1854f 100644 --- a/src/CMSHistFunc.cc +++ b/src/CMSHistFunc.cc @@ -48,7 +48,8 @@ CMSHistFunc::CMSHistFunc(const char* name, const char* title, RooRealVar& x, vtype_(VerticalSetting::QuadLinear), divide_by_width_(divide_by_width), vsmooth_par_(1.0), - fast_vertical_(false) { + fast_vertical_(false), + external_morph_("external_morph", "", this) { if (divide_by_width_) { for (unsigned i = 0; i < cache_.size(); ++i) { cache_[i] /= cache_.GetWidth(i); @@ -120,16 +121,25 @@ CMSHistFunc::CMSHistFunc(CMSHistFunc const& other, const char* name) vtype_(other.vtype_), divide_by_width_(other.divide_by_width_), vsmooth_par_(other.vsmooth_par_), - fast_vertical_(false) { + fast_vertical_(false), + external_morph_("external_morph", this, other.external_morph_) +{ // initialize(); } +CMSHistFunc::~CMSHistFunc() = default; + void CMSHistFunc::initialize() const { if (initialized_) return; vmorph_sentry_.SetName(TString(this->GetName()) + "_vmorph_sentry"); hmorph_sentry_.SetName(TString(this->GetName()) + "_hmorph_sentry"); hmorph_sentry_.addVars(hmorphs_); vmorph_sentry_.addVars(vmorphs_); + if ( external_morph_.getSize() ) { + RooArgSet* deps = external_morph_.at(0)->getParameters({*x_}); + vmorph_sentry_.addVars(*deps); + delete deps; + } hmorph_sentry_.setValueDirty(); vmorph_sentry_.setValueDirty(); vmorphs_vec_.resize(vmorphs_.getSize()); @@ -313,7 +323,8 @@ void CMSHistFunc::updateCache() const { if (mcache_.size() == 0) mcache_.resize(storage_.size()); } - if (step1) { + bool external_morph_updated = (external_morph_.getSize() && dynamic_cast(external_morph_.at(0))->hasChanged()); + if (step1 || external_morph_updated) { fast_vertical_ = false; } @@ -551,6 +562,16 @@ void CMSHistFunc::updateCache() const { } if (!fast_vertical_) { mcache_[idx].step2 = mcache_[idx].step1; + if ( external_morph_.getSize() ) { +#if HFVERBOSE > 0 + std::cout << "Template before external morph update:" << mcache_[idx].step2.Integral() << "\n"; + mcache_[idx].step2.Dump(); +#endif + auto& extdata = dynamic_cast(external_morph_.at(0))->batchGetBinValues(); + for(size_t i=0; i> Sentry: " << sentry_.hasChanged(false) << "\n"; + os << ">> Sentry: " << (sentry_.good() ? "clean" : "dirty") << "\n"; sentry_.Print("v"); } @@ -93,18 +91,25 @@ void CMSInterferenceFunc::initialize() const { } } + sentry_.SetName(TString(GetName()) + "_sentry"); + sentry_.addVars(coefficients_); + sentry_.setValueDirty(); evaluator_ = std::make_unique<_InterferenceEval>(binscaling_, ncoef); } void CMSInterferenceFunc::updateCache() const { for (int i=0; i < coefficients_.getSize(); ++i) { - evaluator_->setCoefficient(i, dynamic_cast(coefficients_.at(i))->getVal()); + auto* coef = dynamic_cast(coefficients_.at(i)); + if ( coef == nullptr ) throw std::runtime_error("Lost coef!"); + evaluator_->setCoefficient(i, coef->getVal()); } evaluator_->computeValues(); + sentry_.reset(); } double CMSInterferenceFunc::evaluate() const { - if ( sentry_.hasChanged(true) ) updateCache(); + if ( not evaluator_ ) initialize(); + if ( not sentry_.good() ) updateCache(); auto it = std::upper_bound(std::begin(edges_), std::end(edges_), x_->getVal()); if ( (it == std::begin(edges_)) or (it == std::end(edges_)) ) { @@ -117,7 +122,8 @@ double CMSInterferenceFunc::evaluate() const { const std::vector& CMSInterferenceFunc::batchGetBinValues() const { // we don't really expect the cache to be valid, as upstream callers are // managing their own and calling this only when dirty, but let's check anyway - if ( not sentry_.hasChanged(true) ) updateCache(); + if ( not evaluator_ ) initialize(); + if ( not sentry_.good() ) updateCache(); return evaluator_->getValues(); } diff --git a/src/classes_def.xml b/src/classes_def.xml index 43a07cebb72..61ff7496828 100644 --- a/src/classes_def.xml +++ b/src/classes_def.xml @@ -222,7 +222,4 @@ - - initialize();]]> - From 6619f7eb2904df94e8d13bbc9a2189973721898a Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Tue, 13 Jun 2023 11:09:02 -0500 Subject: [PATCH 03/24] Add test to exercise functionality --- test/test_interference.py | 134 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100755 test/test_interference.py diff --git a/test/test_interference.py b/test/test_interference.py new file mode 100755 index 00000000000..29eee08e4f1 --- /dev/null +++ b/test/test_interference.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +import ROOT +import numpy as np +import subprocess + + +def array2vector_2d(array): + assert len(array.shape) == 2 + out = ROOT.std.vector["std::vector"]() + out.reserve(len(array)) + for row in array: + out.push_back(row) + return out + + +def to_TH1(name, array): + h = ROOT.TH1D(name, name, 20, 250, 1200) + for i, val in enumerate(array): + h.SetBinContent(i+1, val) + return h + + +# make some shapes +fout = ROOT.TFile("shapes.root", "recreate") +nom = 100 * np.array([0.14253603, 0.21781641, 0.22698837, 0.19603483, 0.19259561, 0.15552859, 0.13909682, 0.09438712, 0.08521593, 0.06878416, 0.06419854, 0.04318116, 0.04776676, 0.03057073, 0.02866007, 0.02292805, 0.02139951, 0.02063524, 0.01222829, 0.01337466]) +obs = nom.copy() + +hnom = to_TH1("VBFHH", nom) +for i, val in enumerate(nom): + hnom.SetBinError(i+1, np.sqrt(val)*0.1) +hnom.Write() +to_TH1("VBFHH_jesUp", nom*np.linspace(0.95, 1.05, 20)).Write() +to_TH1("VBFHH_jesDown", nom/np.linspace(0.95, 1.05, 20)).Write() + +nom = 100*np.exp(-np.linspace(0, 1, 20)) +obs += nom +hnom = to_TH1("background", nom) +for i, val in enumerate(nom): + hnom.SetBinError(i+1, np.sqrt(val)*0.1) +hnom.Write() +to_TH1("background_jesUp", nom*np.linspace(0.95, 1.05, 20)).Write() +to_TH1("background_jesDown", nom/np.linspace(0.95, 1.05, 20)).Write() + +to_TH1("data_obs", np.round(obs)).Write() + +# write a card +with open("card.txt", "w") as fout: + fout.write("""\ +Combination of card.txt +imax 1 number of bins +jmax 1 number of processes minus 1 +kmax 3 number of nuisance parameters +---------------------------------------------------------------------------------------------------------------------------------- +shapes * ch1 shapes.root $PROCESS $PROCESS_$SYSTEMATIC +---------------------------------------------------------------------------------------------------------------------------------- +bin ch1 +observation -1 +---------------------------------------------------------------------------------------------------------------------------------- +bin ch1 ch1 +process VBFHH background +process 0 1 +rate -1 -1 +---------------------------------------------------------------------------------------------------------------------------------- +bgnorm lnN - 1.3 +jes shape 1.0 1.0 +lumi lnN 1.02 1.02 +* autoMCStats 0 +""") + +subprocess.call("text2workspace.py card.txt".split(" ")) +fcard = ROOT.TFile.Open("card.root") +w = fcard.Get("w") + +scaling = array2vector_2d( + np.array( + [ + [ 3.303536746664150e00, -8.541709820382220e00, 4.235348320712800e00, 2.296464188467882e01, -1.107996258835088e01, 5.504469544697623e00, ], + [ 2.206443321428910e00, -7.076836641962523e00, 4.053185685866683e00, 2.350989689214267e01, -1.308569222837996e01, 7.502346155380032e00, ], + [ 2.323314512827915e00, -9.040565356058327e00, 6.135410429832603e00, 3.597836755817654e01, -2.387500375686657e01, 1.625863529518014e01, ], + [ 5.925805332888091e-01, -3.139640204484572e00, 1.976422909655008e00, 1.713589230378096e01, -1.051500934722200e01, 6.627980447033357e00, ], + [ 3.505170703269003e-01, -2.237551781735236e00, 1.483246407331564e00, 1.484170437843385e01, -9.493221195626012e00, 6.302831691298613e00, ], + [ 2.334740816002986e-01, -1.799815305424720e00, 1.225734691240980e00, 1.440275907010786e01, -9.473328823485497e00, 6.458585723630323e00, ], + [ 1.959374052725543e-01, -1.757624939190617e00, 1.257071800464856e00, 1.626453309470772e01, -1.127433100208976e01, 8.089297781650776e00, ], + [ 1.865040295194290e-01, -1.822069966644771e00, 1.391201452068932e00, 1.849884116334009e01, -1.360944960795888e01, 1.039529105220993e01, ], + [ 9.726648808697150e-02, -1.169327097322687e00, 8.635283997541450e-01, 1.459008882076805e01, -1.037926824643990e01, 7.682778579161866e00, ], + [ 8.780552015079510e-02, -1.156467907936071e00, 9.032722960981284e-01, 1.589008007570035e01, -1.189269332401088e01, 9.313892275846490e00, ], + [ 2.006114827320551e-01, -2.776793529232688e00, 2.335437756631023e00, 3.912099794229794e01, -3.232054661687562e01, 2.720219535392458e01, ], + [ 5.179066270217598e-02, -8.550833654170061e-01, 6.753041305320768e-01, 1.482948643635128e01, -1.117147343869588e01, 8.821228248108168e00, ], + [ 5.095452842967559e-02, -9.482107499527080e-01, 7.561787601252029e-01, 1.825692153880631e01, -1.408060600559859e01, 1.123739992361621e01, ], + [ 1.801729907958822e-02, -4.102710173962256e-01, 2.950547079487041e-01, 9.828612567782274e00, -6.724075992501888e00, 4.831954737036956e00, ], + [ 2.762233787081839e-02, -6.400852678490596e-01, 5.127706114146487e-01, 1.546123935330864e01, -1.188156027745066e01, 9.528888176590677e00, ], + [ 3.165499001015300e-02, -7.790988142691099e-01, 6.486071576135261e-01, 1.986193598270219e01, -1.597020317079789e01, 1.330779868219462e01, ], + [ 1.316620750610005e-02, -3.821583465978776e-01, 2.914985575363581e-01, 1.167177892863827e01, -8.480579644763935e00, 6.457533731506537e00, ], + [ 2.886802887767344e-02, -8.369930524359994e-01, 7.103796160970196e-01, 2.491187028333814e01, -2.057801184441048e01, 1.746851224928310e01, ], + [ 2.930989281275648e-02, -9.242683589392606e-01, 7.918145320034853e-01, 2.981526833971985e01, -2.501001674094063e01, 2.144036290322020e01, ], + [ 5.160360892358020e-02, -1.673006024492507e00, 1.526329404862879e00, 5.506880032083917e01, -4.949344845658728e01, 4.515984622267106e01, ], + ] + ) +) + +kv, k2v, kl = ( + w.factory("kv[1, 0, 2]"), + w.factory("k2v[1, 0, 2]"), + w.factory("kl[1, 0, 2]"), +) + +w.Import( + ROOT.CMSInterferenceFunc( + "ch1_vbfhh_morph", + "", + w.var("CMS_th1x"), + ROOT.RooArgList( + w.factory("expr('@0*@1', kv, kl)"), + w.factory("expr('@0*@0', kv)"), + k2v, + ), + list(range(21)), + scaling, + ), + ROOT.RooFit.RecycleConflictNodes() +) +func = w.function("ch1_vbfhh_morph") +print(func.getVal()) + +histfunc = w.function("shapeSig_ch1_VBFHH_morph") +histfunc.injectExternalMorph(func) + +fout = ROOT.TFile("card_morph.root", "recreate") +w.Write() +fout.Close() + +subprocess.call( + "combine -M MultiDimFit card_morph.root --redefineSignalPOIs kv,k2v,kl --freezeParameters r --setParameters r=1,kv=1,k2v=1,kl=1 -t 100".split(" ") +) From a62418d2ebde5395765efa80a0322eab2f5a64f7 Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Tue, 13 Jun 2023 11:18:07 -0500 Subject: [PATCH 04/24] Lint --- test/test_interference.py | 249 +++++++++++++++++++++++++++++++------- 1 file changed, 208 insertions(+), 41 deletions(-) diff --git a/test/test_interference.py b/test/test_interference.py index 29eee08e4f1..dee90307dd9 100755 --- a/test/test_interference.py +++ b/test/test_interference.py @@ -16,36 +16,60 @@ def array2vector_2d(array): def to_TH1(name, array): h = ROOT.TH1D(name, name, 20, 250, 1200) for i, val in enumerate(array): - h.SetBinContent(i+1, val) + h.SetBinContent(i + 1, val) return h # make some shapes fout = ROOT.TFile("shapes.root", "recreate") -nom = 100 * np.array([0.14253603, 0.21781641, 0.22698837, 0.19603483, 0.19259561, 0.15552859, 0.13909682, 0.09438712, 0.08521593, 0.06878416, 0.06419854, 0.04318116, 0.04776676, 0.03057073, 0.02866007, 0.02292805, 0.02139951, 0.02063524, 0.01222829, 0.01337466]) +nom = 100 * np.array( + [ + 0.14253603, + 0.21781641, + 0.22698837, + 0.19603483, + 0.19259561, + 0.15552859, + 0.13909682, + 0.09438712, + 0.08521593, + 0.06878416, + 0.06419854, + 0.04318116, + 0.04776676, + 0.03057073, + 0.02866007, + 0.02292805, + 0.02139951, + 0.02063524, + 0.01222829, + 0.01337466, + ] +) obs = nom.copy() hnom = to_TH1("VBFHH", nom) for i, val in enumerate(nom): - hnom.SetBinError(i+1, np.sqrt(val)*0.1) + hnom.SetBinError(i + 1, np.sqrt(val) * 0.1) hnom.Write() -to_TH1("VBFHH_jesUp", nom*np.linspace(0.95, 1.05, 20)).Write() -to_TH1("VBFHH_jesDown", nom/np.linspace(0.95, 1.05, 20)).Write() +to_TH1("VBFHH_jesUp", nom * np.linspace(0.95, 1.05, 20)).Write() +to_TH1("VBFHH_jesDown", nom / np.linspace(0.95, 1.05, 20)).Write() -nom = 100*np.exp(-np.linspace(0, 1, 20)) +nom = 100 * np.exp(-np.linspace(0, 1, 20)) obs += nom hnom = to_TH1("background", nom) for i, val in enumerate(nom): - hnom.SetBinError(i+1, np.sqrt(val)*0.1) + hnom.SetBinError(i + 1, np.sqrt(val) * 0.1) hnom.Write() -to_TH1("background_jesUp", nom*np.linspace(0.95, 1.05, 20)).Write() -to_TH1("background_jesDown", nom/np.linspace(0.95, 1.05, 20)).Write() +to_TH1("background_jesUp", nom * np.linspace(0.95, 1.05, 20)).Write() +to_TH1("background_jesDown", nom / np.linspace(0.95, 1.05, 20)).Write() to_TH1("data_obs", np.round(obs)).Write() # write a card with open("card.txt", "w") as fout: - fout.write("""\ + fout.write( + """\ Combination of card.txt imax 1 number of bins jmax 1 number of processes minus 1 @@ -53,19 +77,20 @@ def to_TH1(name, array): ---------------------------------------------------------------------------------------------------------------------------------- shapes * ch1 shapes.root $PROCESS $PROCESS_$SYSTEMATIC ---------------------------------------------------------------------------------------------------------------------------------- -bin ch1 -observation -1 +bin ch1 +observation -1 ---------------------------------------------------------------------------------------------------------------------------------- -bin ch1 ch1 +bin ch1 ch1 process VBFHH background -process 0 1 -rate -1 -1 +process 0 1 +rate -1 -1 ---------------------------------------------------------------------------------------------------------------------------------- -bgnorm lnN - 1.3 -jes shape 1.0 1.0 -lumi lnN 1.02 1.02 +bgnorm lnN - 1.3 +jes shape 1.0 1.0 +lumi lnN 1.02 1.02 * autoMCStats 0 -""") +""" + ) subprocess.call("text2workspace.py card.txt".split(" ")) fcard = ROOT.TFile.Open("card.root") @@ -74,26 +99,166 @@ def to_TH1(name, array): scaling = array2vector_2d( np.array( [ - [ 3.303536746664150e00, -8.541709820382220e00, 4.235348320712800e00, 2.296464188467882e01, -1.107996258835088e01, 5.504469544697623e00, ], - [ 2.206443321428910e00, -7.076836641962523e00, 4.053185685866683e00, 2.350989689214267e01, -1.308569222837996e01, 7.502346155380032e00, ], - [ 2.323314512827915e00, -9.040565356058327e00, 6.135410429832603e00, 3.597836755817654e01, -2.387500375686657e01, 1.625863529518014e01, ], - [ 5.925805332888091e-01, -3.139640204484572e00, 1.976422909655008e00, 1.713589230378096e01, -1.051500934722200e01, 6.627980447033357e00, ], - [ 3.505170703269003e-01, -2.237551781735236e00, 1.483246407331564e00, 1.484170437843385e01, -9.493221195626012e00, 6.302831691298613e00, ], - [ 2.334740816002986e-01, -1.799815305424720e00, 1.225734691240980e00, 1.440275907010786e01, -9.473328823485497e00, 6.458585723630323e00, ], - [ 1.959374052725543e-01, -1.757624939190617e00, 1.257071800464856e00, 1.626453309470772e01, -1.127433100208976e01, 8.089297781650776e00, ], - [ 1.865040295194290e-01, -1.822069966644771e00, 1.391201452068932e00, 1.849884116334009e01, -1.360944960795888e01, 1.039529105220993e01, ], - [ 9.726648808697150e-02, -1.169327097322687e00, 8.635283997541450e-01, 1.459008882076805e01, -1.037926824643990e01, 7.682778579161866e00, ], - [ 8.780552015079510e-02, -1.156467907936071e00, 9.032722960981284e-01, 1.589008007570035e01, -1.189269332401088e01, 9.313892275846490e00, ], - [ 2.006114827320551e-01, -2.776793529232688e00, 2.335437756631023e00, 3.912099794229794e01, -3.232054661687562e01, 2.720219535392458e01, ], - [ 5.179066270217598e-02, -8.550833654170061e-01, 6.753041305320768e-01, 1.482948643635128e01, -1.117147343869588e01, 8.821228248108168e00, ], - [ 5.095452842967559e-02, -9.482107499527080e-01, 7.561787601252029e-01, 1.825692153880631e01, -1.408060600559859e01, 1.123739992361621e01, ], - [ 1.801729907958822e-02, -4.102710173962256e-01, 2.950547079487041e-01, 9.828612567782274e00, -6.724075992501888e00, 4.831954737036956e00, ], - [ 2.762233787081839e-02, -6.400852678490596e-01, 5.127706114146487e-01, 1.546123935330864e01, -1.188156027745066e01, 9.528888176590677e00, ], - [ 3.165499001015300e-02, -7.790988142691099e-01, 6.486071576135261e-01, 1.986193598270219e01, -1.597020317079789e01, 1.330779868219462e01, ], - [ 1.316620750610005e-02, -3.821583465978776e-01, 2.914985575363581e-01, 1.167177892863827e01, -8.480579644763935e00, 6.457533731506537e00, ], - [ 2.886802887767344e-02, -8.369930524359994e-01, 7.103796160970196e-01, 2.491187028333814e01, -2.057801184441048e01, 1.746851224928310e01, ], - [ 2.930989281275648e-02, -9.242683589392606e-01, 7.918145320034853e-01, 2.981526833971985e01, -2.501001674094063e01, 2.144036290322020e01, ], - [ 5.160360892358020e-02, -1.673006024492507e00, 1.526329404862879e00, 5.506880032083917e01, -4.949344845658728e01, 4.515984622267106e01, ], + [ + 3.303536746664150e00, + -8.541709820382220e00, + 4.235348320712800e00, + 2.296464188467882e01, + -1.107996258835088e01, + 5.504469544697623e00, + ], + [ + 2.206443321428910e00, + -7.076836641962523e00, + 4.053185685866683e00, + 2.350989689214267e01, + -1.308569222837996e01, + 7.502346155380032e00, + ], + [ + 2.323314512827915e00, + -9.040565356058327e00, + 6.135410429832603e00, + 3.597836755817654e01, + -2.387500375686657e01, + 1.625863529518014e01, + ], + [ + 5.925805332888091e-01, + -3.139640204484572e00, + 1.976422909655008e00, + 1.713589230378096e01, + -1.051500934722200e01, + 6.627980447033357e00, + ], + [ + 3.505170703269003e-01, + -2.237551781735236e00, + 1.483246407331564e00, + 1.484170437843385e01, + -9.493221195626012e00, + 6.302831691298613e00, + ], + [ + 2.334740816002986e-01, + -1.799815305424720e00, + 1.225734691240980e00, + 1.440275907010786e01, + -9.473328823485497e00, + 6.458585723630323e00, + ], + [ + 1.959374052725543e-01, + -1.757624939190617e00, + 1.257071800464856e00, + 1.626453309470772e01, + -1.127433100208976e01, + 8.089297781650776e00, + ], + [ + 1.865040295194290e-01, + -1.822069966644771e00, + 1.391201452068932e00, + 1.849884116334009e01, + -1.360944960795888e01, + 1.039529105220993e01, + ], + [ + 9.726648808697150e-02, + -1.169327097322687e00, + 8.635283997541450e-01, + 1.459008882076805e01, + -1.037926824643990e01, + 7.682778579161866e00, + ], + [ + 8.780552015079510e-02, + -1.156467907936071e00, + 9.032722960981284e-01, + 1.589008007570035e01, + -1.189269332401088e01, + 9.313892275846490e00, + ], + [ + 2.006114827320551e-01, + -2.776793529232688e00, + 2.335437756631023e00, + 3.912099794229794e01, + -3.232054661687562e01, + 2.720219535392458e01, + ], + [ + 5.179066270217598e-02, + -8.550833654170061e-01, + 6.753041305320768e-01, + 1.482948643635128e01, + -1.117147343869588e01, + 8.821228248108168e00, + ], + [ + 5.095452842967559e-02, + -9.482107499527080e-01, + 7.561787601252029e-01, + 1.825692153880631e01, + -1.408060600559859e01, + 1.123739992361621e01, + ], + [ + 1.801729907958822e-02, + -4.102710173962256e-01, + 2.950547079487041e-01, + 9.828612567782274e00, + -6.724075992501888e00, + 4.831954737036956e00, + ], + [ + 2.762233787081839e-02, + -6.400852678490596e-01, + 5.127706114146487e-01, + 1.546123935330864e01, + -1.188156027745066e01, + 9.528888176590677e00, + ], + [ + 3.165499001015300e-02, + -7.790988142691099e-01, + 6.486071576135261e-01, + 1.986193598270219e01, + -1.597020317079789e01, + 1.330779868219462e01, + ], + [ + 1.316620750610005e-02, + -3.821583465978776e-01, + 2.914985575363581e-01, + 1.167177892863827e01, + -8.480579644763935e00, + 6.457533731506537e00, + ], + [ + 2.886802887767344e-02, + -8.369930524359994e-01, + 7.103796160970196e-01, + 2.491187028333814e01, + -2.057801184441048e01, + 1.746851224928310e01, + ], + [ + 2.930989281275648e-02, + -9.242683589392606e-01, + 7.918145320034853e-01, + 2.981526833971985e01, + -2.501001674094063e01, + 2.144036290322020e01, + ], + [ + 5.160360892358020e-02, + -1.673006024492507e00, + 1.526329404862879e00, + 5.506880032083917e01, + -4.949344845658728e01, + 4.515984622267106e01, + ], ] ) ) @@ -117,7 +282,7 @@ def to_TH1(name, array): list(range(21)), scaling, ), - ROOT.RooFit.RecycleConflictNodes() + ROOT.RooFit.RecycleConflictNodes(), ) func = w.function("ch1_vbfhh_morph") print(func.getVal()) @@ -130,5 +295,7 @@ def to_TH1(name, array): fout.Close() subprocess.call( - "combine -M MultiDimFit card_morph.root --redefineSignalPOIs kv,k2v,kl --freezeParameters r --setParameters r=1,kv=1,k2v=1,kl=1 -t 100".split(" ") + "combine -M MultiDimFit card_morph.root --redefineSignalPOIs kv,k2v,kl --freezeParameters r --setParameters r=1,kv=1,k2v=1,kl=1 -t 100".split( + " " + ) ) From 7bc66a4dc99aad03023f3188c092929f6a565654 Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Tue, 13 Jun 2023 11:22:24 -0500 Subject: [PATCH 05/24] Lint correctly --- test/test_interference.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/test_interference.py b/test/test_interference.py index dee90307dd9..4bfe43b9785 100755 --- a/test/test_interference.py +++ b/test/test_interference.py @@ -295,7 +295,5 @@ def to_TH1(name, array): fout.Close() subprocess.call( - "combine -M MultiDimFit card_morph.root --redefineSignalPOIs kv,k2v,kl --freezeParameters r --setParameters r=1,kv=1,k2v=1,kl=1 -t 100".split( - " " - ) + "combine -M MultiDimFit card_morph.root --redefineSignalPOIs kv,k2v,kl --freezeParameters r --setParameters r=1,kv=1,k2v=1,kl=1 -t 100".split(" ") ) From 1473ddf9671150e3994c8a35868f14ee003c7097 Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Tue, 13 Jun 2023 12:28:39 -0500 Subject: [PATCH 06/24] Errant space is a problem on linux --- test/test_interference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_interference.py b/test/test_interference.py index 4bfe43b9785..a5c302683c6 100755 --- a/test/test_interference.py +++ b/test/test_interference.py @@ -295,5 +295,5 @@ def to_TH1(name, array): fout.Close() subprocess.call( - "combine -M MultiDimFit card_morph.root --redefineSignalPOIs kv,k2v,kl --freezeParameters r --setParameters r=1,kv=1,k2v=1,kl=1 -t 100".split(" ") + "combine -M MultiDimFit card_morph.root --redefineSignalPOIs kv,k2v,kl --freezeParameters r --setParameters r=1,kv=1,k2v=1,kl=1 -t 100".split(" ") ) From 76fa6ac694e0cef9fda5c617ba6721f763217e51 Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Tue, 22 Aug 2023 09:15:31 -0500 Subject: [PATCH 07/24] Use static cast for speed --- src/CMSInterferenceFunc.cxx | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/CMSInterferenceFunc.cxx b/src/CMSInterferenceFunc.cxx index 10af7517013..c9aad55f3ce 100644 --- a/src/CMSInterferenceFunc.cxx +++ b/src/CMSInterferenceFunc.cxx @@ -74,6 +74,16 @@ void CMSInterferenceFunc::initialize() const { size_t nbins = edges_.size() - 1; size_t ncoef = coefficients_.getSize(); + for (size_t i=0; i < ncoef; ++i) { + if ( coefficients_.at(i) == nullptr ) { + throw std::invalid_argument("Lost coefficient " + std::to_string(i)); + } + if ( not coefficients_.at(i)->InheritsFrom("RooAbsReal") ) { + throw std::invalid_argument( + "Coefficient " + std::to_string(i) + " is not a RooAbsReal" + ); + } + } if ( binscaling_.size() != nbins ) { throw std::invalid_argument( "Number of bins as determined from bin edges (" @@ -99,7 +109,7 @@ void CMSInterferenceFunc::initialize() const { void CMSInterferenceFunc::updateCache() const { for (int i=0; i < coefficients_.getSize(); ++i) { - auto* coef = dynamic_cast(coefficients_.at(i)); + auto* coef = static_cast(coefficients_.at(i)); if ( coef == nullptr ) throw std::runtime_error("Lost coef!"); evaluator_->setCoefficient(i, coef->getVal()); } From 7f90b0c2df0d242c1a5ec9e533bc74a36f1a25da Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Wed, 23 Aug 2023 14:02:58 -0500 Subject: [PATCH 08/24] Implement a PhysicsModel to set up interference morphing --- .flake8 | 3 +- python/InterferenceModels.py | 166 +++++++++++++++++++++++++++++++++++ test/test_interference.py | 69 ++++++--------- 3 files changed, 195 insertions(+), 43 deletions(-) create mode 100644 python/InterferenceModels.py diff --git a/.flake8 b/.flake8 index 6742d005678..5b45d35fefd 100644 --- a/.flake8 +++ b/.flake8 @@ -15,5 +15,6 @@ # E741: ambiguous variable name 'l' (too pedantic! get a good font) # E711: sometimes needed for comparison to ROOT nullptr (better to cast to bool) # F401: unused import (some may have side effects, need to clean by hand) -ignore = F403,F405,E402,W504,E203,W503,E262,E265,E266,E501,E741,E711,F401 +# E721: do not compare types (new, could be fixed relatively easily) +ignore = F403,F405,E402,W504,E203,W503,E262,E265,E266,E501,E741,E711,F401,E721 max-line-length = 160 diff --git a/python/InterferenceModels.py b/python/InterferenceModels.py new file mode 100644 index 00000000000..f9289f30dd2 --- /dev/null +++ b/python/InterferenceModels.py @@ -0,0 +1,166 @@ +import gzip +import json +import pickle +import re + +import numpy as np +import ROOT +from HiggsAnalysis.CombinedLimit.PhysicsModel import PhysicsModelBase_NiceSubclasses + + +def read_scaling(path: str): + if path.endswith(".json"): + with open(path) as fin: + out = json.load(fin) + elif path.endswith(".json.gz"): + with gzip.open(path, "rt") as fin: + out = json.load(fin) + elif path.endswith(".pkl.gz"): + with gzip.open(path, "rb") as fin: + out = pickle.load(fin) + else: + raise RuntimeError(f"Unrecognized scaling data path {path}; must be either .json, .json.gz, or .pkl.gz") + # normalize + if not isinstance(out, list): + raise RuntimeError("Scaling data in invalid format: expected list") + expected_fields = {"channel", "process", "parameters", "scaling"} + for item in out: + if not isinstance(item, dict): + raise RuntimeError("Scaling data in invalid format: expected each item in list to be a dict") + missing = expected_fields - set(item) + if missing: + raise RuntimeError(f"Missing fields in scaling item: {missing}") + shortname = item["channel"] + "/" + item["process"] + if not all(isinstance(par, str) for par in item["parameters"]): + raise RuntimeError(f"Parameters must be a list of strings in {shortname}") + try: + item["scaling"] = np.array(item["scaling"], dtype=float) + except ValueError as ex: + raise RuntimeError(f"Scaling data invalid: could not normalize array for {shortname}") from ex + if len(item["scaling"].shape) != 2: + raise RuntimeError(f"Scaling data invalid: array shape incorrect for {shortname}") + npar = len(item["parameters"]) + if item["scaling"].shape[1] != npar * (npar + 1) // 2: + raise RuntimeError(f"Scaling data invalid: array has insufficent terms for parameters in {shortname}") + return out + + +class InterferenceModel(PhysicsModelBase_NiceSubclasses): + def __init__(self): + self.verbose = False + self.scaling = None + self.scaling_map = {} + self.explict_pois = None + super(InterferenceModel, self).__init__() + + def processPhysicsOptions(self, physOptions): + processed = [] + for po in physOptions: + if po == "verbose": + self.verbose = True + processed.append(po) + elif po.startswith("scalingData="): + self.scaling = read_scaling(po[len("scalingData=") :]) + processed.append(po) + elif po.startswith("POIs="): + self.explict_pois = po[len("POIs=") :].split(":") + processed.append(po) + if self.scaling is None: + raise RuntimeError("Must specify --PO=scalingData= physics option!") + # make map for quick lookup + for item in self.scaling: + self.scaling_map[(item["channel"], item["process"])] = item + return processed + super(InterferenceModel, self).processPhysicsOptions(physOptions) + + def getPOIList(self): + poiNames = [] + if self.explict_pois: + if self.verbose: + print("Building explicitly requested POIs:") + for poi in self.explict_pois: + if self.verbose: + print(f" - {poi}") + self.modelBuilder.doVar(poi) + poiname = re.sub(r"\[.*", "", poi) + if not self.modelBuilder.out.var(poiname): + raise RuntimeError(f"Could not find POI {poiname} after factory call") + poiNames.append(poiname) + # RooFit would also detect duplicate params with mismatched factory, but this is faster + constructed = {} + for item in self.scaling: + item["parameters_inworkspace"] = [] + for param in item["parameters"]: + if param in constructed: + rooparam = constructed[param] + else: + rooparam = self.modelBuilder.factory_(param) + if not rooparam: + raise RuntimeError(f"Failed to build {param}") + if not self.explict_pois and isinstance(rooparam, ROOT.RooRealVar) and not rooparam.isConstant(): + if self.verbose: + print(f"Assuming parameter {param} is to be a POI (name: {rooparam.GetName()})") + poiNames.append(rooparam.GetName()) + constructed[param] = rooparam + # save for later use in making CMSInterferenceFunc + item["parameters_inworkspace"].append(rooparam) + if self.verbose: + print(f"All POIs: {poiNames}") + return poiNames + + def getYieldScale(self, channel, process): + string = "%s/%s" % (channel, process) + try: + item = self.scaling_map[(channel, process)] + except KeyError: + if self.verbose: + print(f"Will scale {string} by 1") + return 1 + print(f"Will scale {string} using CMSInterferenceFunc dependent on parameters {item['parameters']}") + item["found"] = True + # We don't actually scale the total via this mechanism + # and we'll set up the shape effects in done() since shapes aren't available yet + return 1 + + def done(self): + for item in self.scaling_map.values(): + channel = item["channel"] + process = item["process"] + string = "%s/%s" % (channel, process) + if "found" not in item: + if self.verbose: + print(f"Did not find {string} in workspace, even though it is in scaling data") + continue + + hfname = f"shapeSig_{channel}_{process}_morph" + histfunc = self.modelBuilder.out.function(hfname) + if not histfunc: + # if there are no systematics, it ends up with a different name? + hfname = f"shapeSig_{process}_{channel}_rebinPdf" + histfunc = self.modelBuilder.out.function(hfname) + # TODO: support FastVerticalInterpHistPdf2 + if not isinstance(histfunc, ROOT.CMSHistFunc): + self.modelBuilder.out.Print("v") + raise RuntimeError( + f"Could not locate the CMSHistFunc for {string}.\nNote that CMSInterferenceFunc currently only supports workspaces that use CMSHistFunc" + ) + + funcname = hfname + "_externalMorph" + + tpl = histfunc.cache() + edges = [tpl.GetEdge(i) for i in range(tpl.size() + 1)] + params = ROOT.RooArgList() + for p in item["parameters_inworkspace"]: + params.add(p) + + scaling_array = ROOT.std.vector["std::vector"]() + nbins, ncoef = item["scaling"].shape + scaling_array.reserve(nbins) + for sbin in item["scaling"]: + scaling_array.push_back(sbin) + + self.modelBuilder.out.safe_import(ROOT.CMSInterferenceFunc(funcname, "", histfunc.getXVar(), params, edges, scaling_array)) + func = self.modelBuilder.out.function(funcname) + histfunc.injectExternalMorph(func) + + +interferenceModel = InterferenceModel() diff --git a/test/test_interference.py b/test/test_interference.py index a5c302683c6..d5681b195bc 100755 --- a/test/test_interference.py +++ b/test/test_interference.py @@ -2,6 +2,7 @@ import ROOT import numpy as np import subprocess +import json def array2vector_2d(array): @@ -92,13 +93,13 @@ def to_TH1(name, array): """ ) -subprocess.call("text2workspace.py card.txt".split(" ")) -fcard = ROOT.TFile.Open("card.root") -w = fcard.Get("w") - -scaling = array2vector_2d( - np.array( - [ +# write the scaling data +scaling = [ + { + "channel": "ch1", + "process": "VBFHH", + "parameters": ["expr::a0('@0*@1', kv[1,0,2], kl[1,0,2])", "expr::a1('@0*@0', kv[1,0,2])", "k2v[1,0,2]"], + "scaling": [ [ 3.303536746664150e00, -8.541709820382220e00, @@ -259,41 +260,25 @@ def to_TH1(name, array): -4.949344845658728e01, 4.515984622267106e01, ], - ] - ) -) - -kv, k2v, kl = ( - w.factory("kv[1, 0, 2]"), - w.factory("k2v[1, 0, 2]"), - w.factory("kl[1, 0, 2]"), -) - -w.Import( - ROOT.CMSInterferenceFunc( - "ch1_vbfhh_morph", - "", - w.var("CMS_th1x"), - ROOT.RooArgList( - w.factory("expr('@0*@1', kv, kl)"), - w.factory("expr('@0*@0', kv)"), - k2v, - ), - list(range(21)), - scaling, - ), - ROOT.RooFit.RecycleConflictNodes(), -) -func = w.function("ch1_vbfhh_morph") -print(func.getVal()) + ], + }, +] -histfunc = w.function("shapeSig_ch1_VBFHH_morph") -histfunc.injectExternalMorph(func) +with open("scaling.json", "w") as fout: + json.dump(scaling, fout) -fout = ROOT.TFile("card_morph.root", "recreate") -w.Write() -fout.Close() +t2wcmd = [ + "text2workspace.py", + "card.txt", + "-P", + "HiggsAnalysis.CombinedLimit.InterferenceModels:interferenceModel", + "--PO", + "verbose", + "--PO", + "scalingData=scaling.json", + "--PO", + "POIs=kl[1,0,2]:kv[1,0,2]:k2v[1,0,2]", +] -subprocess.call( - "combine -M MultiDimFit card_morph.root --redefineSignalPOIs kv,k2v,kl --freezeParameters r --setParameters r=1,kv=1,k2v=1,kl=1 -t 100".split(" ") -) +subprocess.call(t2wcmd) +subprocess.call("combine -M MultiDimFit card.root -t 100".split(" ")) From 45dd13b14a291d61ee70ccaca790e1999c535e25 Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Wed, 23 Aug 2023 15:45:08 -0500 Subject: [PATCH 09/24] Docs for interference model --- docs/part2/physicsmodels.md | 86 ++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/docs/part2/physicsmodels.md b/docs/part2/physicsmodels.md index e5822449f54..6f30fe010a7 100644 --- a/docs/part2/physicsmodels.md +++ b/docs/part2/physicsmodels.md @@ -213,7 +213,7 @@ The `PhysicsModel` that encodes the signal model above is the [twoHypothesisHigg The two processes (S and S_ALT) will get different scaling parameters. The LEP-style likelihood for hypothesis testing can now be performed by setting **x** or **not_x** to 1 and 0 and comparing two likelihood evaluations. -### Interference +### Signal-background interference Since there are no such things as negative probability distribution functions, the recommended way to implement this is to start from the expression for the individual amplitudes and the parameter of interest $k$, @@ -225,7 +225,7 @@ $$ where -$\mu = k^2, ~S = A_{s}^2,~B = Ab^2$ and $ S+B+I = (As + Ab)^2$. +$\mu = k^2, ~S = A_{s}^2,~B = Ab^2$ and $S+B+I = (As + Ab)^2$. With some algebra you can work out that, @@ -244,3 +244,85 @@ An example of this scheme is implemented in a [HiggsWidth](https://gitlab.cern.c self.modelBuilder.factory_( "expr::qqH_b_func(\"1-sqrt(@0)\", CMS_zz4l_mu)") self.modelBuilder.factory_( "expr::qqH_sbi_func(\"sqrt(@0)\", CMS_zz4l_mu)") ``` + +### Multi-process interference + +The above formulation can be extended to multiple parameters of interest +(POIs). See +[AnalyticAnomalousCoupling](https://github.com/amassiro/AnalyticAnomalousCoupling) +for an example. However, the computational performance scales quadratically +with the number of POIs, and can get extremely expensive for 10 or more, as may +be encountered often with EFT analyses. To alleviate this issue, an accelerated +interference modeling technique is implemented for template-based analyses via +the `interferenceModel` physics model. At present, this technique only works with +`CMSHistFunc`-based workspaces, as these are the most common workspace types +encountered and the default when using +[autoMCStats](https://cms-analysis.github.io/HiggsAnalysis-CombinedLimit/part2/bin-wise-stats). + +To use this physics model, you will first need to derive a nominal template and +a scaling matrix that describes how the POI vector $\theta$ morphs the nominal +template. For each bin yield $y(\theta) = y_0 (\theta^\top M \theta)$; +find $y_0$ and put it into the datacard as a signal process; then find $M$ and +save the upper triangular component as an array in a `scaling.json` file with a +syntax as follows: + +```json +[ + { + "channel": "my_channel", + "process": "my_nominal_process", + "parameters": ["sqrt_mu[1,0,2]", "Bscaling[1]"], + "scaling": [ + [0.5, 0.1, 1.0], + [0.6, 0.2, 1.0], + [0.7, 0.3, 1.0] + ] + } +] +``` + +where the parameters are declared using RooFit's [factory +syntax](https://root.cern.ch/doc/v622/classRooWorkspace.html#a0ddded1d65f5c6c4732a7a3daa8d16b0). +Then, to construct the workspace, run + +```bash +text2workspace.py card.txt -P HiggsAnalysis.CombinedLimit.InterferenceModels:interferenceModel --PO verbose --PO scalingData=scaling.json +``` +For large amounts of scaling data, you can optionally use gzipped json (`.json.gz`) or pickle (`.pkl.gz`) files with 2D numpy arrays for the scaling coefficients instead of lists. + +The above formulation, assuming the nominal template `B` has three bins, would +be equivalent to the previous section's if the `S` template is `[0.5, 0.6, +0.7]*B` and the `I` template is `[0.05, 0.1, 0.15]*B`. You could pick any +nominal template, and adjust the scaling as appropriate. Generally it is +advisable to use a nomlinal template corresponding to near where you expect the +POIs to land so that the shape systematic effects are well-modeled in that +region. + +It may be the case that the relative contributions of the terms are themselves +a function of the POIs. For example, in VBF di-Higgs production, BSM +modifications to the production rate can be parameterized in the "kappa" +framework via three diagrams, with scaling coefficients $\kappa_V$, +$\kappa_\lambda \kappa_V$, and $\kappa_{2V}$, respectively, that interfere. In +that case, you can declare formulas with the factory syntax to represent each +amplitude as follows: + +```json +[ + { + "channel": "a_vbf_channel", + "process": "VBFHH", + "parameters": ["expr::a0('@0*@1', kv[1,0,2], kl[1,0,2])", "expr::a1('@0*@0', kv[1,0,2])", "k2v[1,0,2]"], + "scaling": [ + [3.303536746664150e00, -8.541709820382220e00, 4.235348320712800e00, 2.296464188467882e01, -1.107996258835088e01, 5.504469544697623e00], + [2.206443321428910e00, -7.076836641962523e00, 4.053185685866683e00, 2.350989689214267e01, -1.308569222837996e01, 7.502346155380032e00] + ] + } +] +``` + +However, you will need to manually specify what the POIs should be when creating the workspace using the `POIs=` physics option, e.g. + +```bash +text2workspace.py card.txt -P HiggsAnalysis.CombinedLimit.InterferenceModels:interferenceModel \ + --PO scalingData=scaling.json --PO 'POIs=kl[1,0,2]:kv[1,0,2]:k2v[1,0,2]' +``` From 6f5e6de006ad3dee916fd7c51cb3d5092da0c773 Mon Sep 17 00:00:00 2001 From: Nicholas Smith Date: Wed, 23 Aug 2023 15:59:18 -0500 Subject: [PATCH 10/24] Some clarifications in docs --- docs/part2/physicsmodels.md | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/docs/part2/physicsmodels.md b/docs/part2/physicsmodels.md index 6f30fe010a7..93afadbb333 100644 --- a/docs/part2/physicsmodels.md +++ b/docs/part2/physicsmodels.md @@ -225,7 +225,7 @@ $$ where -$\mu = k^2, ~S = A_{s}^2,~B = Ab^2$ and $S+B+I = (As + Ab)^2$. +$\mu = k^2, ~S = A_{s}^2,~B = A_b^2$ and $S+B+I = (A_s + A_b)^2$. With some algebra you can work out that, @@ -286,23 +286,37 @@ syntax](https://root.cern.ch/doc/v622/classRooWorkspace.html#a0ddded1d65f5c6c473 Then, to construct the workspace, run ```bash -text2workspace.py card.txt -P HiggsAnalysis.CombinedLimit.InterferenceModels:interferenceModel --PO verbose --PO scalingData=scaling.json +text2workspace.py card.txt -P HiggsAnalysis.CombinedLimit.InterferenceModels:interferenceModel \ + --PO verbose --PO scalingData=scaling.json ``` For large amounts of scaling data, you can optionally use gzipped json (`.json.gz`) or pickle (`.pkl.gz`) files with 2D numpy arrays for the scaling coefficients instead of lists. The above formulation, assuming the nominal template `B` has three bins, would be equivalent to the previous section's if the `S` template is `[0.5, 0.6, -0.7]*B` and the `I` template is `[0.05, 0.1, 0.15]*B`. You could pick any +0.7]*B` and the `I` template is `[0.05, 0.1, 0.15]*B`. More explicitly, we are setting + +$$ +y_0 = A_b^2, \qquad +M = \frac{1}{A_b^2} \begin{bmatrix} + A_s^2 & A_s A_b \\ + A_s A_b & A_b^2 + \end{bmatrix}, \qquad \theta = \begin{bmatrix} + \sqrt{\mu} \\ + 1 + \end{bmatrix} +$$ + +You could pick any nominal template, and adjust the scaling as appropriate. Generally it is -advisable to use a nomlinal template corresponding to near where you expect the +advisable to use a nominal template corresponding to near where you expect the POIs to land so that the shape systematic effects are well-modeled in that region. It may be the case that the relative contributions of the terms are themselves a function of the POIs. For example, in VBF di-Higgs production, BSM modifications to the production rate can be parameterized in the "kappa" -framework via three diagrams, with scaling coefficients $\kappa_V$, -$\kappa_\lambda \kappa_V$, and $\kappa_{2V}$, respectively, that interfere. In +framework via three diagrams, with scaling coefficients $\kappa_V \kappa_\lambda$, +$\kappa_V^2$, and $\kappa_{2V}$, respectively, that interfere. In that case, you can declare formulas with the factory syntax to represent each amplitude as follows: From a7737f9fa2afddb332c8e8746e41dd41868ade48 Mon Sep 17 00:00:00 2001 From: Nicholas Smith Date: Fri, 25 Aug 2023 14:47:17 -0500 Subject: [PATCH 11/24] Improve interference docs --- docs/part2/physicsmodels.md | 66 ++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/docs/part2/physicsmodels.md b/docs/part2/physicsmodels.md index 93afadbb333..61c30f1252c 100644 --- a/docs/part2/physicsmodels.md +++ b/docs/part2/physicsmodels.md @@ -215,17 +215,17 @@ The two processes (S and S_ALT) will get different scaling parameters. The LEP-s ### Signal-background interference -Since there are no such things as negative probability distribution functions, the recommended way to implement this is to start from the expression for the individual amplitudes and the parameter of interest $k$, +Since there are no such things as negative probability distribution functions, the recommended way to implement this is to start from the expression for the individual amplitudes $A$ and the parameter of interest $k$, $$ -\mathrm{Yield} = (k * A_{s} + A_{b})^2 -= k^2 * A_{s}^2 + k * 2 A_{s} A_{b} + A_{b}^2 +\mathrm{Yield} = |k * A_{s} + A_{b}|^2 += k^2 * |A_{s}|^2 + k * 2 \Re(A_{s}^* A_{b}) + |A_{b}|^2 = \mu * S + \sqrt{\mu} * I + B $$ where -$\mu = k^2, ~S = A_{s}^2,~B = A_b^2$ and $S+B+I = (A_s + A_b)^2$. +$\mu = k^2, ~S = |A_{s}|^2,~B = |A_b|^2$ and $S+B+I = |A_s + A_b|^2$. With some algebra you can work out that, @@ -254,15 +254,27 @@ for an example. However, the computational performance scales quadratically with the number of POIs, and can get extremely expensive for 10 or more, as may be encountered often with EFT analyses. To alleviate this issue, an accelerated interference modeling technique is implemented for template-based analyses via -the `interferenceModel` physics model. At present, this technique only works with +the `interferenceModel` physics model. In this model, each bin yield $y$ is parameterized +$$y(\theta) = y_0 (\theta^\top M \theta)$$ +as a function of the POI vector $\theta$, a nominal template $y_0$, and a scaling matrix $M$. +To see how this parameterization relates to that of the previous section, we can define: + +$$ +y_0 = A_b^2, \qquad +M = \frac{1}{A_b^2} \begin{bmatrix} + |A_s|^2 & \Re(A_s^* A_b) \\ + \Re(A_s A_b^*) & |A_b|^2 + \end{bmatrix}, \qquad \theta = \begin{bmatrix} + \sqrt{\mu} \\ + 1 + \end{bmatrix} +$$ + +which leads to the same parameterization. At present, this technique only works with `CMSHistFunc`-based workspaces, as these are the most common workspace types encountered and the default when using [autoMCStats](https://cms-analysis.github.io/HiggsAnalysis-CombinedLimit/part2/bin-wise-stats). - -To use this physics model, you will first need to derive a nominal template and -a scaling matrix that describes how the POI vector $\theta$ morphs the nominal -template. For each bin yield $y(\theta) = y_0 (\theta^\top M \theta)$; -find $y_0$ and put it into the datacard as a signal process; then find $M$ and +To use this model, for each bin find $y_0$ and put it into the datacard as a signal process, then find $M$ and save the upper triangular component as an array in a `scaling.json` file with a syntax as follows: @@ -282,29 +294,29 @@ syntax as follows: ``` where the parameters are declared using RooFit's [factory -syntax](https://root.cern.ch/doc/v622/classRooWorkspace.html#a0ddded1d65f5c6c4732a7a3daa8d16b0). +syntax](https://root.cern.ch/doc/v622/classRooWorkspace.html#a0ddded1d65f5c6c4732a7a3daa8d16b0) +and each row of the `scaling` field represents the scaling information of a bin, e.g. if $y_0 = |A_b|^2$ +then each row would contain three entries: +$$|A_s|^2 / |A_b|^2,\quad \Re(A_s^* A_b)/|A_b|^2,\quad 1$$ + +For several coefficients, one would enumerate as follows: +```python +scaling = [] +for ibin in range(nbins): + binscaling = [] + for icoef in range(ncoef): + for jcoef in range(icoef, ncoef): + binscaling.append(amplitude_squared_for(ibin, icoef, jcoef)) + scaling.append(binscaling) +``` + Then, to construct the workspace, run ```bash text2workspace.py card.txt -P HiggsAnalysis.CombinedLimit.InterferenceModels:interferenceModel \ --PO verbose --PO scalingData=scaling.json ``` -For large amounts of scaling data, you can optionally use gzipped json (`.json.gz`) or pickle (`.pkl.gz`) files with 2D numpy arrays for the scaling coefficients instead of lists. - -The above formulation, assuming the nominal template `B` has three bins, would -be equivalent to the previous section's if the `S` template is `[0.5, 0.6, -0.7]*B` and the `I` template is `[0.05, 0.1, 0.15]*B`. More explicitly, we are setting - -$$ -y_0 = A_b^2, \qquad -M = \frac{1}{A_b^2} \begin{bmatrix} - A_s^2 & A_s A_b \\ - A_s A_b & A_b^2 - \end{bmatrix}, \qquad \theta = \begin{bmatrix} - \sqrt{\mu} \\ - 1 - \end{bmatrix} -$$ +For large amounts of scaling data, you can optionally use gzipped json (`.json.gz`) or pickle (`.pkl.gz`) files with 2D numpy arrays for the scaling coefficients instead of lists. The function `numpy.triu_indices(ncoef)` is helpful for extracting the upper triangle of a square matrix. You could pick any nominal template, and adjust the scaling as appropriate. Generally it is From 171431ab6f4a6955b5c73d708439253c1b69513c Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Tue, 29 Aug 2023 15:37:49 -0500 Subject: [PATCH 12/24] Actually test the CMSInterferenceFunc against reference values --- test/test_interference.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/test_interference.py b/test/test_interference.py index d5681b195bc..918ef8a4921 100755 --- a/test/test_interference.py +++ b/test/test_interference.py @@ -281,4 +281,24 @@ def to_TH1(name, array): ] subprocess.call(t2wcmd) + +fws = ROOT.TFile.Open("card.root") +w = fws.Get("w") + +def setvars(x, kl, kv, k2v): + w.var("CMS_th1x").setVal(x) + w.var("kl").setVal(kl) + w.var("kv").setVal(kv) + w.var("k2v").setVal(k2v) + +func = w.function(f"shapeSig_ch1_VBFHH_morph_externalMorph") +assert func + +setvars(0, 1, 1, 1) +assert abs(func.getVal() - 1.0) < 1e-14, func.getVal() +setvars(1, 1.1, 1, 1) +assert abs(func.getVal() - 0.8586229062809139) < 1e-14, func.getVal() +setvars(2, 1.1, 0.9, 1.3) +assert abs(func.getVal() - 4.372110974178483) < 1e-14, func.getVal() + subprocess.call("combine -M MultiDimFit card.root -t 100".split(" ")) From 886335c3f69b17e5166aba0ca386c68455a634a8 Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Tue, 29 Aug 2023 17:05:06 -0500 Subject: [PATCH 13/24] Switch to lower triangular ordering This allows to extend the array at the end when adding new terms --- interface/CMSInterferenceFunc.h | 4 +- src/CMSInterferenceFunc.cxx | 4 +- test/test_interference.py | 188 +++++--------------------------- 3 files changed, 31 insertions(+), 165 deletions(-) diff --git a/interface/CMSInterferenceFunc.h b/interface/CMSInterferenceFunc.h index 4d8525b5027..1cf7262418d 100644 --- a/interface/CMSInterferenceFunc.h +++ b/interface/CMSInterferenceFunc.h @@ -17,8 +17,8 @@ class CMSInterferenceFunc : public RooAbsReal { /* * For a coefficients list of length n and edges array of length b+1, * the binscaling nested vector should have b entries (for b bins) with - * each being of length n*(n+1)/2, corresponding to the upper triangular - * elements of the scaling matrix, i.e. (m_00, m_01, m_02, ..., m_11, m_12, ...) + * each being of length n*(n+1)/2, corresponding to the lower triangular + * elements of the scaling matrix, i.e. (m_00, m_10, m_11, m_20, m_21, m_22, ...) */ CMSInterferenceFunc( const char* name, diff --git a/src/CMSInterferenceFunc.cxx b/src/CMSInterferenceFunc.cxx index c9aad55f3ce..34e96097103 100644 --- a/src/CMSInterferenceFunc.cxx +++ b/src/CMSInterferenceFunc.cxx @@ -10,7 +10,7 @@ class _InterferenceEval { Eigen::MatrixXd mat(ncoef, ncoef); size_t k=0; for(size_t i=0; i Date: Tue, 29 Aug 2023 17:13:39 -0500 Subject: [PATCH 14/24] Also switch to lower triangular in docs --- docs/part2/physicsmodels.md | 12 +++++++----- test/test_interference.py | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/part2/physicsmodels.md b/docs/part2/physicsmodels.md index 61c30f1252c..a00cf0249d8 100644 --- a/docs/part2/physicsmodels.md +++ b/docs/part2/physicsmodels.md @@ -275,7 +275,7 @@ which leads to the same parameterization. At present, this technique only works encountered and the default when using [autoMCStats](https://cms-analysis.github.io/HiggsAnalysis-CombinedLimit/part2/bin-wise-stats). To use this model, for each bin find $y_0$ and put it into the datacard as a signal process, then find $M$ and -save the upper triangular component as an array in a `scaling.json` file with a +save the lower triangular component as an array in a `scaling.json` file with a syntax as follows: ```json @@ -305,7 +305,7 @@ scaling = [] for ibin in range(nbins): binscaling = [] for icoef in range(ncoef): - for jcoef in range(icoef, ncoef): + for jcoef in range(icoef + 1): binscaling.append(amplitude_squared_for(ibin, icoef, jcoef)) scaling.append(binscaling) ``` @@ -316,7 +316,9 @@ Then, to construct the workspace, run text2workspace.py card.txt -P HiggsAnalysis.CombinedLimit.InterferenceModels:interferenceModel \ --PO verbose --PO scalingData=scaling.json ``` -For large amounts of scaling data, you can optionally use gzipped json (`.json.gz`) or pickle (`.pkl.gz`) files with 2D numpy arrays for the scaling coefficients instead of lists. The function `numpy.triu_indices(ncoef)` is helpful for extracting the upper triangle of a square matrix. +For large amounts of scaling data, you can optionally use gzipped json (`.json.gz`) or pickle (`.pkl.gz`) +files with 2D numpy arrays for the scaling coefficients instead of lists. The function `numpy.tril_indices(ncoef)` +is helpful for extracting the lower triangle of a square matrix. You could pick any nominal template, and adjust the scaling as appropriate. Generally it is @@ -339,8 +341,8 @@ amplitude as follows: "process": "VBFHH", "parameters": ["expr::a0('@0*@1', kv[1,0,2], kl[1,0,2])", "expr::a1('@0*@0', kv[1,0,2])", "k2v[1,0,2]"], "scaling": [ - [3.303536746664150e00, -8.541709820382220e00, 4.235348320712800e00, 2.296464188467882e01, -1.107996258835088e01, 5.504469544697623e00], - [2.206443321428910e00, -7.076836641962523e00, 4.053185685866683e00, 2.350989689214267e01, -1.308569222837996e01, 7.502346155380032e00] + [3.30353674666415, -8.54170982038222, 22.96464188467882, 4.2353483207128, -11.07996258835088, 5.504469544697623], + [2.20644332142891, -7.076836641962523, 23.50989689214267, 4.053185685866683, -13.08569222837996, 7.502346155380032] ] } ] diff --git a/test/test_interference.py b/test/test_interference.py index 8605f3e93da..725bdd676d0 100755 --- a/test/test_interference.py +++ b/test/test_interference.py @@ -157,7 +157,7 @@ def setvars(x, kl, kv, k2v): w.var("k2v").setVal(k2v) -func = w.function(f"shapeSig_ch1_VBFHH_morph_externalMorph") +func = w.function("shapeSig_ch1_VBFHH_morph_externalMorph") assert func setvars(0, 1, 1, 1) From f99a2eec84e56e943b6a80727dbca64b72987e75 Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Tue, 29 Aug 2023 17:59:57 -0500 Subject: [PATCH 15/24] =?UTF-8?q?Python2=20compatibility=20=F0=9F=98=A2?= =?UTF-8?q?=F0=9F=98=A2=F0=9F=98=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/InterferenceModels.py | 41 +++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/python/InterferenceModels.py b/python/InterferenceModels.py index f9289f30dd2..83589057fa5 100644 --- a/python/InterferenceModels.py +++ b/python/InterferenceModels.py @@ -8,7 +8,7 @@ from HiggsAnalysis.CombinedLimit.PhysicsModel import PhysicsModelBase_NiceSubclasses -def read_scaling(path: str): +def read_scaling(path): if path.endswith(".json"): with open(path) as fin: out = json.load(fin) @@ -19,7 +19,7 @@ def read_scaling(path: str): with gzip.open(path, "rb") as fin: out = pickle.load(fin) else: - raise RuntimeError(f"Unrecognized scaling data path {path}; must be either .json, .json.gz, or .pkl.gz") + raise RuntimeError("Unrecognized scaling data path {path}; must be either .json, .json.gz, or .pkl.gz".format(path=path)) # normalize if not isinstance(out, list): raise RuntimeError("Scaling data in invalid format: expected list") @@ -29,19 +29,20 @@ def read_scaling(path: str): raise RuntimeError("Scaling data in invalid format: expected each item in list to be a dict") missing = expected_fields - set(item) if missing: - raise RuntimeError(f"Missing fields in scaling item: {missing}") + raise RuntimeError("Missing fields in scaling item: {missing}".format(missing=missing)) shortname = item["channel"] + "/" + item["process"] if not all(isinstance(par, str) for par in item["parameters"]): - raise RuntimeError(f"Parameters must be a list of strings in {shortname}") + raise RuntimeError("Parameters must be a list of strings in {shortname}".format(shortname=shortname)) try: item["scaling"] = np.array(item["scaling"], dtype=float) - except ValueError as ex: - raise RuntimeError(f"Scaling data invalid: could not normalize array for {shortname}") from ex + except ValueError: + # python3: raise from ex + raise RuntimeError("Scaling data invalid: could not normalize array for {shortname}".format(shortname=shortname)) if len(item["scaling"].shape) != 2: - raise RuntimeError(f"Scaling data invalid: array shape incorrect for {shortname}") + raise RuntimeError("Scaling data invalid: array shape incorrect for {shortname}".format(shortname=shortname)) npar = len(item["parameters"]) if item["scaling"].shape[1] != npar * (npar + 1) // 2: - raise RuntimeError(f"Scaling data invalid: array has insufficent terms for parameters in {shortname}") + raise RuntimeError("Scaling data invalid: array has insufficent terms for parameters in {shortname}".format(shortname=shortname)) return out @@ -79,11 +80,11 @@ def getPOIList(self): print("Building explicitly requested POIs:") for poi in self.explict_pois: if self.verbose: - print(f" - {poi}") + print(" - {poi}".format(poi=poi)) self.modelBuilder.doVar(poi) poiname = re.sub(r"\[.*", "", poi) if not self.modelBuilder.out.var(poiname): - raise RuntimeError(f"Could not find POI {poiname} after factory call") + raise RuntimeError("Could not find POI {poiname} after factory call".format(poiname=poiname)) poiNames.append(poiname) # RooFit would also detect duplicate params with mismatched factory, but this is faster constructed = {} @@ -95,16 +96,16 @@ def getPOIList(self): else: rooparam = self.modelBuilder.factory_(param) if not rooparam: - raise RuntimeError(f"Failed to build {param}") + raise RuntimeError("Failed to build {param}".format(param=param)) if not self.explict_pois and isinstance(rooparam, ROOT.RooRealVar) and not rooparam.isConstant(): if self.verbose: - print(f"Assuming parameter {param} is to be a POI (name: {rooparam.GetName()})") + print("Assuming parameter {param} is to be a POI (name: {name})".format(param=param, name=rooparam.GetName())) poiNames.append(rooparam.GetName()) constructed[param] = rooparam # save for later use in making CMSInterferenceFunc item["parameters_inworkspace"].append(rooparam) if self.verbose: - print(f"All POIs: {poiNames}") + print("All POIs: {poiNames}".format(poiNames=poiNames)) return poiNames def getYieldScale(self, channel, process): @@ -113,9 +114,9 @@ def getYieldScale(self, channel, process): item = self.scaling_map[(channel, process)] except KeyError: if self.verbose: - print(f"Will scale {string} by 1") + print("Will scale {string} by 1".format(string=string)) return 1 - print(f"Will scale {string} using CMSInterferenceFunc dependent on parameters {item['parameters']}") + print("Will scale {string} using CMSInterferenceFunc dependent on parameters {parameters}".format(string=string, parameters=item["parameters"])) item["found"] = True # We don't actually scale the total via this mechanism # and we'll set up the shape effects in done() since shapes aren't available yet @@ -128,20 +129,22 @@ def done(self): string = "%s/%s" % (channel, process) if "found" not in item: if self.verbose: - print(f"Did not find {string} in workspace, even though it is in scaling data") + print("Did not find {string} in workspace, even though it is in scaling data".format(string=string)) continue - hfname = f"shapeSig_{channel}_{process}_morph" + hfname = "shapeSig_{channel}_{process}_morph".format(channel=channel, process=process) histfunc = self.modelBuilder.out.function(hfname) if not histfunc: # if there are no systematics, it ends up with a different name? - hfname = f"shapeSig_{process}_{channel}_rebinPdf" + hfname = "shapeSig_{process}_{channel}_rebinPdf".format(channel=channel, process=process) histfunc = self.modelBuilder.out.function(hfname) # TODO: support FastVerticalInterpHistPdf2 if not isinstance(histfunc, ROOT.CMSHistFunc): self.modelBuilder.out.Print("v") raise RuntimeError( - f"Could not locate the CMSHistFunc for {string}.\nNote that CMSInterferenceFunc currently only supports workspaces that use CMSHistFunc" + "Could not locate the CMSHistFunc for {string}.\nNote that CMSInterferenceFunc currently only supports workspaces that use CMSHistFunc".format( + string=string + ) ) funcname = hfname + "_externalMorph" From 9e40e3c1d12f1f5c2ea8aea0bbb52ebc8189184a Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Tue, 29 Aug 2023 21:23:52 -0500 Subject: [PATCH 16/24] Ensure contiguous --- python/InterferenceModels.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/InterferenceModels.py b/python/InterferenceModels.py index 83589057fa5..9726da3edfd 100644 --- a/python/InterferenceModels.py +++ b/python/InterferenceModels.py @@ -34,7 +34,8 @@ def read_scaling(path): if not all(isinstance(par, str) for par in item["parameters"]): raise RuntimeError("Parameters must be a list of strings in {shortname}".format(shortname=shortname)) try: - item["scaling"] = np.array(item["scaling"], dtype=float) + # coerce into numpy with C-contiguous memory (needed for fast std::vector copy) + item["scaling"] = np.ascontiguousarray(item["scaling"], dtype=float) except ValueError: # python3: raise from ex raise RuntimeError("Scaling data invalid: could not normalize array for {shortname}".format(shortname=shortname)) From 5b061b92e73945837a8511768e91756ace1a86f9 Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Tue, 29 Aug 2023 22:01:27 -0500 Subject: [PATCH 17/24] Allow robustHesse without a fit --- src/RobustHesse.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/RobustHesse.cc b/src/RobustHesse.cc index 41371dc7b50..136682e83c5 100644 --- a/src/RobustHesse.cc +++ b/src/RobustHesse.cc @@ -101,6 +101,13 @@ int RobustHesse::setParameterStencil(unsigned i) { double boundLo = rrv->getMin(); double boundHi = rrv->getMax(); + // If we skip initial fit (e.g. to compute from asimov point) + // need a guess for the initial step size + if ( rrv->getError() == 0.0 ) { + valLo = x + 1e-3*(boundLo - x); + valHi = x + 1e-3*(boundHi - x); + } + bool closeToLo = valLo < boundLo; bool closeToHi = valHi > boundHi; From e80071c3b0d349e11d4d506d5bd810a3f84b4901 Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Fri, 27 Oct 2023 14:42:28 -0500 Subject: [PATCH 18/24] Actually run test --- .github/workflows/ci.yml | 5 +++++ test/test_interference.py | 6 ++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index be3abdaab78..ed2078efb5a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,3 +65,8 @@ jobs: run: | cd HiggsAnalysis/CombinedLimit make CONDA=1 -j 2 + - name: Run tests + shell: bash -l {0} + run: | + cd HiggsAnalysis/CombinedLimit/test + python test_interference.py diff --git a/test/test_interference.py b/test/test_interference.py index 725bdd676d0..9712293cfd2 100755 --- a/test/test_interference.py +++ b/test/test_interference.py @@ -144,7 +144,8 @@ def to_TH1(name, array): "POIs=kl[1,0,2]:kv[1,0,2]:k2v[1,0,2]", ] -subprocess.call(t2wcmd) +ret = subprocess.call(t2wcmd) +assert ret == 0 fws = ROOT.TFile.Open("card.root") w = fws.Get("w") @@ -167,4 +168,5 @@ def setvars(x, kl, kv, k2v): setvars(2, 1.1, 0.9, 1.3) assert abs(func.getVal() - 4.372110974178483) < 1e-14, func.getVal() -subprocess.call("combine -M MultiDimFit card.root -t 100".split(" ")) +ret = subprocess.call("combine -M MultiDimFit card.root -t 100".split(" ")) +assert ret == 0 From 3e25193bffb3607785a82984b8b554b0ebb13c81 Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Tue, 31 Oct 2023 16:55:40 -0500 Subject: [PATCH 19/24] Implement external morph for CMSHistSum as well --- interface/CMSHistSum.h | 10 +++++++- python/InterferenceModels.py | 23 ++++++++++++----- src/CMSHistFunc.cc | 4 +-- src/CMSHistSum.cc | 49 ++++++++++++++++++++++++++++++++++-- test/test_interference.py | 28 ++++++++++++++++++++- 5 files changed, 102 insertions(+), 12 deletions(-) diff --git a/interface/CMSHistSum.h b/interface/CMSHistSum.h index 4fe3e375ac9..791f737431a 100644 --- a/interface/CMSHistSum.h +++ b/interface/CMSHistSum.h @@ -73,9 +73,14 @@ class CMSHistSum : public RooAbsReal { RooArgList const& coefList() const { return coeffpars_; } // RooArgList const& funcList() const { return funcs_; } + RooAbsReal const& getXVar() const { return x_.arg(); } + static void EnableFastVertical(); friend class CMSHistV; + // TODO: allow any class that implements hasChanged() and batchGetBinValues() + void injectExternalMorph(int idx, CMSInterferenceFunc& morph); + protected: RooRealProxy x_; @@ -128,6 +133,9 @@ class CMSHistSum : public RooAbsReal { mutable int fast_mode_; //! not to be serialized static bool enable_fast_vertical_; //! not to be serialized + RooListProxy external_morphs_; + std::vector external_morph_indices_; + inline int& morphField(int const& ip, int const& iv) { return vmorph_fields_[ip * n_morphs_ + iv]; } @@ -143,7 +151,7 @@ class CMSHistSum : public RooAbsReal { private: - ClassDef(CMSHistSum,1) + ClassDef(CMSHistSum,2) }; #endif diff --git a/python/InterferenceModels.py b/python/InterferenceModels.py index 9726da3edfd..88064370bde 100644 --- a/python/InterferenceModels.py +++ b/python/InterferenceModels.py @@ -139,13 +139,15 @@ def done(self): # if there are no systematics, it ends up with a different name? hfname = "shapeSig_{process}_{channel}_rebinPdf".format(channel=channel, process=process) histfunc = self.modelBuilder.out.function(hfname) + if not histfunc: + # assume this is a CMSHistSum workspace + hfname = f"prop_bin{channel}" + histfunc = self.modelBuilder.out.function(hfname) # TODO: support FastVerticalInterpHistPdf2 - if not isinstance(histfunc, ROOT.CMSHistFunc): - self.modelBuilder.out.Print("v") + if not isinstance(histfunc, (ROOT.CMSHistFunc, ROOT.CMSHistSum)): raise RuntimeError( - "Could not locate the CMSHistFunc for {string}.\nNote that CMSInterferenceFunc currently only supports workspaces that use CMSHistFunc".format( - string=string - ) + "Could not locate the CMSHistFunc or CMSHistSum for {string}.\n".format(string=string) + + "Note that CMSInterferenceFunc currently only supports workspaces that use these classes" ) funcname = hfname + "_externalMorph" @@ -164,7 +166,16 @@ def done(self): self.modelBuilder.out.safe_import(ROOT.CMSInterferenceFunc(funcname, "", histfunc.getXVar(), params, edges, scaling_array)) func = self.modelBuilder.out.function(funcname) - histfunc.injectExternalMorph(func) + if isinstance(histfunc, ROOT.CMSHistFunc): + histfunc.injectExternalMorph(func) + elif isinstance(histfunc, ROOT.CMSHistSum): + coefname = "n_exp_final_bin{channel}_proc_{process}".format(channel=channel, process=process) + for idx, coef in enumerate(histfunc.coefList()): + if coef.GetName() == coefname: + if self.verbose: + print("Injecting external morph for " + coefname) + histfunc.injectExternalMorph(idx, func) + break interferenceModel = InterferenceModel() diff --git a/src/CMSHistFunc.cc b/src/CMSHistFunc.cc index a05a5c1854f..2ef5504f082 100644 --- a/src/CMSHistFunc.cc +++ b/src/CMSHistFunc.cc @@ -323,7 +323,7 @@ void CMSHistFunc::updateCache() const { if (mcache_.size() == 0) mcache_.resize(storage_.size()); } - bool external_morph_updated = (external_morph_.getSize() && dynamic_cast(external_morph_.at(0))->hasChanged()); + bool external_morph_updated = (external_morph_.getSize() && static_cast(external_morph_.at(0))->hasChanged()); if (step1 || external_morph_updated) { fast_vertical_ = false; } @@ -567,7 +567,7 @@ void CMSHistFunc::updateCache() const { std::cout << "Template before external morph update:" << mcache_[idx].step2.Integral() << "\n"; mcache_[idx].step2.Dump(); #endif - auto& extdata = dynamic_cast(external_morph_.at(0))->batchGetBinValues(); + auto& extdata = static_cast(external_morph_.at(0))->batchGetBinValues(); for(size_t i=0; igetParameters({*x_}); + sentry_.addVars(*deps); + delete deps; + } + sentry_.setValueDirty(); binsentry_.setValueDirty(); @@ -250,6 +260,16 @@ void CMSHistSum::initialize() const { } void CMSHistSum::updateMorphs() const { + // set up pointers ahead of time for quick loop + std::vector process_morphs(compcache_.size(), nullptr); + // if any external morphs are dirty, disable fast_mode_ + for(size_t i=0; i < external_morph_indices_.size(); ++i) { + auto* morph = static_cast(external_morphs_.at(i)); + process_morphs[external_morph_indices_[i]] = morph; + if (morph->hasChanged()) { + fast_mode_ = 0; + } + } // If we're not in fast mode, need to reset all the compcache_ #if HFVERBOSE > 0 std::cout << "fast_mode_ = " << fast_mode_ << std::endl; @@ -257,6 +277,12 @@ void CMSHistSum::updateMorphs() const { for (unsigned ip = 0; ip < compcache_.size(); ++ip) { if (fast_mode_ == 0) { compcache_[ip].CopyValues(storage_[process_fields_[ip]]); + if ( process_morphs[ip] != nullptr ) { + auto& extdata = process_morphs[ip]->batchGetBinValues(); + for(size_t ibin=0; ibin= coeffpars_.getSize() ) { + throw std::runtime_error("Process index larger than number of processes in CMSHistSum"); + } + if ( morph.batchGetBinValues().size() != cache_.size() ) { + throw std::runtime_error("Mismatched binning between external morph and CMSHistSum"); + // equal edges are user responsibility for now + } + + for (auto other_idx : external_morph_indices_) { + if ( idx == other_idx ) { + external_morphs_.replace(external_morphs_[idx], morph); + return; + } + } + external_morph_indices_.push_back(idx); + external_morphs_.add(morph); +} + #undef HFVERBOSE diff --git a/test/test_interference.py b/test/test_interference.py index 9712293cfd2..1cb146909c5 100755 --- a/test/test_interference.py +++ b/test/test_interference.py @@ -147,6 +147,10 @@ def to_TH1(name, array): ret = subprocess.call(t2wcmd) assert ret == 0 +histsum_args = ["--for-fits", "--no-wrappers", "--use-histsum", "-o", "card_histsum.root"] +ret = subprocess.call(t2wcmd + histsum_args) +assert ret == 0 + fws = ROOT.TFile.Open("card.root") w = fws.Get("w") @@ -168,5 +172,27 @@ def setvars(x, kl, kv, k2v): setvars(2, 1.1, 0.9, 1.3) assert abs(func.getVal() - 4.372110974178483) < 1e-14, func.getVal() -ret = subprocess.call("combine -M MultiDimFit card.root -t 100".split(" ")) +# toy generation is different between the histsum and histfunc models, somehow +ntoys = 10 +ret = subprocess.call(f"combine -M GenerateOnly card.root -t {ntoys} --saveToys".split(" ")) assert ret == 0 + +ret = subprocess.call(f"combine -M MultiDimFit card.root -t {ntoys} --toysFile higgsCombineTest.GenerateOnly.mH120.123456.root -n HistFunc".split(" ")) +assert ret == 0 + +ret = subprocess.call(f"combine -M MultiDimFit card_histsum.root -t {ntoys} --toysFile higgsCombineTest.GenerateOnly.mH120.123456.root -n HistSum".split(" ")) +assert ret == 0 + +f_histfunc = ROOT.TFile.Open("higgsCombineHistFunc.MultiDimFit.mH120.123456.root") +f_histsum = ROOT.TFile.Open("higgsCombineHistSum.MultiDimFit.mH120.123456.root") + +ndiff = {"kl": 0, "kv": 0, "k2v": 0} +for row1, row2 in zip(f_histfunc.Get("limit"), f_histsum.Get("limit")): + if abs(row1.kl - row2.kl) > 1e-4: + ndiff["kl"] += 1 + if abs(row1.kv - row2.kv) > 1e-4: + ndiff["kv"] += 1 + if abs(row1.k2v - row2.k2v) > 1e-4: + ndiff["k2v"] += 1 + +print(f"Out of {ntoys} toys, {ndiff} are not matching (tolerance: 1e-4) between CMSHistFunc and CMSHistSum") From 6123b595e93f67b62cf815d0ff1b5548d9cb38e8 Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Tue, 31 Oct 2023 22:12:39 -0500 Subject: [PATCH 20/24] Refactor CMSInterferenceFunc to use base class CMSExternalMorph This should make extending external morphs, e.g. to replace RooParametricHist, much simpler. --- interface/CMSExternalMorph.h | 45 +++++++++++++++++++++++++++++++++ interface/CMSHistFunc.h | 5 ++-- interface/CMSHistSum.h | 3 +-- interface/CMSInterferenceFunc.h | 20 +++++---------- python/InterferenceModels.py | 2 +- src/CMSExternalMorph.cc | 33 ++++++++++++++++++++++++ src/CMSHistFunc.cc | 6 ++--- src/CMSHistSum.cc | 6 ++--- src/CMSInterferenceFunc.cxx | 29 +++++++-------------- src/classes.h | 1 + src/classes_def.xml | 1 + test/test_interference.py | 12 ++++++--- 12 files changed, 113 insertions(+), 50 deletions(-) create mode 100644 interface/CMSExternalMorph.h create mode 100644 src/CMSExternalMorph.cc diff --git a/interface/CMSExternalMorph.h b/interface/CMSExternalMorph.h new file mode 100644 index 00000000000..8889a6e792c --- /dev/null +++ b/interface/CMSExternalMorph.h @@ -0,0 +1,45 @@ +#ifndef CMSExternalMorph_h +#define CMSExternalMorph_h +#include + +#include "RooAbsReal.h" +#include "RooRealVar.h" +#include "RooRealProxy.h" + +class CMSExternalMorph : public RooAbsReal { + public: + CMSExternalMorph(); + /* + * All subclasses need to provide an edges array of length nbins+1 + * of the observable (x) + * TODO: CMSHistFunc and CMSHistSum do not check the binning is compatible + * with their binning other than having the correct length + */ + CMSExternalMorph( + const char* name, + const char* title, + RooRealVar& x, + const std::vector& edges + ); + CMSExternalMorph(CMSExternalMorph const& other, const char* name = 0); + virtual ~CMSExternalMorph(); + + /* Batch accessor for CMSHistFunc / CMSHistSum, to be overriden by concrete + * implementations. hasChanged() should indicate whether or not + * batchGetBinValues() would return a new vector, given the state of + * any dependent variables. + */ + virtual bool hasChanged() const = 0; + virtual const std::vector& batchGetBinValues() const = 0; + + protected: + RooRealProxy x_; + std::vector edges_; + + double evaluate() const; + + private: + ClassDef(CMSExternalMorph, 1) +}; + +#endif // CMSExternalMorph_h diff --git a/interface/CMSHistFunc.h b/interface/CMSHistFunc.h index 2c8ee8b509a..28d0b5c6d86 100644 --- a/interface/CMSHistFunc.h +++ b/interface/CMSHistFunc.h @@ -15,7 +15,7 @@ #include "CMSHistV.h" #include "FastTemplate_Old.h" #include "SimpleCacheSentry.h" -#include "CMSInterferenceFunc.h" +#include "CMSExternalMorph.h" class CMSHistFuncWrapper; @@ -149,8 +149,7 @@ class CMSHistFunc : public RooAbsReal { friend class CMSHistV; friend class CMSHistSum; - // TODO: allow any class that implements hasChanged() and batchGetBinValues() - void injectExternalMorph(CMSInterferenceFunc& morph); + void injectExternalMorph(CMSExternalMorph& morph); /* – RooAbsArg::setVerboseEval(Int_t level) • Level 0 – No messages diff --git a/interface/CMSHistSum.h b/interface/CMSHistSum.h index 791f737431a..23b39c95635 100644 --- a/interface/CMSHistSum.h +++ b/interface/CMSHistSum.h @@ -78,8 +78,7 @@ class CMSHistSum : public RooAbsReal { static void EnableFastVertical(); friend class CMSHistV; - // TODO: allow any class that implements hasChanged() and batchGetBinValues() - void injectExternalMorph(int idx, CMSInterferenceFunc& morph); + void injectExternalMorph(int idx, CMSExternalMorph& morph); protected: RooRealProxy x_; diff --git a/interface/CMSInterferenceFunc.h b/interface/CMSInterferenceFunc.h index 1cf7262418d..14be41fcca2 100644 --- a/interface/CMSInterferenceFunc.h +++ b/interface/CMSInterferenceFunc.h @@ -1,16 +1,13 @@ #ifndef CMSInterferenceFunc_h #define CMSInterferenceFunc_h -#include - -#include "RooAbsReal.h" -#include "RooRealVar.h" -#include "RooRealProxy.h" #include "RooListProxy.h" #include "SimpleCacheSentry.h" +#include "CMSExternalMorph.h" + class _InterferenceEval; -class CMSInterferenceFunc : public RooAbsReal { +class CMSInterferenceFunc : public CMSExternalMorph { public: CMSInterferenceFunc(); CMSInterferenceFunc(CMSInterferenceFunc const& other, const char* name = 0); @@ -24,8 +21,8 @@ class CMSInterferenceFunc : public RooAbsReal { const char* name, const char* title, RooRealVar& x, - const RooArgList& coefficients, const std::vector& edges, + const RooArgList& coefficients, const std::vector> binscaling ); virtual ~CMSInterferenceFunc(); @@ -38,21 +35,16 @@ class CMSInterferenceFunc : public RooAbsReal { std::ostream& os, Int_t contents, Bool_t verbose, TString indent ) const override; - // batch accessor for CMSHistFunc / CMSHistSum - bool hasChanged() const { return !sentry_.good(); }; - const std::vector& batchGetBinValues() const; + bool hasChanged() const override { return !sentry_.good(); }; + const std::vector& batchGetBinValues() const override; protected: - RooRealProxy x_; RooListProxy coefficients_; - std::vector edges_; std::vector> binscaling_; mutable SimpleCacheSentry sentry_; //! mutable std::unique_ptr<_InterferenceEval> evaluator_; //! - double evaluate() const override; - private: void initialize() const; void updateCache() const; diff --git a/python/InterferenceModels.py b/python/InterferenceModels.py index 88064370bde..ca70a1e5499 100644 --- a/python/InterferenceModels.py +++ b/python/InterferenceModels.py @@ -164,7 +164,7 @@ def done(self): for sbin in item["scaling"]: scaling_array.push_back(sbin) - self.modelBuilder.out.safe_import(ROOT.CMSInterferenceFunc(funcname, "", histfunc.getXVar(), params, edges, scaling_array)) + self.modelBuilder.out.safe_import(ROOT.CMSInterferenceFunc(funcname, "", histfunc.getXVar(), edges, params, scaling_array)) func = self.modelBuilder.out.function(funcname) if isinstance(histfunc, ROOT.CMSHistFunc): histfunc.injectExternalMorph(func) diff --git a/src/CMSExternalMorph.cc b/src/CMSExternalMorph.cc new file mode 100644 index 00000000000..95e7d25d10e --- /dev/null +++ b/src/CMSExternalMorph.cc @@ -0,0 +1,33 @@ +#include "../interface/CMSExternalMorph.h" + +CMSExternalMorph::CMSExternalMorph() {} + +CMSExternalMorph::CMSExternalMorph( + const char* name, + const char* title, + RooRealVar& x, + const std::vector& edges + ) : + RooAbsReal(name, title), + x_("x", "", this, x), + edges_(edges) +{ +} + +CMSExternalMorph::CMSExternalMorph(CMSExternalMorph const& other, const char* name) : + RooAbsReal(other, name), + x_("x", this, other.x_), + edges_(other.edges_) +{ +} + +CMSExternalMorph::~CMSExternalMorph() = default; + +double CMSExternalMorph::evaluate() const { + auto it = std::upper_bound(std::begin(edges_), std::end(edges_), x_->getVal()); + if ( (it == std::begin(edges_)) or (it == std::end(edges_)) ) { + return 0.0; + } + size_t idx = std::distance(std::begin(edges_), it) - 1; + return batchGetBinValues()[idx]; +} diff --git a/src/CMSHistFunc.cc b/src/CMSHistFunc.cc index 2ef5504f082..045f802e278 100644 --- a/src/CMSHistFunc.cc +++ b/src/CMSHistFunc.cc @@ -323,7 +323,7 @@ void CMSHistFunc::updateCache() const { if (mcache_.size() == 0) mcache_.resize(storage_.size()); } - bool external_morph_updated = (external_morph_.getSize() && static_cast(external_morph_.at(0))->hasChanged()); + bool external_morph_updated = (external_morph_.getSize() && static_cast(external_morph_.at(0))->hasChanged()); if (step1 || external_morph_updated) { fast_vertical_ = false; } @@ -567,7 +567,7 @@ void CMSHistFunc::updateCache() const { std::cout << "Template before external morph update:" << mcache_[idx].step2.Integral() << "\n"; mcache_[idx].step2.Dump(); #endif - auto& extdata = static_cast(external_morph_.at(0))->batchGetBinValues(); + auto& extdata = static_cast(external_morph_.at(0))->batchGetBinValues(); for(size_t i=0; i process_morphs(compcache_.size(), nullptr); + std::vector process_morphs(compcache_.size(), nullptr); // if any external morphs are dirty, disable fast_mode_ for(size_t i=0; i < external_morph_indices_.size(); ++i) { - auto* morph = static_cast(external_morphs_.at(i)); + auto* morph = static_cast(external_morphs_.at(i)); process_morphs[external_morph_indices_[i]] = morph; if (morph->hasChanged()) { fast_mode_ = 0; @@ -780,7 +780,7 @@ void CMSHistSum::EnableFastVertical() { enable_fast_vertical_ = true; } -void CMSHistSum::injectExternalMorph(int idx, CMSInterferenceFunc& morph) { +void CMSHistSum::injectExternalMorph(int idx, CMSExternalMorph& morph) { if ( idx >= coeffpars_.getSize() ) { throw std::runtime_error("Process index larger than number of processes in CMSHistSum"); } diff --git a/src/CMSInterferenceFunc.cxx b/src/CMSInterferenceFunc.cxx index 34e96097103..e0ecc8e056f 100644 --- a/src/CMSInterferenceFunc.cxx +++ b/src/CMSInterferenceFunc.cxx @@ -39,21 +39,24 @@ CMSInterferenceFunc::CMSInterferenceFunc() {}; CMSInterferenceFunc::CMSInterferenceFunc( CMSInterferenceFunc const& other, const char* name ) : - RooAbsReal(other, name), x_("x", this, other.x_), + CMSExternalMorph(other, name), coefficients_("coefficients", this, other.coefficients_), - edges_(other.edges_), binscaling_(other.binscaling_), + binscaling_(other.binscaling_), sentry_(name ? TString(name) + "_sentry" : TString(other.GetName())+"_sentry", "") { } CMSInterferenceFunc::CMSInterferenceFunc( - const char* name, const char* title, RooRealVar& x, - RooArgList const& coefficients, const std::vector& edges, + const char* name, + const char* title, + RooRealVar& x, + const std::vector& edges, + RooArgList const& coefficients, const std::vector> binscaling ) : - RooAbsReal(name, title), x_("x", "", this, x), + CMSExternalMorph(name, title, x, edges), coefficients_("coefficients", "", this), - edges_(edges), binscaling_(binscaling), + binscaling_(binscaling), sentry_(TString(name) + "_sentry", "") { coefficients_.add(coefficients); @@ -117,21 +120,7 @@ void CMSInterferenceFunc::updateCache() const { sentry_.reset(); } -double CMSInterferenceFunc::evaluate() const { - if ( not evaluator_ ) initialize(); - if ( not sentry_.good() ) updateCache(); - - auto it = std::upper_bound(std::begin(edges_), std::end(edges_), x_->getVal()); - if ( (it == std::begin(edges_)) or (it == std::end(edges_)) ) { - return 0.0; - } - size_t idx = std::distance(std::begin(edges_), it) - 1; - return evaluator_->getValues()[idx]; -} - const std::vector& CMSInterferenceFunc::batchGetBinValues() const { - // we don't really expect the cache to be valid, as upstream callers are - // managing their own and calling this only when dirty, but let's check anyway if ( not evaluator_ ) initialize(); if ( not sentry_.good() ) updateCache(); return evaluator_->getValues(); diff --git a/src/classes.h b/src/classes.h index dfd8e1d38d8..867ffeee1c9 100644 --- a/src/classes.h +++ b/src/classes.h @@ -66,5 +66,6 @@ #include "HiggsAnalysis/CombinedLimit/interface/RooCheapProduct.h" #include "HiggsAnalysis/CombinedLimit/interface/CMSHggFormula.h" #include "HiggsAnalysis/CombinedLimit/interface/SimpleProdPdf.h" +#include "HiggsAnalysis/CombinedLimit/interface/CMSExternalMorph.h" #include "HiggsAnalysis/CombinedLimit/interface/CMSInterferenceFunc.h" #include "HiggsAnalysis/CombinedLimit/interface/RooEFTScalingFunction.h" diff --git a/src/classes_def.xml b/src/classes_def.xml index da5e4d7fbcd..3bf3eb4f689 100644 --- a/src/classes_def.xml +++ b/src/classes_def.xml @@ -222,6 +222,7 @@ + diff --git a/test/test_interference.py b/test/test_interference.py index 1cb146909c5..a97e2414093 100755 --- a/test/test_interference.py +++ b/test/test_interference.py @@ -174,13 +174,17 @@ def setvars(x, kl, kv, k2v): # toy generation is different between the histsum and histfunc models, somehow ntoys = 10 -ret = subprocess.call(f"combine -M GenerateOnly card.root -t {ntoys} --saveToys".split(" ")) +ret = subprocess.call("combine -M GenerateOnly card.root -t {ntoys} --saveToys".format(ntoys=ntoys).split(" ")) assert ret == 0 -ret = subprocess.call(f"combine -M MultiDimFit card.root -t {ntoys} --toysFile higgsCombineTest.GenerateOnly.mH120.123456.root -n HistFunc".split(" ")) +ret = subprocess.call( + "combine -M MultiDimFit card.root -t {ntoys} --toysFile higgsCombineTest.GenerateOnly.mH120.123456.root -n HistFunc".format(ntoys=ntoys).split(" ") +) assert ret == 0 -ret = subprocess.call(f"combine -M MultiDimFit card_histsum.root -t {ntoys} --toysFile higgsCombineTest.GenerateOnly.mH120.123456.root -n HistSum".split(" ")) +ret = subprocess.call( + "combine -M MultiDimFit card_histsum.root -t {ntoys} --toysFile higgsCombineTest.GenerateOnly.mH120.123456.root -n HistSum".format(ntoys=ntoys).split(" ") +) assert ret == 0 f_histfunc = ROOT.TFile.Open("higgsCombineHistFunc.MultiDimFit.mH120.123456.root") @@ -195,4 +199,4 @@ def setvars(x, kl, kv, k2v): if abs(row1.k2v - row2.k2v) > 1e-4: ndiff["k2v"] += 1 -print(f"Out of {ntoys} toys, {ndiff} are not matching (tolerance: 1e-4) between CMSHistFunc and CMSHistSum") +print("Out of {ntoys} toys, {ndiff} are not matching (tolerance: 1e-4) between CMSHistFunc and CMSHistSum".format(ntoys=ntoys, ndiff=ndiff)) From 88579163116abf444c53526ff4b062e9073c0ce0 Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Tue, 31 Oct 2023 22:33:31 -0500 Subject: [PATCH 21/24] Only run tests in python3 --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7c52b9c0409..81a571f14a0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,6 +66,7 @@ jobs: cd HiggsAnalysis/CombinedLimit make CONDA=1 -j 2 - name: Run tests + if: startsWith(matrix.python, '3.') shell: bash -l {0} run: | cd HiggsAnalysis/CombinedLimit/test From 334e052dc792f14b5053170e595156534679a300 Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Wed, 1 Nov 2023 12:03:47 -0500 Subject: [PATCH 22/24] Remove f-string --- python/InterferenceModels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/InterferenceModels.py b/python/InterferenceModels.py index ca70a1e5499..c7af8204730 100644 --- a/python/InterferenceModels.py +++ b/python/InterferenceModels.py @@ -141,7 +141,7 @@ def done(self): histfunc = self.modelBuilder.out.function(hfname) if not histfunc: # assume this is a CMSHistSum workspace - hfname = f"prop_bin{channel}" + hfname = "prop_bin{channel}".format(channel=channel) histfunc = self.modelBuilder.out.function(hfname) # TODO: support FastVerticalInterpHistPdf2 if not isinstance(histfunc, (ROOT.CMSHistFunc, ROOT.CMSHistSum)): From 484a45ec16ab10371056433506c4600d943ac966 Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Wed, 1 Nov 2023 12:22:01 -0500 Subject: [PATCH 23/24] Unique external morph name --- python/InterferenceModels.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/InterferenceModels.py b/python/InterferenceModels.py index c7af8204730..cfbf8221f7e 100644 --- a/python/InterferenceModels.py +++ b/python/InterferenceModels.py @@ -141,8 +141,11 @@ def done(self): histfunc = self.modelBuilder.out.function(hfname) if not histfunc: # assume this is a CMSHistSum workspace - hfname = "prop_bin{channel}".format(channel=channel) - histfunc = self.modelBuilder.out.function(hfname) + histsum_name = "prop_bin{channel}".format(channel=channel) + histfunc = self.modelBuilder.out.function(histsum_name) + # in this workspace there is no object representing the process morphing + # make one up so that funcname is still unique + hfname = "prop_bin{channel}_process{process}".format(channel=channel, process=process) # TODO: support FastVerticalInterpHistPdf2 if not isinstance(histfunc, (ROOT.CMSHistFunc, ROOT.CMSHistSum)): raise RuntimeError( From 6ca71960a4aa13eea4c537fc9bc5f358df8c9e92 Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Thu, 2 Nov 2023 13:43:42 -0500 Subject: [PATCH 24/24] Sometimes coefficient has a different name --- python/InterferenceModels.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/InterferenceModels.py b/python/InterferenceModels.py index cfbf8221f7e..16d6ba3bcfe 100644 --- a/python/InterferenceModels.py +++ b/python/InterferenceModels.py @@ -172,11 +172,11 @@ def done(self): if isinstance(histfunc, ROOT.CMSHistFunc): histfunc.injectExternalMorph(func) elif isinstance(histfunc, ROOT.CMSHistSum): - coefname = "n_exp_final_bin{channel}_proc_{process}".format(channel=channel, process=process) + postfix = "bin{channel}_proc_{process}".format(channel=channel, process=process) for idx, coef in enumerate(histfunc.coefList()): - if coef.GetName() == coefname: + if coef.GetName() in ("n_exp_" + postfix, "n_exp_final_" + postfix): if self.verbose: - print("Injecting external morph for " + coefname) + print("Injecting external morph for " + funcname) histfunc.injectExternalMorph(idx, func) break