Skip to content

Commit

Permalink
Merge pull request #936 from cms-analysis/nckw_fix_Pdf_ordering
Browse files Browse the repository at this point in the history
Moved to ordered collections
  • Loading branch information
anigamova authored Apr 11, 2024
2 parents 4b5f188 + 8209cb5 commit b7dc99d
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 45 deletions.
33 changes: 17 additions & 16 deletions python/Datacard.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import print_function

import six
from collections import OrderedDict


class Datacard:
Expand All @@ -16,57 +17,57 @@ def __init__(self):
## list of [bins in datacard]
self.bins = []
## dict of {bin : number of observed events}
self.obs = {}
self.obs = OrderedDict()
## list of [processes]
self.processes = []
## list of [signal processes]
self.signals = []
## dict of {processes : boolean to indicate whether process is signal or not}
self.isSignal = {}
self.isSignal = OrderedDict()
## list of [(bin, process, boolean to indicate whether process is signal or not)]
self.keyline = []
## dict of {bin : {process : yield}}
self.exp = {}
self.exp = OrderedDict()
## list of [(name of uncert, boolean to indicate whether to float this nuisance or not, type, list of what additional arguments (e.g. for gmN), keyline element)]
self.systs = []
## dict of {bin : {process : [input file, path to shape, path to shape for uncertainty]}}
self.shapeMap = {}
self.shapeMap = OrderedDict()
## boolean that indicates whether the datacard contains shapes or not
self.hasShapes = False
## dict of {name of uncert : boolean to indicate whether it is a flat parametric uncertainty or not}
self.flatParamNuisances = {}
self.flatParamNuisances = OrderedDict()
## dict of rateParam, key is f"{bin}AND{process}", per bin/process they are a list
self.rateParams = {}
self.rateParams = OrderedDict()
## dict of extArgs
self.extArgs = {}
self.extArgs = OrderedDict()
## maintain the names of rate modifiers
self.rateParamsOrder = set()
## set of names of uncertainties that are frozen (i.e. not floating) -- NOTE(review): stored as a set, not a dict; confirm intended contents
self.frozenNuisances = set()

# Allows for nuisance renaming of "shape" systematics
self.systematicsShapeMap = {}
self.systematicsShapeMap = OrderedDict()

# Allows for nuisance renaming of "param" systematics
self.systematicsParamMap = {}
self.systematicsParamMap = OrderedDict()

# Allow to pick out entry in self.systs.
self.systIDMap = {}
self.systIDMap = OrderedDict()

# Keep edits
self.nuisanceEditLines = []

# map of which bins should have automated Barlow-Beeston parameters
self.binParFlags = {}
self.binParFlags = OrderedDict()

self.groups = {}
self.groups = OrderedDict()
self.discretes = []

# list of parameters called _norm in user input workspace
self.pdfnorms = {}
self.pdfnorms = OrderedDict()

# collection of nuisances to auto-produce flat priors for
self.toCreateFlatParam = {}
self.toCreateFlatParam = OrderedDict()

def print_structure(self):
"""
Expand Down Expand Up @@ -170,7 +171,7 @@ def print_structure(self):
)

# map of which bins should have automated Barlow-Beeston parameters
self.binParFlags = {}
self.binParFlags = OrderedDict()

def list_of_bins(self):
"""
Expand Down Expand Up @@ -311,7 +312,7 @@ def renameNuisanceParameter(self, oldname, newname, process_list=[], channel_lis
for specific channels/processes, then you should specify a
process (list or leave empty for all) and channel (list or leave empty for all)
"""
existingclashes = {}
existingclashes = OrderedDict()
for lsyst, nofloat, pdf0, args0, errline0 in self.systs[:]:
if lsyst == newname: # found the nuisance exists
existingclashes[lsyst] = (nofloat, pdf0, args0, errline0)
Expand Down
16 changes: 9 additions & 7 deletions python/DatacardParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from HiggsAnalysis.CombinedLimit.Datacard import Datacard
from HiggsAnalysis.CombinedLimit.NuisanceModifier import doEditNuisance

from collections import OrderedDict

globalNuisances = re.compile("(lumi|pdf_(qqbar|gg|qg)|QCDscale_(ggH|qqH|VH|ggH1in|ggH2in|VV)|UEPS|FakeRate|CMS_(eff|fake|trigger|scale|res)_([gemtjb]|met))")


Expand Down Expand Up @@ -350,7 +352,7 @@ def parseCard(file, options):

# resetting these here to defaults, parseCard will fill them up
ret.discretes = []
ret.groups = {}
ret.groups = OrderedDict()

#
nbins = -1
Expand Down Expand Up @@ -386,7 +388,7 @@ def parseCard(file, options):
if len(f) < 4:
raise RuntimeError("Malformed shapes line")
if f[2] not in ret.shapeMap:
ret.shapeMap[f[2]] = {}
ret.shapeMap[f[2]] = OrderedDict()
if f[1] in ret.shapeMap[f[2]]:
raise RuntimeError("Duplicate definition for process '%s', channel '%s'" % (f[1], f[2]))
ret.shapeMap[f[2]][f[1]] = f[3:]
Expand All @@ -404,7 +406,7 @@ def parseCard(file, options):
if len(binline) != len(ret.obs):
raise RuntimeError("Found %d bins (%s) but %d bins have been declared" % (len(ret.bins), ret.bins, nbins))
ret.bins = binline
ret.obs = dict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)])
ret.obs = OrderedDict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)])
binline = []
if f[0] == "bin":
binline = []
Expand Down Expand Up @@ -445,10 +447,10 @@ def parseCard(file, options):
raise RuntimeError("Found %d processes (%s), declared jmax = %d" % (len(ret.processes), ret.processes, nprocesses))
if nbins != len(ret.bins):
raise RuntimeError("Found %d bins (%s), declared imax = %d" % (len(ret.bins), ret.bins, nbins))
ret.exp = dict([(b, {}) for b in ret.bins])
ret.isSignal = dict([(p, None) for p in ret.processes])
ret.exp = OrderedDict([(b, OrderedDict()) for b in ret.bins])
ret.isSignal = OrderedDict([(p, None) for p in ret.processes])
if ret.obs != [] and type(ret.obs) == list: # still as list, must change into map with bin names
ret.obs = dict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)])
ret.obs = OrderedDict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)])
for b, p, s in ret.keyline:
if ret.isSignal[p] == None:
ret.isSignal[p] = s
Expand Down Expand Up @@ -631,7 +633,7 @@ def parseCard(file, options):
raise RuntimeError(
"Malformed systematics line %s of length %d: while bins and process lines have length %d" % (lsyst, len(numbers), len(ret.keyline))
)
errline = dict([(b, {}) for b in ret.bins])
errline = OrderedDict([(b, OrderedDict()) for b in ret.bins])
nonNullEntries = 0
for (b, p, s), r in zip(ret.keyline, numbers):
if "/" in r: # "number/number"
Expand Down
14 changes: 8 additions & 6 deletions python/ModelTools.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import six
from six.moves import range

from collections import OrderedDict

import ROOT

ROOFIT_EXPR = "expr"
Expand Down Expand Up @@ -54,7 +56,7 @@ def __init__(self, options):
self.out = ROOT.RooWorkspace("w", "w")
# self.out.safe_import = getattr(self.out,"import") # workaround: import is a python keyword
self.out.safe_import = SafeWorkspaceImporter(self.out)
self.objstore = {}
self.objstore = OrderedDict()
self.out.dont_delete = []
if options.verbose == 0:
ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.ERROR)
Expand Down Expand Up @@ -221,7 +223,7 @@ def runPostProcesses(self):
self.out.arg(n).setConstant(True)

def doExtArgs(self):
open_files = {}
open_files = OrderedDict()
for rp in self.DC.extArgs.keys():
if self.out.arg(rp):
continue
Expand Down Expand Up @@ -277,7 +279,7 @@ def doExtArgs(self):
def doRateParams(self):
# First support external functions/parameters
# keep a map of open files/workspaces
open_files = {}
open_files = OrderedDict()

for rp in self.DC.rateParams.keys():
for rk in range(len(self.DC.rateParams[rp])):
Expand Down Expand Up @@ -808,7 +810,7 @@ def doFillNuisPdfsAndSets(self):
if p != "constr":
nuisVars.add(self.out.var(c_param_name))
setNuisPdf.append(c_param_name)
setNuisPdf = set(setNuisPdf)
setNuisPdf = list(dict.fromkeys((setNuisPdf)))
for n in setNuisPdf:
nuisPdfs.add(self.out.pdf(n + "_Pdf"))
self.out.defineSet("nuisances", nuisVars)
Expand All @@ -821,7 +823,7 @@ def doFillNuisPdfsAndSets(self):
self.out.defineSet("globalObservables", gobsVars)
else: # doesn't work for too many nuisances :-(
# avoid duplicating _Pdf in list
setNuisPdf = set([self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs])
setNuisPdf = list(dict.fromkeys(keywords([self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs])))
self.doSet("nuisances", ",".join(["%s" % self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs]))
self.doObj("nuisancePdf", "PROD", ",".join(["%s_Pdf" % n for n in setNuisPdf]))
self.doSet("globalObservables", ",".join(self.globalobs))
Expand All @@ -846,7 +848,7 @@ def doAutoFlatNuisancePriors(self):

def doNuisancesGroups(self):
# Prepare a dictionary of which group a certain nuisance belongs to
groupsFor = {}
groupsFor = OrderedDict()
# existingNuisanceNames = tuple(set([syst[0] for syst in self.DC.systs]+self.DC.flatParamNuisances.keys()+self.DC.rateParams.keys()+self.DC.extArgs.keys()+self.DC.discretes))
existingNuisanceNames = self.DC.getAllVariables()
for groupName, nuisanceNames in six.iteritems(self.DC.groups):
Expand Down
6 changes: 4 additions & 2 deletions python/NuisanceModifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import sys
from math import exp, hypot, log

from collections import OrderedDict


def appendMap(tmap, k, thing):
if k in list(tmap.keys()):
Expand Down Expand Up @@ -77,7 +79,7 @@ def doAddNuisance(datacard, args):
cchannel = re.compile(channel.replace("+", r"\+"))
opts = args[5:]
found = False
errline = dict([(b, dict([(p, 0) for p in datacard.exp[b]])) for b in datacard.bins])
errline = OrderedDict([(b, OrderedDict([(p, 0) for p in datacard.exp[b]])) for b in datacard.bins])
for lsyst, nofloat, pdf0, args0, errline0 in datacard.systs:
if lsyst == name:
if pdf != pdf0:
Expand Down Expand Up @@ -226,7 +228,7 @@ def doRenameNuisance(datacard, args):
if pdf0 == "param":
continue
# for dcs in datacard.systs: print " --> ", dcs
errline2 = dict([(b, dict([(p, 0) for p in datacard.exp[b]])) for b in datacard.bins])
errline2 = OrderedDict([(b, OrderedDict([(p, 0) for p in datacard.exp[b]])) for b in datacard.bins])
found = False
if newname in list(datacard.systIDMap.keys()):
for id2 in datacard.systIDMap[newname]:
Expand Down
30 changes: 16 additions & 14 deletions python/ShapeTools.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import six
from six.moves import range

from collections import OrderedDict

import ROOT
from HiggsAnalysis.CombinedLimit.ModelTools import ModelBuilder

Expand All @@ -31,7 +33,7 @@ class FileCache:
def __init__(self, basedir, maxsize=250):
self._basedir = basedir
self._maxsize = maxsize
self._files = {}
self._files = OrderedDict()
self._hits = defaultdict(int)
self._total = 0

Expand Down Expand Up @@ -71,10 +73,10 @@ def __init__(self, datacard, options):
if options.libs:
for lib in options.libs:
ROOT.gSystem.Load(lib)
self.wspnames = {}
self.wspnames = OrderedDict()
self.wsp = None
self.extraImports = []
self.norm_rename_map = {}
self.norm_rename_map = OrderedDict()
self._fileCache = FileCache(self.options.baseDir)

## ------------------------------------------
Expand Down Expand Up @@ -468,12 +470,12 @@ def RenameDupObjs(self, dupObjs, dupNames, newObj, postFix):
## --------------------------------------
def prepareAllShapes(self):
shapeTypes = []
shapeBins = {}
shapeObs = {}
self.pdfModes = {}
shapeBins = OrderedDict()
shapeObs = OrderedDict()
self.pdfModes = OrderedDict()
for ib, b in enumerate(self.DC.bins):
databins = {}
bgbins = {}
databins = OrderedDict()
bgbins = OrderedDict()
channelBinParFlag = b in list(self.DC.binParFlags.keys())
for p in [self.options.dataname] + list(self.DC.exp[b].keys()):
if len(self.DC.obs) == 0 and p == self.options.dataname:
Expand Down Expand Up @@ -561,7 +563,7 @@ def prepareAllShapes(self):
if i not in bgbins:
stderr.write("Channel %s has bin %d filled in data but empty in all backgrounds\n" % (b, i))
if shapeTypes.count("TH1"):
self.TH1Observables = {}
self.TH1Observables = OrderedDict()
self.out.binVars = ROOT.RooArgSet()
self.out.maxbins = max([shapeBins[k] for k in shapeBins.keys()])
if self.options.optimizeTemplateBins:
Expand Down Expand Up @@ -662,7 +664,7 @@ def doCombinedDataset(self):
## -------------------------------------
## -------- Low level helpers ----------
## -------------------------------------
def getShape(self, channel, process, syst="", _cache={}, allowNoSyst=False):
def getShape(self, channel, process, syst="", _cache=OrderedDict(), allowNoSyst=False):
if (channel, process, syst) in _cache:
if self.options.verbose > 2:
print(
Expand Down Expand Up @@ -851,10 +853,10 @@ def getShape(self, channel, process, syst="", _cache={}, allowNoSyst=False):
_cache[(channel, process, syst)] = ret
return ret

def getData(self, channel, process, syst="", _cache={}):
def getData(self, channel, process, syst="", _cache=OrderedDict()):
return self.shape2Data(self.getShape(channel, process, syst), channel, process)

def getPdf(self, channel, process, _cache={}):
def getPdf(self, channel, process, _cache=OrderedDict()):
postFix = "Sig" if (process in self.DC.isSignal and self.DC.isSignal[process]) else "Bkg"
if (channel, process) in _cache:
return _cache[(channel, process)]
Expand Down Expand Up @@ -1212,7 +1214,7 @@ def rebinH1(self, shape):
rebinh1._original_bins = shapeNbins
return rebinh1

def shape2Data(self, shape, channel, process, _cache={}):
def shape2Data(self, shape, channel, process, _cache=OrderedDict()):
postFix = "Sig" if (process in self.DC.isSignal and self.DC.isSignal[process]) else "Bkg"
if shape == None:
name = "shape%s_%s_%s" % (postFix, channel, process)
Expand Down Expand Up @@ -1248,7 +1250,7 @@ def shape2Data(self, shape, channel, process, _cache={}):
raise RuntimeError("shape2Data not implemented for %s" % shape.ClassName())
return _cache[shape.GetName()]

def shape2Pdf(self, shape, channel, process, _cache={}):
def shape2Pdf(self, shape, channel, process, _cache=OrderedDict()):
postFix = "Sig" if (process in self.DC.isSignal and self.DC.isSignal[process]) else "Bkg"
channelBinParFlag = channel in list(self.DC.binParFlags.keys())
if shape == None:
Expand Down

0 comments on commit b7dc99d

Please sign in to comment.