From dfcc7dd4c89fda9dcc5f0ae10b8cf234e462b363 Mon Sep 17 00:00:00 2001 From: t7phy Date: Mon, 27 May 2024 00:05:07 +0200 Subject: [PATCH 01/30] for x-q2 map for ttb --- validphys2/src/validphys/process_options.py | 22 +++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index c895d6373c..9af5d23951 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -10,7 +10,7 @@ import numpy as np from validobj.custom import Parser -TMASS = 173.3 +TMASS = 172.5 class _Vars: @@ -189,6 +189,12 @@ def _hqp_ptq_xq2map(kin_info): Q = np.sqrt(QMASS2 + kin_info[_Vars.pT_t] * kin_info[_Vars.pT_t]) + kin_info[_Vars.pT_t] return Q / kin_info[_Vars.sqrts], Q * Q +def _hqp_mqq_xq2map(kin_info): + # Compute x, Q2 + QQMASS2 = (2 * TMASS) * (2 * TMASS) + Q = np.sqrt(QQMASS2 + kin_info[_Vars.m_ttBar] * kin_info[_Vars.m_ttBar]) + kin_info[_Vars.m_ttBar] + return Q / kin_info[_Vars.sqrts], Q * Q + def _displusjet_xq2map(kin_info): """Computes x and q2 mapping for a DIS + J (J) process @@ -252,25 +258,32 @@ def _dyncpt_xq2map(kin_info): HQP_YQ = _Process( "HQP_YQ", - "Normalized differential cross section w.r.t. absolute rapidity of t", + "(absolute) rapidity of top quark in top pair production", accepted_variables=(_Vars.y_t, _Vars.m_t2, _Vars.sqrts, _Vars.m_ttBar), xq2map_function=_hqp_yq_xq2map, ) HQP_YQQ = _Process( "HQP_YQQ", - "Differential cross section w.r.t. absolute rapidity of ttBar", + "(absolute) rapidity of top quark pair in top pair production", accepted_variables=(_Vars.y_ttBar, _Vars.m_t2, _Vars.sqrts, _Vars.m_ttBar), xq2map_function=_hqp_yqq_xq2map, ) HQP_PTQ = _Process( "HQP_PTQ", - "Normalized double differential cross section w.r.t. 
absolute rapidity and transverse momentum of t", + "Transverse momentum of top quark in top pair production", accepted_variables=(_Vars.pT_t, _Vars.y_t, _Vars.sqrts, _Vars.m_t2), xq2map_function=_hqp_ptq_xq2map, ) +HQP_MQQ = _Process( + "HQP_MQQ", + "Invariant mass of top quark pair in top pair production", + accepted_variables=(_Vars.m_ttBar, _Vars.y_ttBar, _Vars.sqrts, _Vars.m_t2), + xq2map_function=_hqp_mqq_xq2map, +) + HERAJET = _Process( "HERAJET", @@ -304,6 +317,7 @@ def _dyncpt_xq2map(kin_info): "HQP_YQ": HQP_YQ, "HQP_YQQ": HQP_YQQ, "HQP_PTQ": HQP_PTQ, + "HQP_MQQ": HQP_MQQ, "HERAJET": HERAJET, "HERADIJET": dataclasses.replace(HERAJET, name="HERADIJET", description="DIS + jj production"), "DY_W_ETA": DY_W_ETA, From 28ded46c3abd56230d6dc7513c0c6261c651fc82 Mon Sep 17 00:00:00 2001 From: t7phy Date: Mon, 27 May 2024 00:08:24 +0200 Subject: [PATCH 02/30] BLACK --- validphys2/src/validphys/process_options.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 9af5d23951..6f36c6a457 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -24,7 +24,7 @@ class _Vars: ystar = "ystar" ydiff = "ydiff" m_jj = "m_jj" - p_T2 = "p_T2" # This one is wrong, should be pT2 + p_T2 = "p_T2" # This one is wrong, should be pT2 y_t = "y_t" y_ttBar = "y_ttBar" m_t2 = "m_t2" @@ -38,7 +38,7 @@ class _Vars: class _KinematicsInformation: """Read the 3 columns dataframe corresponding to the values set in the ``kinematic_coverage`` field into a dictionary defining the name of the variables. - + Adds the special "sqrts" key unless it is already part of the kinematic coverage. 
Provides a ``.get_one_of`` method that accepts any number of variables @@ -189,10 +189,14 @@ def _hqp_ptq_xq2map(kin_info): Q = np.sqrt(QMASS2 + kin_info[_Vars.pT_t] * kin_info[_Vars.pT_t]) + kin_info[_Vars.pT_t] return Q / kin_info[_Vars.sqrts], Q * Q + def _hqp_mqq_xq2map(kin_info): # Compute x, Q2 QQMASS2 = (2 * TMASS) * (2 * TMASS) - Q = np.sqrt(QQMASS2 + kin_info[_Vars.m_ttBar] * kin_info[_Vars.m_ttBar]) + kin_info[_Vars.m_ttBar] + Q = ( + np.sqrt(QQMASS2 + kin_info[_Vars.m_ttBar] * kin_info[_Vars.m_ttBar]) + + kin_info[_Vars.m_ttBar] + ) return Q / kin_info[_Vars.sqrts], Q * Q @@ -208,12 +212,13 @@ def _displusjet_xq2map(kin_info): x = q2 * q2 / s / (pt**2 - q2) return x, q2 + def _dywboson_xq2map(kin_dict): """ Computes x and q2 mapping for pseudo rapidity observables originating from a W boson DY process. """ - mass2 = kin_dict[_Vars.m_W2] + mass2 = kin_dict[_Vars.m_W2] sqrts = kin_dict[_Vars.sqrts] eta = kin_dict[_Vars.eta] @@ -223,6 +228,7 @@ def _dywboson_xq2map(kin_dict): x = np.concatenate((x1, x2)) return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((mass2, mass2)) + def _dyncpt_xq2map(kin_info): """ Computes x and q2 mapping for DY NC dilepton @@ -230,7 +236,7 @@ def _dyncpt_xq2map(kin_info): """ q2 = kin_info[_Vars.m_Z2] pt = kin_info[_Vars.pT] - s = kin_info[_Vars.sqrts]**2 + s = kin_info[_Vars.sqrts] ** 2 x = q2 * q2 / s / (pt**2 - q2) return x, q2 From c2d1d7018aaddcd01977d612711baf9a9662bdce Mon Sep 17 00:00:00 2001 From: t7phy Date: Mon, 27 May 2024 23:21:32 +0200 Subject: [PATCH 03/30] implement suggestions --- validphys2/src/validphys/process_options.py | 27 ++++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 6f36c6a457..6bfc39d38d 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -10,8 +10,6 @@ import numpy as np from validobj.custom import Parser 
-TMASS = 172.5 - class _Vars: x = "x" @@ -162,7 +160,7 @@ def _hqp_yq_xq2map(kin_info): kin_info[_Vars.m_t2] = kin_info["k2"] kin_info[_Vars.sqrts] = kin_info["k3"] - mass2 = kin_info.get_one_of(_Vars.m_t2, _Vars.m_ttBar) + mass2 = kin_info[_Vars.m_t2] ratio = np.sqrt(mass2) / kin_info[_Vars.sqrts] x1 = ratio * np.exp(kin_info[_Vars.y_t]) @@ -174,7 +172,7 @@ def _hqp_yq_xq2map(kin_info): def _hqp_yqq_xq2map(kin_info): # Compute x, Q2 - mass2 = kin_info.get_one_of(_Vars.m_t2, _Vars.m_ttBar) + mass2 = kin_info[_Vars.m_t2] ratio = np.sqrt(mass2) / kin_info[_Vars.sqrts] x1 = ratio * np.exp(kin_info[_Vars.y_ttBar]) x2 = ratio * np.exp(-kin_info[_Vars.y_ttBar]) @@ -185,14 +183,14 @@ def _hqp_yqq_xq2map(kin_info): def _hqp_ptq_xq2map(kin_info): # Compute x, Q2 - QMASS2 = TMASS * TMASS + QMASS2 = kin_info[_Vars.m_t2] Q = np.sqrt(QMASS2 + kin_info[_Vars.pT_t] * kin_info[_Vars.pT_t]) + kin_info[_Vars.pT_t] return Q / kin_info[_Vars.sqrts], Q * Q def _hqp_mqq_xq2map(kin_info): # Compute x, Q2 - QQMASS2 = (2 * TMASS) * (2 * TMASS) + QQMASS2 = 4 * kin_info[_Vars.m_t2] Q = ( np.sqrt(QQMASS2 + kin_info[_Vars.m_ttBar] * kin_info[_Vars.m_ttBar]) + kin_info[_Vars.m_ttBar] @@ -200,6 +198,16 @@ def _hqp_mqq_xq2map(kin_info): return Q / kin_info[_Vars.sqrts], Q * Q +def _inc_xq2map(kin_info): + # Compute x, Q2 + if {"k1", "k2", "k3"} <= kin_info.keys(): + kin_info[_Vars.m_X2] = kin_info["k2"] + kin_info[_Vars.sqrts] = kin_info["k3"] + + mass2 = kin_info.get_one_of(m_W2, m_Z2, m_t2, m_X2) + return np.sqrt(mass2) / kin_info[_Vars.sqrts], mass2 + + def _displusjet_xq2map(kin_info): """Computes x and q2 mapping for a DIS + J (J) process Uses Q2 as provided by the dictionary of kinematics variables @@ -290,6 +298,12 @@ def _dyncpt_xq2map(kin_info): xq2map_function=_hqp_mqq_xq2map, ) +INC = _Process( + "INC", + "Inclusive cross section", + accepted_variables=(_Vars.sqrts, _Vars.m_W2, _Vars.m_Z2, _Vars.m_t2, _Vars.m_X2), + xq2map_function=_inc_xq2map, +) HERAJET = _Process( "HERAJET", @@ 
-324,6 +338,7 @@ def _dyncpt_xq2map(kin_info): "HQP_YQQ": HQP_YQQ, "HQP_PTQ": HQP_PTQ, "HQP_MQQ": HQP_MQQ, + "INC": INC, "HERAJET": HERAJET, "HERADIJET": dataclasses.replace(HERAJET, name="HERADIJET", description="DIS + jj production"), "DY_W_ETA": DY_W_ETA, From d9c67e0b4de180510e088b40abfdf26aa51f9427 Mon Sep 17 00:00:00 2001 From: t7phy Date: Tue, 28 May 2024 20:17:47 +0200 Subject: [PATCH 04/30] implement suggestions v2 --- validphys2/src/validphys/process_options.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 6bfc39d38d..313a41c508 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -201,10 +201,11 @@ def _hqp_mqq_xq2map(kin_info): def _inc_xq2map(kin_info): # Compute x, Q2 if {"k1", "k2", "k3"} <= kin_info.keys(): - kin_info[_Vars.m_X2] = kin_info["k2"] + mass2 = kin_info["k2"] kin_info[_Vars.sqrts] = kin_info["k3"] + else: + mass2 = kin_info.get_one_of(m_W2, m_Z2, m_t2) - mass2 = kin_info.get_one_of(m_W2, m_Z2, m_t2, m_X2) return np.sqrt(mass2) / kin_info[_Vars.sqrts], mass2 From c39e015a03b8b2ffe37a1366da97816577c8470e Mon Sep 17 00:00:00 2001 From: t7phy Date: Tue, 28 May 2024 20:26:05 +0200 Subject: [PATCH 05/30] fix error in cad1200 --- validphys2/src/validphys/process_options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 313a41c508..88803bae9f 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -302,7 +302,7 @@ def _dyncpt_xq2map(kin_info): INC = _Process( "INC", "Inclusive cross section", - accepted_variables=(_Vars.sqrts, _Vars.m_W2, _Vars.m_Z2, _Vars.m_t2, _Vars.m_X2), + accepted_variables=(_Vars.sqrts, _Vars.m_W2, _Vars.m_Z2, _Vars.m_t2), xq2map_function=_inc_xq2map, ) From 
501a19ac9c062c1209065b94d38afa31b1992ed3 Mon Sep 17 00:00:00 2001 From: t7phy Date: Tue, 28 May 2024 20:49:21 +0200 Subject: [PATCH 06/30] take into account the zero var used in inc datasets --- validphys2/src/validphys/process_options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 88803bae9f..f89ecab0f1 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -302,7 +302,7 @@ def _dyncpt_xq2map(kin_info): INC = _Process( "INC", "Inclusive cross section", - accepted_variables=(_Vars.sqrts, _Vars.m_W2, _Vars.m_Z2, _Vars.m_t2), + accepted_variables=(_Vars.zero, _Vars.sqrts, _Vars.m_W2, _Vars.m_Z2, _Vars.m_t2), xq2map_function=_inc_xq2map, ) From 7d720f93bcc4071de253ee312f21efae86219da8 Mon Sep 17 00:00:00 2001 From: t7phy Date: Tue, 28 May 2024 20:58:41 +0200 Subject: [PATCH 07/30] atry to ccount for zero without using _Vars --- validphys2/src/validphys/process_options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index f89ecab0f1..d5d42621f1 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -302,7 +302,7 @@ def _dyncpt_xq2map(kin_info): INC = _Process( "INC", "Inclusive cross section", - accepted_variables=(_Vars.zero, _Vars.sqrts, _Vars.m_W2, _Vars.m_Z2, _Vars.m_t2), + accepted_variables=("zero", _Vars.sqrts, _Vars.m_W2, _Vars.m_Z2, _Vars.m_t2), xq2map_function=_inc_xq2map, ) From 1e1bec273e100d4aa0838da2b40c1113b52c9c4e Mon Sep 17 00:00:00 2001 From: t7phy Date: Wed, 29 May 2024 14:07:20 +0200 Subject: [PATCH 08/30] drop support for k1,k2,k3 in rzpidity distributions --- validphys2/src/validphys/process_options.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git 
a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index d5d42621f1..53a1ed897b 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -154,11 +154,6 @@ def _dijets_xq2map(kin_info): def _hqp_yq_xq2map(kin_info): - # Compute x, Q2 - if {"k1", "k2", "k3"} <= kin_info.keys(): - kin_info[_Vars.y_t] = kin_info["k1"] - kin_info[_Vars.m_t2] = kin_info["k2"] - kin_info[_Vars.sqrts] = kin_info["k3"] mass2 = kin_info[_Vars.m_t2] @@ -199,12 +194,12 @@ def _hqp_mqq_xq2map(kin_info): def _inc_xq2map(kin_info): - # Compute x, Q2 + Compute x, Q2 if {"k1", "k2", "k3"} <= kin_info.keys(): mass2 = kin_info["k2"] kin_info[_Vars.sqrts] = kin_info["k3"] else: - mass2 = kin_info.get_one_of(m_W2, m_Z2, m_t2) + mass2 = kin_info.get_one_of("m_W2", "m_Z2", "m_t2") return np.sqrt(mass2) / kin_info[_Vars.sqrts], mass2 From 91027559e20efb6e9ca0dfd0fabc4eb5780dbdaf Mon Sep 17 00:00:00 2001 From: t7phy Date: Wed, 29 May 2024 14:27:18 +0200 Subject: [PATCH 09/30] fix ttb process options --- validphys2/src/validphys/process_options.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 53a1ed897b..7e5647d874 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -154,47 +154,43 @@ def _dijets_xq2map(kin_info): def _hqp_yq_xq2map(kin_info): - + # Compute x, Q2 + # Theory predictions computed with HT/4 see 1906.06535 mass2 = kin_info[_Vars.m_t2] - ratio = np.sqrt(mass2) / kin_info[_Vars.sqrts] x1 = ratio * np.exp(kin_info[_Vars.y_t]) x2 = ratio * np.exp(-kin_info[_Vars.y_t]) q2 = mass2 x = np.concatenate((x1, x2)) - return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((q2, q2)) + return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((q2, q2)) / 4 def _hqp_yqq_xq2map(kin_info): # Compute x, Q2 + # Theory 
predictions computed with HT/4 see 1906.06535 mass2 = kin_info[_Vars.m_t2] ratio = np.sqrt(mass2) / kin_info[_Vars.sqrts] x1 = ratio * np.exp(kin_info[_Vars.y_ttBar]) x2 = ratio * np.exp(-kin_info[_Vars.y_ttBar]) q2 = kin_info[_Vars.m_t2] x = np.concatenate((x1, x2)) - return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((q2, q2)) + return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((q2, q2)) / 4 def _hqp_ptq_xq2map(kin_info): # Compute x, Q2 - QMASS2 = kin_info[_Vars.m_t2] - Q = np.sqrt(QMASS2 + kin_info[_Vars.pT_t] * kin_info[_Vars.pT_t]) + kin_info[_Vars.pT_t] + Q = (kin_info[_Vars.m_t2] + kin_info[_Vars.pT_t] * kin_info[_Vars.pT_t]) ** 0.5 / 2 return Q / kin_info[_Vars.sqrts], Q * Q def _hqp_mqq_xq2map(kin_info): # Compute x, Q2 - QQMASS2 = 4 * kin_info[_Vars.m_t2] - Q = ( - np.sqrt(QQMASS2 + kin_info[_Vars.m_ttBar] * kin_info[_Vars.m_ttBar]) - + kin_info[_Vars.m_ttBar] - ) + Q = kin_info[_Vars.m_ttBar] / 4 return Q / kin_info[_Vars.sqrts], Q * Q def _inc_xq2map(kin_info): - Compute x, Q2 + # Compute x, Q2 if {"k1", "k2", "k3"} <= kin_info.keys(): mass2 = kin_info["k2"] kin_info[_Vars.sqrts] = kin_info["k3"] From dd2e122eab0246d2e512f7720c7ed09bf160722c Mon Sep 17 00:00:00 2001 From: t7phy Date: Wed, 29 May 2024 14:35:56 +0200 Subject: [PATCH 10/30] sqrts -> m_t2 in kin.cov. 
for 2d dists --- .../new_commondata/ATLAS_TTBAR_13TEV_HADR_DIF/metadata.yaml | 4 ++-- .../new_commondata/CMS_TTBAR_13TEV_LJ_DIF/metadata.yaml | 4 ++-- .../new_commondata/CMS_TTBAR_8TEV_2L_DIF/metadata.yaml | 6 +++--- validphys2/src/validphys/process_options.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/nnpdf_data/nnpdf_data/new_commondata/ATLAS_TTBAR_13TEV_HADR_DIF/metadata.yaml b/nnpdf_data/nnpdf_data/new_commondata/ATLAS_TTBAR_13TEV_HADR_DIF/metadata.yaml index 2e0c26371a..512c6e0dd2 100644 --- a/nnpdf_data/nnpdf_data/new_commondata/ATLAS_TTBAR_13TEV_HADR_DIF/metadata.yaml +++ b/nnpdf_data/nnpdf_data/new_commondata/ATLAS_TTBAR_13TEV_HADR_DIF/metadata.yaml @@ -138,7 +138,7 @@ implemented_observables: data_central: data_d2Sig_dmttBar_dyttBar.yaml data_uncertainties: - uncertainties_d2Sig_dmttBar_dyttBar.yaml - kinematic_coverage: [y_ttBar, m_ttBar, sqrts] + kinematic_coverage: [y_ttBar, m_ttBar, m_t2] plotting: dataset_label: 'ATLAS 13 TeV top quark pair in hadronic channel: $\frac{d^2\sigma}{dm_{t\bar{t}}d|y_{t\bar{t}}|}$' kinematics_override: identity @@ -167,7 +167,7 @@ implemented_observables: data_central: data_d2Sig_dmttBar_dyttBar_norm.yaml data_uncertainties: - uncertainties_d2Sig_dmttBar_dyttBar_norm.yaml - kinematic_coverage: [y_ttBar, m_ttBar, sqrts] + kinematic_coverage: [y_ttBar, m_ttBar, m_t2] plotting: dataset_label: 'ATLAS 13 TeV top quark pair in hadronic channel: $\frac{1}{\sigma}\frac{d^2\sigma}{dm_{t\bar{t}}d|y_{t\bar{t}}|}$' kinematics_override: identity diff --git a/nnpdf_data/nnpdf_data/new_commondata/CMS_TTBAR_13TEV_LJ_DIF/metadata.yaml b/nnpdf_data/nnpdf_data/new_commondata/CMS_TTBAR_13TEV_LJ_DIF/metadata.yaml index 8ccf38a40c..473890ebd9 100644 --- a/nnpdf_data/nnpdf_data/new_commondata/CMS_TTBAR_13TEV_LJ_DIF/metadata.yaml +++ b/nnpdf_data/nnpdf_data/new_commondata/CMS_TTBAR_13TEV_LJ_DIF/metadata.yaml @@ -140,7 +140,7 @@ implemented_observables: data_central: data_d2Sig_dyttBar_dmttBar.yaml data_uncertainties: - 
uncertainties_d2Sig_dyttBar_dmttBar.yaml - kinematic_coverage: [y_ttBar, m_ttBar, sqrts] + kinematic_coverage: [y_ttBar, m_ttBar, m_t2] plotting: dataset_label: 'CMS 13 TeV top quark pair l+j channel: $\frac{d^2\sigma}{dm_{t\bar{t}}d|y_{t\bar{t}}|}$' kinematics_override: identity @@ -170,7 +170,7 @@ implemented_observables: data_central: data_d2Sig_dyttBar_dmttBar_norm.yaml data_uncertainties: - uncertainties_d2Sig_dyttBar_dmttBar_norm.yaml - kinematic_coverage: [y_ttBar, m_ttBar, sqrts] + kinematic_coverage: [y_ttBar, m_ttBar, m_t2] plotting: dataset_label: 'CMS 13 TeV top quark pair l+j channel: $\frac{1}{\sigma}\frac{d^2\sigma}{dm_{t\bar{t}}d|y_{t\bar{t}}|}$' kinematics_override: identity diff --git a/nnpdf_data/nnpdf_data/new_commondata/CMS_TTBAR_8TEV_2L_DIF/metadata.yaml b/nnpdf_data/nnpdf_data/new_commondata/CMS_TTBAR_8TEV_2L_DIF/metadata.yaml index 8ccc833e2c..165913f197 100644 --- a/nnpdf_data/nnpdf_data/new_commondata/CMS_TTBAR_8TEV_2L_DIF/metadata.yaml +++ b/nnpdf_data/nnpdf_data/new_commondata/CMS_TTBAR_8TEV_2L_DIF/metadata.yaml @@ -47,7 +47,7 @@ implemented_observables: kinematic_coverage: - pT_t - y_t - - sqrts + - m_t2 plotting: dataset_label: 'CMS TTB 8 TeV: $\frac{1}{\sigma}\frac{d^2\sigma}{d|y_{t}|dpT_{t}}$' kinematics_override: identity @@ -95,7 +95,7 @@ implemented_observables: kinematic_coverage: - y_t - m_ttBar - - sqrts + - m_t2 plotting: dataset_label: 'CMS TTB 8 TeV: $\frac{1}{\sigma}\frac{d^2\sigma}{d|y_{t}|dm_{t\bar{t}}}$' kinematics_override: identity @@ -154,7 +154,7 @@ implemented_observables: kinematic_coverage: - y_ttBar - m_ttBar - - sqrts + - m_t2 plotting: dataset_label: 'CMS TTB 8 TeV: $\frac{1}{\sigma}\frac{d^2\sigma}{dm_{t\bar{t}}d|y_{t\bar{t}}|}$' kinematics_override: identity diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 7e5647d874..a0f523bc04 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -286,7 +286,7 
@@ def _dyncpt_xq2map(kin_info): HQP_MQQ = _Process( "HQP_MQQ", "Invariant mass of top quark pair in top pair production", - accepted_variables=(_Vars.m_ttBar, _Vars.y_ttBar, _Vars.sqrts, _Vars.m_t2), + accepted_variables=(_Vars.m_ttBar, _Vars.y_t, _Vars.y_ttBar, _Vars.sqrts, _Vars.m_t2), xq2map_function=_hqp_mqq_xq2map, ) From c068eaee02b39db64845bdb86544154ed253b075 Mon Sep 17 00:00:00 2001 From: Tanishq Sharma <99052355+t7phy@users.noreply.github.com> Date: Wed, 29 May 2024 16:36:44 +0200 Subject: [PATCH 11/30] Update validphys2/src/validphys/process_options.py Co-authored-by: Juan M. Cruz-Martinez --- validphys2/src/validphys/process_options.py | 1 - 1 file changed, 1 deletion(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index a0f523bc04..4d1aed2a5e 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -193,7 +193,6 @@ def _inc_xq2map(kin_info): # Compute x, Q2 if {"k1", "k2", "k3"} <= kin_info.keys(): mass2 = kin_info["k2"] - kin_info[_Vars.sqrts] = kin_info["k3"] else: mass2 = kin_info.get_one_of("m_W2", "m_Z2", "m_t2") From 5eca1d00d3a3b328c574029603548ec0ed3ab85a Mon Sep 17 00:00:00 2001 From: Tanishq Sharma <99052355+t7phy@users.noreply.github.com> Date: Thu, 30 May 2024 12:30:09 +0200 Subject: [PATCH 12/30] Update validphys2/src/validphys/process_options.py Co-authored-by: Juan M. 
Cruz-Martinez --- validphys2/src/validphys/process_options.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 4d1aed2a5e..1eca6e6179 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -191,10 +191,8 @@ def _hqp_mqq_xq2map(kin_info): def _inc_xq2map(kin_info): # Compute x, Q2 - if {"k1", "k2", "k3"} <= kin_info.keys(): - mass2 = kin_info["k2"] - else: - mass2 = kin_info.get_one_of("m_W2", "m_Z2", "m_t2") + # k2 necessary to take the mass for DY inclusive cross sections still not migrated + mass2 = kin_info.get_one_of(_Vars.m_W2, _Vars.m_Z2, _Vars.m_t2, "k2") return np.sqrt(mass2) / kin_info[_Vars.sqrts], mass2 From 594c90716381057324c92533acb0b40e1c5a38eb Mon Sep 17 00:00:00 2001 From: Tanishq Sharma <99052355+t7phy@users.noreply.github.com> Date: Sun, 2 Jun 2024 12:42:39 +0200 Subject: [PATCH 13/30] Update validphys2/src/validphys/process_options.py Co-authored-by: Juan M. Cruz-Martinez --- validphys2/src/validphys/process_options.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 1eca6e6179..2e0ec2da4a 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -155,7 +155,8 @@ def _dijets_xq2map(kin_info): def _hqp_yq_xq2map(kin_info): # Compute x, Q2 - # Theory predictions computed with HT/4 see 1906.06535 + # Theory predictions computed with HT/4 ~ mt/2 for rapidity distr. 
+ # see section 3 from 1906.06535 mass2 = kin_info[_Vars.m_t2] ratio = np.sqrt(mass2) / kin_info[_Vars.sqrts] x1 = ratio * np.exp(kin_info[_Vars.y_t]) From c85c2969d145445c758ca048a8b164b0b30b68c4 Mon Sep 17 00:00:00 2001 From: t7phy Date: Mon, 3 Jun 2024 21:39:34 +0200 Subject: [PATCH 14/30] impl suggestions --- validphys2/src/validphys/process_options.py | 34 +++++++-------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 2e0ec2da4a..deb7ef87ec 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -155,24 +155,14 @@ def _dijets_xq2map(kin_info): def _hqp_yq_xq2map(kin_info): # Compute x, Q2 + # # Theory predictions computed with HT/4 ~ mt/2 for rapidity distr. # see section 3 from 1906.06535 - mass2 = kin_info[_Vars.m_t2] + # HT defined in Eqn. (1) of 1611.08609 + rapidity = kin_info.get_one_of(_Vars.y_t, _Vars.y_ttBar) ratio = np.sqrt(mass2) / kin_info[_Vars.sqrts] - x1 = ratio * np.exp(kin_info[_Vars.y_t]) - x2 = ratio * np.exp(-kin_info[_Vars.y_t]) - q2 = mass2 - x = np.concatenate((x1, x2)) - return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((q2, q2)) / 4 - - -def _hqp_yqq_xq2map(kin_info): - # Compute x, Q2 - # Theory predictions computed with HT/4 see 1906.06535 - mass2 = kin_info[_Vars.m_t2] - ratio = np.sqrt(mass2) / kin_info[_Vars.sqrts] - x1 = ratio * np.exp(kin_info[_Vars.y_ttBar]) - x2 = ratio * np.exp(-kin_info[_Vars.y_ttBar]) + x1 = ratio * np.exp(rapidity) + x2 = ratio * np.exp(-rapidity) q2 = kin_info[_Vars.m_t2] x = np.concatenate((x1, x2)) return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((q2, q2)) / 4 @@ -180,12 +170,17 @@ def _hqp_yqq_xq2map(kin_info): def _hqp_ptq_xq2map(kin_info): # Compute x, Q2 + # + # At LO pt ~ ptb + # ht = 2.*sqrt(m_t2 + pT_t2) Q = (kin_info[_Vars.m_t2] + kin_info[_Vars.pT_t] * kin_info[_Vars.pT_t]) ** 0.5 / 2 return Q / 
kin_info[_Vars.sqrts], Q * Q def _hqp_mqq_xq2map(kin_info): # Compute x, Q2 + # + # Theory predictions computed with HT/4 ~ m_ttbar/4 Q = kin_info[_Vars.m_ttBar] / 4 return Q / kin_info[_Vars.sqrts], Q * Q @@ -267,13 +262,6 @@ def _dyncpt_xq2map(kin_info): xq2map_function=_hqp_yq_xq2map, ) -HQP_YQQ = _Process( - "HQP_YQQ", - "(absolute) rapidity of top quark pair in top pair production", - accepted_variables=(_Vars.y_ttBar, _Vars.m_t2, _Vars.sqrts, _Vars.m_ttBar), - xq2map_function=_hqp_yqq_xq2map, -) - HQP_PTQ = _Process( "HQP_PTQ", "Transverse momentum of top quark in top pair production", @@ -325,7 +313,7 @@ def _dyncpt_xq2map(kin_info): "JET": JET, "DIJET": DIJET, "HQP_YQ": HQP_YQ, - "HQP_YQQ": HQP_YQQ, + "HQP_YQQ": dataclasses.replace(HQP_YQ, name="HQP_YQQ"), "HQP_PTQ": HQP_PTQ, "HQP_MQQ": HQP_MQQ, "INC": INC, From 7797d0fa4da90590c6d5af2c785ae87e243f588d Mon Sep 17 00:00:00 2001 From: t7phy Date: Mon, 3 Jun 2024 21:43:13 +0200 Subject: [PATCH 15/30] accepted vars modified --- validphys2/src/validphys/process_options.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index deb7ef87ec..a06ec15baa 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -258,14 +258,14 @@ def _dyncpt_xq2map(kin_info): HQP_YQ = _Process( "HQP_YQ", "(absolute) rapidity of top quark in top pair production", - accepted_variables=(_Vars.y_t, _Vars.m_t2, _Vars.sqrts, _Vars.m_ttBar), + accepted_variables=(_Vars.y_t, _Vars.y_ttBar, _Vars.m_t2, _Vars.sqrts, _Vars.m_ttBar, _Vars.pT_t), xq2map_function=_hqp_yq_xq2map, ) HQP_PTQ = _Process( "HQP_PTQ", "Transverse momentum of top quark in top pair production", - accepted_variables=(_Vars.pT_t, _Vars.y_t, _Vars.sqrts, _Vars.m_t2), + accepted_variables=(_Vars.pT_t, _Vars.y_t, _Vars.y_ttBar, _Vars.sqrts, _Vars.m_t2), xq2map_function=_hqp_ptq_xq2map, ) From 
7505b5f4181f7114a810311e94980dbc4b320af0 Mon Sep 17 00:00:00 2001 From: "Juan M. Cruz-Martinez" Date: Tue, 4 Jun 2024 09:37:01 +0200 Subject: [PATCH 16/30] remove special case from tests --- validphys2/src/validphys/tests/test_datafiles.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/validphys2/src/validphys/tests/test_datafiles.py b/validphys2/src/validphys/tests/test_datafiles.py index 05b3246a89..37bebeaa20 100644 --- a/validphys2/src/validphys/tests/test_datafiles.py +++ b/validphys2/src/validphys/tests/test_datafiles.py @@ -49,10 +49,7 @@ def test_all_datasets(dataset_name): # and if so, check that the process_type is not simply a string kin_override = main_cd.metadata.plotting.kinematics_override if isinstance(kin_override, kintransform_identity) and isinstance(process_type, str): - # Skip for the time being the processes for which there is no implementation but have been - # merged to master: issue #1991 - if process_type not in ("HQP_MQQ", "INC"): - raise NotImplementedError(f"The {process_type=} is not implemented in process_options") + raise NotImplementedError(f"The {process_type=} is not implemented in process_options") elif not isinstance(process_type, str): if not process_type.are_accepted_variables(kin_cov): From ea77eaa1490e1a4fe46d7669996b4c9b40a54a27 Mon Sep 17 00:00:00 2001 From: Aron Date: Wed, 20 Mar 2024 11:12:12 +0100 Subject: [PATCH 17/30] Change default replica statistic from average to average over 90% best replicas --- n3fit/src/n3fit/hyper_optimization/rewards.py | 42 ++++++++++++++++--- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/n3fit/src/n3fit/hyper_optimization/rewards.py b/n3fit/src/n3fit/hyper_optimization/rewards.py index b51cf7f629..8494874740 100644 --- a/n3fit/src/n3fit/hyper_optimization/rewards.py +++ b/n3fit/src/n3fit/hyper_optimization/rewards.py @@ -44,6 +44,28 @@ log = logging.getLogger(__name__) +def _average_best(fold_losses: np.ndarray, percentage: float = 0.9, axis: int = 0) 
-> float: + """ + Compute the average of the input array along the specified axis, among the best `percentage` + of replicas. + + Parameters + ---------- + fold_losses: np.ndarray + Input array. + float: The percentage of best replicas to take into account. + axis: int, optional + Axis along which the mean is computed. Default is 0. + + Returns + ------- + float: The average along the specified axis. + """ + sorted_losses = np.sort(fold_losses, axis=axis) + best_losses = sorted_losses[: int(percentage * len(sorted_losses))] + return np.average(best_losses, axis=axis).item() + + def _average(fold_losses: np.ndarray, axis: int = 0) -> float: """ Compute the average of the input array along the specified axis. @@ -98,7 +120,12 @@ def _std(fold_losses: np.ndarray, axis: int = 0) -> float: return np.std(fold_losses, axis=axis).item() -IMPLEMENTED_STATS = {"average": _average, "best_worst": _best_worst, "std": _std} +IMPLEMENTED_STATS = { + "average": _average, + "average_best": _average_best, + "best_worst": _best_worst, + "std": _std, +} IMPLEMENTED_LOSSES = ["chi2", "phi2"] @@ -130,12 +157,15 @@ class HyperLoss: def __init__( self, loss_type: str = None, replica_statistic: str = None, fold_statistic: str = None ): - self._default_statistic = "average" self._default_loss = "chi2" self.loss_type = self._parse_loss(loss_type) - self.reduce_over_replicas = self._parse_statistic(replica_statistic, "replica_statistic") - self.reduce_over_folds = self._parse_statistic(fold_statistic, "fold_statistic") + self.reduce_over_replicas = self._parse_statistic( + replica_statistic, "replica_statistic", default="average_best" + ) + self.reduce_over_folds = self._parse_statistic( + fold_statistic, "fold_statistic", default="average" + ) self.phi_vector = [] self.chi2_matrix = [] @@ -280,7 +310,7 @@ def _parse_loss(self, loss_type: str) -> str: return loss_type - def _parse_statistic(self, statistic: str, name: str) -> Callable: + def _parse_statistic(self, statistic: str, name: str, 
default: str) -> Callable: """ Parse the statistic and return the default if None. @@ -304,7 +334,7 @@ def _parse_statistic(self, statistic: str, name: str) -> Callable: For loss type equal to phi2, the applied fold statistics is always the reciprocal of the selected stats. """ if statistic is None: - statistic = self._default_statistic + statistic = default log.warning(f"No {name} selected in HyperLoss, defaulting to {statistic}") else: if statistic not in IMPLEMENTED_STATS: From 761b5f1f39065225a4bb9d8828130951f861904c Mon Sep 17 00:00:00 2001 From: Aron Date: Tue, 2 Apr 2024 11:47:21 +0200 Subject: [PATCH 18/30] Round up rather than down for number of best replicas --- n3fit/src/n3fit/hyper_optimization/rewards.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/hyper_optimization/rewards.py b/n3fit/src/n3fit/hyper_optimization/rewards.py index 8494874740..e820cd5993 100644 --- a/n3fit/src/n3fit/hyper_optimization/rewards.py +++ b/n3fit/src/n3fit/hyper_optimization/rewards.py @@ -62,7 +62,8 @@ def _average_best(fold_losses: np.ndarray, percentage: float = 0.9, axis: int = float: The average along the specified axis. 
""" sorted_losses = np.sort(fold_losses, axis=axis) - best_losses = sorted_losses[: int(percentage * len(sorted_losses))] + num_best = int(np.ceil(percentage * len(sorted_losses))) + best_losses = sorted_losses[:num_best] return np.average(best_losses, axis=axis).item() From 666b95e355d51f86d587d69336be9d40d50ae14a Mon Sep 17 00:00:00 2001 From: Aron Date: Tue, 2 Apr 2024 11:48:03 +0200 Subject: [PATCH 19/30] Make sure seed is int --- n3fit/src/n3fit/backends/keras_backend/multi_dense.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/backends/keras_backend/multi_dense.py b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py index df579cc846..6d789de90e 100644 --- a/n3fit/src/n3fit/backends/keras_backend/multi_dense.py +++ b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py @@ -187,7 +187,7 @@ def __call__(self, shape, dtype=None, **kwargs): per_replica_weights = [] for replica_seed in self.replica_seeds: if "seed" in self.initializer_config: - self.initializer_config["seed"] = self.base_seed + replica_seed + self.initializer_config["seed"] = int(self.base_seed + replica_seed) single_initializer = self.initializer_class.from_config(self.initializer_config) per_replica_weights.append(single_initializer(shape, dtype, **kwargs)) From 8ba460fabd1e342e6df39f62b2ec48d16f4f8625 Mon Sep 17 00:00:00 2001 From: Aron Date: Tue, 2 Apr 2024 13:01:02 +0200 Subject: [PATCH 20/30] Add warnings on replicas not passing --- n3fit/src/n3fit/model_trainer.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 0074a5692f..884733787a 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -744,15 +744,23 @@ def _train_and_fit(self, training_model, stopping_object, epochs=100) -> bool: update_freq=PUSH_INTEGRABILITY_EACH, ) - training_model.perform_fit( + loss_dict = training_model.perform_fit( epochs=epochs, 
verbose=False, callbacks=self.callbacks + [callback_st, callback_pos, callback_integ], ) + training_losses = loss_dict['loss'] + + if np.isnan(training_losses).any(): + log.warning(f"{np.isnan(training_losses).sum()} replicas have NaN losses") + + passed_replicas = [bool(i) for i in stopping_object.e_best_chi2] + if not all(passed_replicas): + log.warning(f"{len(passed_replicas) - sum(passed_replicas)} replicas have not passed") # TODO: in order to use multireplica in hyperopt is is necessary to define what "passing" means # for now consider the run as good if any replica passed - fit_has_passed = any(bool(i) for i in stopping_object.e_best_chi2) + fit_has_passed = any(passed_replicas) return fit_has_passed def _hyperopt_override(self, params): From 029f869216a433d02b4bcc5c03ed1f81c7c65988 Mon Sep 17 00:00:00 2001 From: Aron Date: Wed, 3 Apr 2024 12:24:10 +0200 Subject: [PATCH 21/30] Add selection based on average best loss --- n3fit/src/n3fit/hyper_optimization/rewards.py | 10 ++++-- n3fit/src/n3fit/model_trainer.py | 32 ++++++++----------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/n3fit/src/n3fit/hyper_optimization/rewards.py b/n3fit/src/n3fit/hyper_optimization/rewards.py index e820cd5993..fd9a0d965f 100644 --- a/n3fit/src/n3fit/hyper_optimization/rewards.py +++ b/n3fit/src/n3fit/hyper_optimization/rewards.py @@ -52,8 +52,9 @@ def _average_best(fold_losses: np.ndarray, percentage: float = 0.9, axis: int = Parameters ---------- fold_losses: np.ndarray - Input array. - float: The percentage of best replicas to take into account. + Per replica losses for a single fold. + percentage: float + The percentage of best replicas to take into account (rounded up). axis: int, optional Axis along which the mean is computed. Default is 0. @@ -61,8 +62,11 @@ def _average_best(fold_losses: np.ndarray, percentage: float = 0.9, axis: int = ------- float: The average along the specified axis. 
""" - sorted_losses = np.sort(fold_losses, axis=axis) num_best = int(np.ceil(percentage * len(sorted_losses))) + + if np.isnan(fold_losses).any(): + log.warning(f"{np.isnan(fold_losses).sum()} replicas have NaNs losses") + sorted_losses = np.sort(fold_losses, axis=axis) best_losses = sorted_losses[:num_best] return np.average(best_losses, axis=axis).item() diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 884733787a..65b148f0a8 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -744,24 +744,11 @@ def _train_and_fit(self, training_model, stopping_object, epochs=100) -> bool: update_freq=PUSH_INTEGRABILITY_EACH, ) - loss_dict = training_model.perform_fit( + training_model.perform_fit( epochs=epochs, verbose=False, callbacks=self.callbacks + [callback_st, callback_pos, callback_integ], ) - training_losses = loss_dict['loss'] - - if np.isnan(training_losses).any(): - log.warning(f"{np.isnan(training_losses).sum()} replicas have NaN losses") - - passed_replicas = [bool(i) for i in stopping_object.e_best_chi2] - if not all(passed_replicas): - log.warning(f"{len(passed_replicas) - sum(passed_replicas)} replicas have not passed") - - # TODO: in order to use multireplica in hyperopt is is necessary to define what "passing" means - # for now consider the run as good if any replica passed - fit_has_passed = any(passed_replicas) - return fit_has_passed def _hyperopt_override(self, params): """Unrolls complicated hyperopt structures into very simple dictionaries""" @@ -992,13 +979,9 @@ def hyperparametrizable(self, params): for model in models.values(): model.compile(**params["optimizer"]) - passed = self._train_and_fit(models["training"], stopping_object, epochs=epochs) + self._train_and_fit(models["training"], stopping_object, epochs=epochs) if self.mode_hyperopt: - if not passed: - log.info("Hyperparameter combination fail to find a good fit, breaking") - break - validation_loss = 
stopping_object.vl_chi2 # number of active points in this fold @@ -1018,6 +1001,15 @@ def hyperparametrizable(self, params): for penalty in self.hyper_penalties } + hyper_loss_per_replica = experimental_loss + sum(penalties.values()) + fold_loss = self._hyper_loss.reduce_over_replicas(hyper_loss_per_replica) + passed = fold_loss < self.hyper_threshold + if not passed: + log.info( + f"Hyperparameter combination failed to find a good fit (loss={fold_loss} > {self.hyper_threshold})" + ) + break + # Extracting the necessary data to compute phi # First, create a list of `validphys.core.DataGroupSpec` # containing only exp datasets within the held out fold @@ -1105,5 +1097,7 @@ def hyperparametrizable(self, params): # In a normal run, the only information we need to output is the stopping object # (which contains metadata about the stopping) # and the pdf model (which are used to generate the PDF grids and compute arclengths) + if not self.mode_hyperopt: + passed = any(bool(i) for i in stopping_object.e_best_chi2) dict_out = {"status": passed, "stopping_object": stopping_object, "pdf_model": pdf_model} return dict_out From abefcea9a3a6f6eca6e3b0c1e22c3cf6765befe7 Mon Sep 17 00:00:00 2001 From: Aron Date: Wed, 3 Apr 2024 13:06:54 +0200 Subject: [PATCH 22/30] bugfix --- n3fit/src/n3fit/hyper_optimization/rewards.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/hyper_optimization/rewards.py b/n3fit/src/n3fit/hyper_optimization/rewards.py index fd9a0d965f..ae49a4559c 100644 --- a/n3fit/src/n3fit/hyper_optimization/rewards.py +++ b/n3fit/src/n3fit/hyper_optimization/rewards.py @@ -62,7 +62,7 @@ def _average_best(fold_losses: np.ndarray, percentage: float = 0.9, axis: int = ------- float: The average along the specified axis. 
""" - num_best = int(np.ceil(percentage * len(sorted_losses))) + num_best = int(np.ceil(percentage * len(fold_losses))) if np.isnan(fold_losses).any(): log.warning(f"{np.isnan(fold_losses).sum()} replicas have NaNs losses") From cbce54ad588969904c806d8f603ab5c85b4345d2 Mon Sep 17 00:00:00 2001 From: Aron Date: Wed, 3 Apr 2024 13:57:07 +0200 Subject: [PATCH 23/30] increase hyper threshold in quickcard --- n3fit/src/n3fit/tests/regressions/hyper-quickcard.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/tests/regressions/hyper-quickcard.yml b/n3fit/src/n3fit/tests/regressions/hyper-quickcard.yml index f3387ef211..fe53ccf44b 100644 --- a/n3fit/src/n3fit/tests/regressions/hyper-quickcard.yml +++ b/n3fit/src/n3fit/tests/regressions/hyper-quickcard.yml @@ -62,7 +62,7 @@ kfold: - saturation - patience - integrability - threshold: 2.0 + threshold: 1e3 partitions: - datasets: - NMC_NC_NOTFIXED_P_EM-SIGMARED From 23a765401c64487b8ceba21eb52cda6a371e2410 Mon Sep 17 00:00:00 2001 From: Aron Date: Tue, 23 Apr 2024 09:16:52 +0200 Subject: [PATCH 24/30] Use average_best replica statistic in hyperopt runcard --- n3fit/runcards/hyperopt_studies/renew_hyperopt.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/n3fit/runcards/hyperopt_studies/renew_hyperopt.yml b/n3fit/runcards/hyperopt_studies/renew_hyperopt.yml index f4c5487704..8c17b6ecb9 100644 --- a/n3fit/runcards/hyperopt_studies/renew_hyperopt.yml +++ b/n3fit/runcards/hyperopt_studies/renew_hyperopt.yml @@ -161,7 +161,7 @@ hyperscan_config: kfold: loss_type: chi2 - replica_statistic: average + replica_statistic: average_best fold_statistic: average penalties: - saturation From 23df9b5bea50101b685697f430e0bd73abd4b64b Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 30 May 2024 11:04:18 +0200 Subject: [PATCH 25/30] apply own review comments: remove double check of hyper loss; percentage -> proportion; update docstr --- n3fit/src/n3fit/hyper_optimization/rewards.py | 40 
++++++++++--------- n3fit/src/n3fit/model_trainer.py | 15 ++----- 2 files changed, 26 insertions(+), 29 deletions(-) diff --git a/n3fit/src/n3fit/hyper_optimization/rewards.py b/n3fit/src/n3fit/hyper_optimization/rewards.py index ae49a4559c..50ceabaa85 100644 --- a/n3fit/src/n3fit/hyper_optimization/rewards.py +++ b/n3fit/src/n3fit/hyper_optimization/rewards.py @@ -44,17 +44,17 @@ log = logging.getLogger(__name__) -def _average_best(fold_losses: np.ndarray, percentage: float = 0.9, axis: int = 0) -> float: +def _average_best(fold_losses: np.ndarray, proportion: float = 0.9, axis: int = 0) -> float: """ - Compute the average of the input array along the specified axis, among the best `percentage` + Compute the average of the input array along the specified axis, among the best `proportion` of replicas. Parameters ---------- fold_losses: np.ndarray Per replica losses for a single fold. - percentage: float - The percentage of best replicas to take into account (rounded up). + proportion: float + The proportion of best replicas to take into account (rounded up). axis: int, optional Axis along which the mean is computed. Default is 0. @@ -62,13 +62,14 @@ def _average_best(fold_losses: np.ndarray, percentage: float = 0.9, axis: int = ------- float: The average along the specified axis. """ - num_best = int(np.ceil(percentage * len(fold_losses))) + # TODO: use directly `validphys.fitveto.determine_vetoes` + num_best = int(np.ceil(proportion * len(fold_losses))) if np.isnan(fold_losses).any(): log.warning(f"{np.isnan(fold_losses).sum()} replicas have NaNs losses") sorted_losses = np.sort(fold_losses, axis=axis) best_losses = sorted_losses[:num_best] - return np.average(best_losses, axis=axis).item() + return _average(best_losses, axis=axis) def _average(fold_losses: np.ndarray, axis: int = 0) -> float: @@ -146,6 +147,13 @@ class HyperLoss: Computes the statistic over the replicas and then over the folds, both statistics default to the average. 
+ The ``compute_loss`` method saves intermediate metrics such as the + chi2 of the folds or the phi regardless of the loss type that has been selected. + These metrics are saved in the properties + ``phi_vector``: list of phi per fold + ``chi2_matrix``: list of chi2 per fold, per replica + + Parameters ---------- loss_type: str @@ -304,12 +312,10 @@ def _parse_loss(self, loss_type: str) -> str: if loss_type is None: loss_type = self._default_loss log.warning(f"No loss_type selected in HyperLoss, defaulting to {loss_type}") - else: - if loss_type not in IMPLEMENTED_LOSSES: - valid_options = ", ".join(IMPLEMENTED_LOSSES) - raise ValueError( - f"Invalid loss type '{loss_type}'. Valid options are: {valid_options}" - ) + + if loss_type not in IMPLEMENTED_LOSSES: + valid_options = ", ".join(IMPLEMENTED_LOSSES) + raise ValueError(f"Invalid loss type '{loss_type}'. Valid options are: {valid_options}") log.info(f"Setting '{loss_type}' as the loss type for hyperoptimization") @@ -341,12 +347,10 @@ def _parse_statistic(self, statistic: str, name: str, default: str) -> Callable: if statistic is None: statistic = default log.warning(f"No {name} selected in HyperLoss, defaulting to {statistic}") - else: - if statistic not in IMPLEMENTED_STATS: - valid_options = ", ".join(IMPLEMENTED_STATS.keys()) - raise ValueError( - f"Invalid {name} '{statistic}'. Valid options are: {valid_options}" - ) + + if statistic not in IMPLEMENTED_STATS: + valid_options = ", ".join(IMPLEMENTED_STATS.keys()) + raise ValueError(f"Invalid {name} '{statistic}'. 
Valid options are: {valid_options}") log.info(f"Using '{statistic}' as the {name} for hyperoptimization") diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 65b148f0a8..14a8b5bcd0 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -1001,15 +1001,6 @@ def hyperparametrizable(self, params): for penalty in self.hyper_penalties } - hyper_loss_per_replica = experimental_loss + sum(penalties.values()) - fold_loss = self._hyper_loss.reduce_over_replicas(hyper_loss_per_replica) - passed = fold_loss < self.hyper_threshold - if not passed: - log.info( - f"Hyperparameter combination failed to find a good fit (loss={fold_loss} > {self.hyper_threshold})" - ) - break - # Extracting the necessary data to compute phi # First, create a list of `validphys.core.DataGroupSpec` # containing only exp datasets within the held out fold @@ -1024,8 +1015,6 @@ def hyperparametrizable(self, params): fold_idx=k, ) - log.info("Fold %d finished, loss=%.1f, pass=%s", k + 1, hyper_loss, passed) - # Create another list of `validphys.core.DataGroupSpec` # containing now exp datasets that are included in the training/validation dataset trvl_partitions = list(self.kpartitions) @@ -1054,7 +1043,11 @@ def hyperparametrizable(self, params): # Apply a penalty proportional to the number of folds not computed pen_mul = len(self.kpartitions) - k l_hyper = [i * pen_mul for i in l_hyper] + passed = False break + else: + passed = True + log.info("Fold %d finished, loss=%.1f, pass=%s", k + 1, hyper_loss, passed) # endfor From 011bdf8e9363f0f18c2a00519012188e610fc76a Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 30 May 2024 12:15:36 +0200 Subject: [PATCH 26/30] dont include penalties by default in the calculation of the loss --- n3fit/src/n3fit/hyper_optimization/rewards.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/n3fit/src/n3fit/hyper_optimization/rewards.py 
b/n3fit/src/n3fit/hyper_optimization/rewards.py index 50ceabaa85..b0b4aba767 100644 --- a/n3fit/src/n3fit/hyper_optimization/rewards.py +++ b/n3fit/src/n3fit/hyper_optimization/rewards.py @@ -192,10 +192,14 @@ def compute_loss( pdf_model: MetaModel, experimental_data: List[DataGroupSpec], fold_idx: int = 0, + include_penalties=False, ) -> float: """ Compute the loss, including added penalties, for a single fold. + Save the phi of the assemble and the chi2 of the separate replicas, + and the penalties into the ``phi_vector``, ``chi2_matrix`` and ``penalties`` attributes. + Parameters ---------- penalties: Dict[str, NDArray(replicas)] @@ -210,6 +214,8 @@ def compute_loss( List of tuples containing `validphys.core.DataGroupSpec` instances for each group data set fold_idx: int k-fold index. Defaults to 0. + include_penalties: float + Whether to include the penalties in the returned loss value Returns ------- @@ -238,17 +244,19 @@ def compute_loss( # these are saved in the phi_vector and chi2_matrix attributes, excluding penalties self._save_hyperopt_metrics(phi_per_fold, experimental_loss, penalties, fold_idx) - # include penalties to experimental loss - # this allows introduction of statistics also to penalties - experimental_loss_w_penalties = experimental_loss + sum(penalties.values()) + # Prepare the output loss, including penalties if necessary + + if include_penalties: + # include penalties to experimental loss + experimental_loss += sum(penalties.values()) - # add penalties to phi in the form of a sum of per-replicas averages - phi_per_fold += sum(np.mean(penalty) for penalty in penalties.values()) + # add penalties to phi in the form of a sum of per-replicas averages + phi_per_fold += sum(np.mean(penalty) for penalty in penalties.values()) # define loss for hyperopt according to the chosen loss_type if self.loss_type == "chi2": # calculate statistics of chi2 over replicas for a given k-fold - loss = self.reduce_over_replicas(experimental_loss_w_penalties) + 
loss = self.reduce_over_replicas(experimental_loss) elif self.loss_type == "phi2": loss = phi_per_fold**2 From 456c6cfce0c9625cdbc016c0a36a5e8e33c98d6c Mon Sep 17 00:00:00 2001 From: juacrumar Date: Fri, 31 May 2024 09:33:59 +0200 Subject: [PATCH 27/30] add `kfold::penalties_in_loss: bool` as a runcard option --- n3fit/src/n3fit/hyper_optimization/rewards.py | 13 +++++++++---- n3fit/src/n3fit/model_trainer.py | 1 + 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/n3fit/src/n3fit/hyper_optimization/rewards.py b/n3fit/src/n3fit/hyper_optimization/rewards.py index b0b4aba767..bba8057d89 100644 --- a/n3fit/src/n3fit/hyper_optimization/rewards.py +++ b/n3fit/src/n3fit/hyper_optimization/rewards.py @@ -165,12 +165,19 @@ class HyperLoss: fold_statistic: str the statistic over the folds to use. Options are "average", "best_worst", and "std". + penalties_in_loss: bool + whether the penalties should be included in the output of ``compute_loss`` """ def __init__( - self, loss_type: str = None, replica_statistic: str = None, fold_statistic: str = None + self, + loss_type: str = None, + replica_statistic: str = None, + fold_statistic: str = None, + penalties_in_loss: bool = False, ): self._default_loss = "chi2" + self._penalties_in_loss = penalties_in_loss self.loss_type = self._parse_loss(loss_type) self.reduce_over_replicas = self._parse_statistic( @@ -192,7 +199,6 @@ def compute_loss( pdf_model: MetaModel, experimental_data: List[DataGroupSpec], fold_idx: int = 0, - include_penalties=False, ) -> float: """ Compute the loss, including added penalties, for a single fold. 
@@ -245,8 +251,7 @@ def compute_loss( self._save_hyperopt_metrics(phi_per_fold, experimental_loss, penalties, fold_idx) # Prepare the output loss, including penalties if necessary - - if include_penalties: + if self._penalties_in_loss: # include penalties to experimental loss experimental_loss += sum(penalties.values()) diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 14a8b5bcd0..df2e0be917 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -192,6 +192,7 @@ def __init__( loss_type=loss_type, replica_statistic=replica_statistic, fold_statistic=fold_statistic, + penalties_in_loss=kfold_parameters.get("penalties_in_loss", False), ) # Initialize the dictionaries which contain all fitting information From 335c8351669c53740d3c6b87576c7f188881e7db Mon Sep 17 00:00:00 2001 From: juacrumar Date: Sun, 2 Jun 2024 12:25:41 +0200 Subject: [PATCH 28/30] add docs for the `penalties_in_loss` key --- doc/sphinx/source/n3fit/hyperopt.rst | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/sphinx/source/n3fit/hyperopt.rst b/doc/sphinx/source/n3fit/hyperopt.rst index 7c7ba903e9..7de6c5c894 100644 --- a/doc/sphinx/source/n3fit/hyperopt.rst +++ b/doc/sphinx/source/n3fit/hyperopt.rst @@ -401,8 +401,9 @@ In NNPDF, this hyperoptimisation metrics is selected via the following generic r kfold: loss_type: chi2 - replica_statistic: average + replica_statistic: average_best fold_statistic: average + penalties_in_loss: False partitions: - datasets: ... @@ -411,6 +412,16 @@ In NNPDF, this hyperoptimisation metrics is selected via the following generic r parallel_models: true + +The key ``replica_statistic`` defines how to combine all replicas when perform a multireplica hyperopt. +With ``average`` a simple average will be taken, ``average_best`` instead will take the 90% best replicas, +mimicking what is done in a real post-fit selection. 
+
+The ``fold_statistic`` instead defines how to combine the loss of the different folds.
+While the values for the ``penalties`` are always saved during the hyperopt run, by default they are not
+considered by the hyperoptimization algorithm.
+If they are to be considered, the key ``penalties_in_loss`` needs to be set to ``True``.
+
 By combining the ``average``, ``best_worst``, and ``std`` figures of merit
 discussed in :ref:`hyperkfolding-label`, several alternatives may arise.
 For example, one approach could involve minimizing the maximum value of the set of averaged-over-replicas :math:`\chi^2`,

From 7b977652df98ad880e2ade860854a2ede4bcc0ba Mon Sep 17 00:00:00 2001
From: juacrumar
Date: Mon, 3 Jun 2024 11:58:30 +0200
Subject: [PATCH 29/30] remove comment about gpu not being compatible with hyperopt :)

---
 doc/sphinx/source/n3fit/runcard_detailed.rst | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/doc/sphinx/source/n3fit/runcard_detailed.rst b/doc/sphinx/source/n3fit/runcard_detailed.rst
index 58aa9b94b6..8f31d98af0 100644
--- a/doc/sphinx/source/n3fit/runcard_detailed.rst
+++ b/doc/sphinx/source/n3fit/runcard_detailed.rst
@@ -319,10 +319,9 @@ Running in parallel can be quite hard on memory and it is only advantageous when
 fitting on a GPU, where one can find a speed up equal to the number of models run in parallel
 (each model being a different replica).
 
-Running in parallel leverages the fact that the only difference between two replicas
-is the output data the prediction is compared to.
-In order to ensure this is indeed the case it is necessary to also
-use the `same_trvl_per_replica` flag in the runcard.
+When running in parallel it might be advantageous (e.g., for debugging)
+to set the training validation split to be equal for all replicas,
+this can be done with the `same_trvl_per_replica: true` runcard flag.
In other words, in order to run several replicas in parallel in a machine (be it a big CPU or, most likely, a GPU) @@ -332,7 +331,6 @@ top-level options: .. code-block:: yaml parallel_models: true - same_trvl_per_replica: true And then run ``n3fit`` with a replica range to be parallelized @@ -348,8 +346,6 @@ should run by setting the environment variable ``CUDA_VISIBLE_DEVICES`` to the right index (usually ``0, 1, 2``) or leaving it explicitly empty to avoid running on GPU: ``export CUDA_VISIBLE_DEVICES=""`` -Note that at present it cannot be used together with the ``hyperopt`` module. - .. _otheroptions-label: From 613bfbc651c4439ba6b555e6179e5a7b797dcdcf Mon Sep 17 00:00:00 2001 From: Cmurilochem Date: Tue, 4 Jun 2024 16:44:45 +0200 Subject: [PATCH 30/30] Add 'average_best' and non-default 'penalties_in_loss' in hyperopt_studies runcards --- n3fit/runcards/hyperopt_studies/renew_hyperopt.yml | 1 + .../restricted_search_space_renew_hyperopt.yml | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/n3fit/runcards/hyperopt_studies/renew_hyperopt.yml b/n3fit/runcards/hyperopt_studies/renew_hyperopt.yml index 8c17b6ecb9..30008f22b3 100644 --- a/n3fit/runcards/hyperopt_studies/renew_hyperopt.yml +++ b/n3fit/runcards/hyperopt_studies/renew_hyperopt.yml @@ -163,6 +163,7 @@ kfold: loss_type: chi2 replica_statistic: average_best fold_statistic: average + penalties_in_loss: True penalties: - saturation - patience diff --git a/n3fit/runcards/hyperopt_studies/restricted_search_space_renew_hyperopt.yml b/n3fit/runcards/hyperopt_studies/restricted_search_space_renew_hyperopt.yml index 02b34d7cce..332247100d 100644 --- a/n3fit/runcards/hyperopt_studies/restricted_search_space_renew_hyperopt.yml +++ b/n3fit/runcards/hyperopt_studies/restricted_search_space_renew_hyperopt.yml @@ -129,8 +129,9 @@ hyperscan_config: kfold: loss_type: chi2 - replica_statistic: average + replica_statistic: average_best fold_statistic: average + penalties_in_loss: True penalties: - 
saturation - patience