From dfcc7dd4c89fda9dcc5f0ae10b8cf234e462b363 Mon Sep 17 00:00:00 2001 From: t7phy Date: Mon, 27 May 2024 00:05:07 +0200 Subject: [PATCH 01/30] for x-q2 map for ttb --- validphys2/src/validphys/process_options.py | 22 +++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index c895d6373c..9af5d23951 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -10,7 +10,7 @@ import numpy as np from validobj.custom import Parser -TMASS = 173.3 +TMASS = 172.5 class _Vars: @@ -189,6 +189,12 @@ def _hqp_ptq_xq2map(kin_info): Q = np.sqrt(QMASS2 + kin_info[_Vars.pT_t] * kin_info[_Vars.pT_t]) + kin_info[_Vars.pT_t] return Q / kin_info[_Vars.sqrts], Q * Q +def _hqp_mqq_xq2map(kin_info): + # Compute x, Q2 + QQMASS2 = (2 * TMASS) * (2 * TMASS) + Q = np.sqrt(QQMASS2 + kin_info[_Vars.m_ttBar] * kin_info[_Vars.m_ttBar]) + kin_info[_Vars.m_ttBar] + return Q / kin_info[_Vars.sqrts], Q * Q + def _displusjet_xq2map(kin_info): """Computes x and q2 mapping for a DIS + J (J) process @@ -252,25 +258,32 @@ def _dyncpt_xq2map(kin_info): HQP_YQ = _Process( "HQP_YQ", - "Normalized differential cross section w.r.t. absolute rapidity of t", + "(absolute) rapidity of top quark in top pair production", accepted_variables=(_Vars.y_t, _Vars.m_t2, _Vars.sqrts, _Vars.m_ttBar), xq2map_function=_hqp_yq_xq2map, ) HQP_YQQ = _Process( "HQP_YQQ", - "Differential cross section w.r.t. absolute rapidity of ttBar", + "(absolute) rapidity of top quark pair in top pair production", accepted_variables=(_Vars.y_ttBar, _Vars.m_t2, _Vars.sqrts, _Vars.m_ttBar), xq2map_function=_hqp_yqq_xq2map, ) HQP_PTQ = _Process( "HQP_PTQ", - "Normalized double differential cross section w.r.t. 
absolute rapidity and transverse momentum of t", + "Transverse momentum of top quark in top pair production", accepted_variables=(_Vars.pT_t, _Vars.y_t, _Vars.sqrts, _Vars.m_t2), xq2map_function=_hqp_ptq_xq2map, ) +HQP_MQQ = _Process( + "HQP_MQQ", + "Invariant mass of top quark pair in top pair production", + accepted_variables=(_Vars.m_ttBar, _Vars.y_ttBar, _Vars.sqrts, _Vars.m_t2), + xq2map_function=_hqp_mqq_xq2map, +) + HERAJET = _Process( "HERAJET", @@ -304,6 +317,7 @@ def _dyncpt_xq2map(kin_info): "HQP_YQ": HQP_YQ, "HQP_YQQ": HQP_YQQ, "HQP_PTQ": HQP_PTQ, + "HQP_MQQ": HQP_MQQ, "HERAJET": HERAJET, "HERADIJET": dataclasses.replace(HERAJET, name="HERADIJET", description="DIS + jj production"), "DY_W_ETA": DY_W_ETA, From 28ded46c3abd56230d6dc7513c0c6261c651fc82 Mon Sep 17 00:00:00 2001 From: t7phy Date: Mon, 27 May 2024 00:08:24 +0200 Subject: [PATCH 02/30] BLACK --- validphys2/src/validphys/process_options.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 9af5d23951..6f36c6a457 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -24,7 +24,7 @@ class _Vars: ystar = "ystar" ydiff = "ydiff" m_jj = "m_jj" - p_T2 = "p_T2" # This one is wrong, should be pT2 + p_T2 = "p_T2" # This one is wrong, should be pT2 y_t = "y_t" y_ttBar = "y_ttBar" m_t2 = "m_t2" @@ -38,7 +38,7 @@ class _Vars: class _KinematicsInformation: """Read the 3 columns dataframe corresponding to the values set in the ``kinematic_coverage`` field into a dictionary defining the name of the variables. - + Adds the special "sqrts" key unless it is already part of the kinematic coverage. 
Provides a ``.get_one_of`` method that accepts any number of variables @@ -189,10 +189,14 @@ def _hqp_ptq_xq2map(kin_info): Q = np.sqrt(QMASS2 + kin_info[_Vars.pT_t] * kin_info[_Vars.pT_t]) + kin_info[_Vars.pT_t] return Q / kin_info[_Vars.sqrts], Q * Q + def _hqp_mqq_xq2map(kin_info): # Compute x, Q2 QQMASS2 = (2 * TMASS) * (2 * TMASS) - Q = np.sqrt(QQMASS2 + kin_info[_Vars.m_ttBar] * kin_info[_Vars.m_ttBar]) + kin_info[_Vars.m_ttBar] + Q = ( + np.sqrt(QQMASS2 + kin_info[_Vars.m_ttBar] * kin_info[_Vars.m_ttBar]) + + kin_info[_Vars.m_ttBar] + ) return Q / kin_info[_Vars.sqrts], Q * Q @@ -208,12 +212,13 @@ def _displusjet_xq2map(kin_info): x = q2 * q2 / s / (pt**2 - q2) return x, q2 + def _dywboson_xq2map(kin_dict): """ Computes x and q2 mapping for pseudo rapidity observables originating from a W boson DY process. """ - mass2 = kin_dict[_Vars.m_W2] + mass2 = kin_dict[_Vars.m_W2] sqrts = kin_dict[_Vars.sqrts] eta = kin_dict[_Vars.eta] @@ -223,6 +228,7 @@ def _dywboson_xq2map(kin_dict): x = np.concatenate((x1, x2)) return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((mass2, mass2)) + def _dyncpt_xq2map(kin_info): """ Computes x and q2 mapping for DY NC dilepton @@ -230,7 +236,7 @@ def _dyncpt_xq2map(kin_info): """ q2 = kin_info[_Vars.m_Z2] pt = kin_info[_Vars.pT] - s = kin_info[_Vars.sqrts]**2 + s = kin_info[_Vars.sqrts] ** 2 x = q2 * q2 / s / (pt**2 - q2) return x, q2 From c2d1d7018aaddcd01977d612711baf9a9662bdce Mon Sep 17 00:00:00 2001 From: t7phy Date: Mon, 27 May 2024 23:21:32 +0200 Subject: [PATCH 03/30] implement suggestions --- validphys2/src/validphys/process_options.py | 27 ++++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 6f36c6a457..6bfc39d38d 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -10,8 +10,6 @@ import numpy as np from validobj.custom import Parser 
-TMASS = 172.5 - class _Vars: x = "x" @@ -162,7 +160,7 @@ def _hqp_yq_xq2map(kin_info): kin_info[_Vars.m_t2] = kin_info["k2"] kin_info[_Vars.sqrts] = kin_info["k3"] - mass2 = kin_info.get_one_of(_Vars.m_t2, _Vars.m_ttBar) + mass2 = kin_info[_Vars.m_t2] ratio = np.sqrt(mass2) / kin_info[_Vars.sqrts] x1 = ratio * np.exp(kin_info[_Vars.y_t]) @@ -174,7 +172,7 @@ def _hqp_yq_xq2map(kin_info): def _hqp_yqq_xq2map(kin_info): # Compute x, Q2 - mass2 = kin_info.get_one_of(_Vars.m_t2, _Vars.m_ttBar) + mass2 = kin_info[_Vars.m_t2] ratio = np.sqrt(mass2) / kin_info[_Vars.sqrts] x1 = ratio * np.exp(kin_info[_Vars.y_ttBar]) x2 = ratio * np.exp(-kin_info[_Vars.y_ttBar]) @@ -185,14 +183,14 @@ def _hqp_yqq_xq2map(kin_info): def _hqp_ptq_xq2map(kin_info): # Compute x, Q2 - QMASS2 = TMASS * TMASS + QMASS2 = kin_info[_Vars.m_t2] Q = np.sqrt(QMASS2 + kin_info[_Vars.pT_t] * kin_info[_Vars.pT_t]) + kin_info[_Vars.pT_t] return Q / kin_info[_Vars.sqrts], Q * Q def _hqp_mqq_xq2map(kin_info): # Compute x, Q2 - QQMASS2 = (2 * TMASS) * (2 * TMASS) + QQMASS2 = 4 * kin_info[_Vars.m_t2] Q = ( np.sqrt(QQMASS2 + kin_info[_Vars.m_ttBar] * kin_info[_Vars.m_ttBar]) + kin_info[_Vars.m_ttBar] @@ -200,6 +198,16 @@ def _hqp_mqq_xq2map(kin_info): return Q / kin_info[_Vars.sqrts], Q * Q +def _inc_xq2map(kin_info): + # Compute x, Q2 + if {"k1", "k2", "k3"} <= kin_info.keys(): + kin_info[_Vars.m_X2] = kin_info["k2"] + kin_info[_Vars.sqrts] = kin_info["k3"] + + mass2 = kin_info.get_one_of(m_W2, m_Z2, m_t2, m_X2) + return np.sqrt(mass2) / kin_info[_Vars.sqrts], mass2 + + def _displusjet_xq2map(kin_info): """Computes x and q2 mapping for a DIS + J (J) process Uses Q2 as provided by the dictionary of kinematics variables @@ -290,6 +298,12 @@ def _dyncpt_xq2map(kin_info): xq2map_function=_hqp_mqq_xq2map, ) +INC = _Process( + "INC", + "Inclusive cross section", + accepted_variables=(_Vars.sqrts, _Vars.m_W2, _Vars.m_Z2, _Vars.m_t2, _Vars.m_X2), + xq2map_function=_inc_xq2map, +) HERAJET = _Process( "HERAJET", @@ 
-324,6 +338,7 @@ def _dyncpt_xq2map(kin_info): "HQP_YQQ": HQP_YQQ, "HQP_PTQ": HQP_PTQ, "HQP_MQQ": HQP_MQQ, + "INC": INC, "HERAJET": HERAJET, "HERADIJET": dataclasses.replace(HERAJET, name="HERADIJET", description="DIS + jj production"), "DY_W_ETA": DY_W_ETA, From d9c67e0b4de180510e088b40abfdf26aa51f9427 Mon Sep 17 00:00:00 2001 From: t7phy Date: Tue, 28 May 2024 20:17:47 +0200 Subject: [PATCH 04/30] implement suggestions v2 --- validphys2/src/validphys/process_options.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 6bfc39d38d..313a41c508 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -201,10 +201,11 @@ def _hqp_mqq_xq2map(kin_info): def _inc_xq2map(kin_info): # Compute x, Q2 if {"k1", "k2", "k3"} <= kin_info.keys(): - kin_info[_Vars.m_X2] = kin_info["k2"] + mass2 = kin_info["k2"] kin_info[_Vars.sqrts] = kin_info["k3"] + else: + mass2 = kin_info.get_one_of(m_W2, m_Z2, m_t2) - mass2 = kin_info.get_one_of(m_W2, m_Z2, m_t2, m_X2) return np.sqrt(mass2) / kin_info[_Vars.sqrts], mass2 From c39e015a03b8b2ffe37a1366da97816577c8470e Mon Sep 17 00:00:00 2001 From: t7phy Date: Tue, 28 May 2024 20:26:05 +0200 Subject: [PATCH 05/30] fix error in cad1200 --- validphys2/src/validphys/process_options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 313a41c508..88803bae9f 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -302,7 +302,7 @@ def _dyncpt_xq2map(kin_info): INC = _Process( "INC", "Inclusive cross section", - accepted_variables=(_Vars.sqrts, _Vars.m_W2, _Vars.m_Z2, _Vars.m_t2, _Vars.m_X2), + accepted_variables=(_Vars.sqrts, _Vars.m_W2, _Vars.m_Z2, _Vars.m_t2), xq2map_function=_inc_xq2map, ) From 
501a19ac9c062c1209065b94d38afa31b1992ed3 Mon Sep 17 00:00:00 2001 From: t7phy Date: Tue, 28 May 2024 20:49:21 +0200 Subject: [PATCH 06/30] take into account the zero var used in inc datasets --- validphys2/src/validphys/process_options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 88803bae9f..f89ecab0f1 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -302,7 +302,7 @@ def _dyncpt_xq2map(kin_info): INC = _Process( "INC", "Inclusive cross section", - accepted_variables=(_Vars.sqrts, _Vars.m_W2, _Vars.m_Z2, _Vars.m_t2), + accepted_variables=(_Vars.zero, _Vars.sqrts, _Vars.m_W2, _Vars.m_Z2, _Vars.m_t2), xq2map_function=_inc_xq2map, ) From 7d720f93bcc4071de253ee312f21efae86219da8 Mon Sep 17 00:00:00 2001 From: t7phy Date: Tue, 28 May 2024 20:58:41 +0200 Subject: [PATCH 07/30] atry to ccount for zero without using _Vars --- validphys2/src/validphys/process_options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index f89ecab0f1..d5d42621f1 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -302,7 +302,7 @@ def _dyncpt_xq2map(kin_info): INC = _Process( "INC", "Inclusive cross section", - accepted_variables=(_Vars.zero, _Vars.sqrts, _Vars.m_W2, _Vars.m_Z2, _Vars.m_t2), + accepted_variables=("zero", _Vars.sqrts, _Vars.m_W2, _Vars.m_Z2, _Vars.m_t2), xq2map_function=_inc_xq2map, ) From 1e1bec273e100d4aa0838da2b40c1113b52c9c4e Mon Sep 17 00:00:00 2001 From: t7phy Date: Wed, 29 May 2024 14:07:20 +0200 Subject: [PATCH 08/30] drop support for k1,k2,k3 in rzpidity distributions --- validphys2/src/validphys/process_options.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git 
a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index d5d42621f1..53a1ed897b 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -154,11 +154,6 @@ def _dijets_xq2map(kin_info): def _hqp_yq_xq2map(kin_info): - # Compute x, Q2 - if {"k1", "k2", "k3"} <= kin_info.keys(): - kin_info[_Vars.y_t] = kin_info["k1"] - kin_info[_Vars.m_t2] = kin_info["k2"] - kin_info[_Vars.sqrts] = kin_info["k3"] mass2 = kin_info[_Vars.m_t2] @@ -199,12 +194,12 @@ def _hqp_mqq_xq2map(kin_info): def _inc_xq2map(kin_info): - # Compute x, Q2 + Compute x, Q2 if {"k1", "k2", "k3"} <= kin_info.keys(): mass2 = kin_info["k2"] kin_info[_Vars.sqrts] = kin_info["k3"] else: - mass2 = kin_info.get_one_of(m_W2, m_Z2, m_t2) + mass2 = kin_info.get_one_of("m_W2", "m_Z2", "m_t2") return np.sqrt(mass2) / kin_info[_Vars.sqrts], mass2 From 91027559e20efb6e9ca0dfd0fabc4eb5780dbdaf Mon Sep 17 00:00:00 2001 From: t7phy Date: Wed, 29 May 2024 14:27:18 +0200 Subject: [PATCH 09/30] fix ttb process options --- validphys2/src/validphys/process_options.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 53a1ed897b..7e5647d874 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -154,47 +154,43 @@ def _dijets_xq2map(kin_info): def _hqp_yq_xq2map(kin_info): - + # Compute x, Q2 + # Theory predictions computed with HT/4 see 1906.06535 mass2 = kin_info[_Vars.m_t2] - ratio = np.sqrt(mass2) / kin_info[_Vars.sqrts] x1 = ratio * np.exp(kin_info[_Vars.y_t]) x2 = ratio * np.exp(-kin_info[_Vars.y_t]) q2 = mass2 x = np.concatenate((x1, x2)) - return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((q2, q2)) + return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((q2, q2)) / 4 def _hqp_yqq_xq2map(kin_info): # Compute x, Q2 + # Theory 
predictions computed with HT/4 see 1906.06535 mass2 = kin_info[_Vars.m_t2] ratio = np.sqrt(mass2) / kin_info[_Vars.sqrts] x1 = ratio * np.exp(kin_info[_Vars.y_ttBar]) x2 = ratio * np.exp(-kin_info[_Vars.y_ttBar]) q2 = kin_info[_Vars.m_t2] x = np.concatenate((x1, x2)) - return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((q2, q2)) + return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((q2, q2)) / 4 def _hqp_ptq_xq2map(kin_info): # Compute x, Q2 - QMASS2 = kin_info[_Vars.m_t2] - Q = np.sqrt(QMASS2 + kin_info[_Vars.pT_t] * kin_info[_Vars.pT_t]) + kin_info[_Vars.pT_t] + Q = (kin_info[_Vars.m_t2] + kin_info[_Vars.pT_t] * kin_info[_Vars.pT_t]) ** 0.5 / 2 return Q / kin_info[_Vars.sqrts], Q * Q def _hqp_mqq_xq2map(kin_info): # Compute x, Q2 - QQMASS2 = 4 * kin_info[_Vars.m_t2] - Q = ( - np.sqrt(QQMASS2 + kin_info[_Vars.m_ttBar] * kin_info[_Vars.m_ttBar]) - + kin_info[_Vars.m_ttBar] - ) + Q = kin_info[_Vars.m_ttBar] / 4 return Q / kin_info[_Vars.sqrts], Q * Q def _inc_xq2map(kin_info): - Compute x, Q2 + # Compute x, Q2 if {"k1", "k2", "k3"} <= kin_info.keys(): mass2 = kin_info["k2"] kin_info[_Vars.sqrts] = kin_info["k3"] From dd2e122eab0246d2e512f7720c7ed09bf160722c Mon Sep 17 00:00:00 2001 From: t7phy Date: Wed, 29 May 2024 14:35:56 +0200 Subject: [PATCH 10/30] sqrts -> m_t2 in kin.cov. 
for 2d dists --- .../new_commondata/ATLAS_TTBAR_13TEV_HADR_DIF/metadata.yaml | 4 ++-- .../new_commondata/CMS_TTBAR_13TEV_LJ_DIF/metadata.yaml | 4 ++-- .../new_commondata/CMS_TTBAR_8TEV_2L_DIF/metadata.yaml | 6 +++--- validphys2/src/validphys/process_options.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/nnpdf_data/nnpdf_data/new_commondata/ATLAS_TTBAR_13TEV_HADR_DIF/metadata.yaml b/nnpdf_data/nnpdf_data/new_commondata/ATLAS_TTBAR_13TEV_HADR_DIF/metadata.yaml index 2e0c26371a..512c6e0dd2 100644 --- a/nnpdf_data/nnpdf_data/new_commondata/ATLAS_TTBAR_13TEV_HADR_DIF/metadata.yaml +++ b/nnpdf_data/nnpdf_data/new_commondata/ATLAS_TTBAR_13TEV_HADR_DIF/metadata.yaml @@ -138,7 +138,7 @@ implemented_observables: data_central: data_d2Sig_dmttBar_dyttBar.yaml data_uncertainties: - uncertainties_d2Sig_dmttBar_dyttBar.yaml - kinematic_coverage: [y_ttBar, m_ttBar, sqrts] + kinematic_coverage: [y_ttBar, m_ttBar, m_t2] plotting: dataset_label: 'ATLAS 13 TeV top quark pair in hadronic channel: $\frac{d^2\sigma}{dm_{t\bar{t}}d|y_{t\bar{t}}|}$' kinematics_override: identity @@ -167,7 +167,7 @@ implemented_observables: data_central: data_d2Sig_dmttBar_dyttBar_norm.yaml data_uncertainties: - uncertainties_d2Sig_dmttBar_dyttBar_norm.yaml - kinematic_coverage: [y_ttBar, m_ttBar, sqrts] + kinematic_coverage: [y_ttBar, m_ttBar, m_t2] plotting: dataset_label: 'ATLAS 13 TeV top quark pair in hadronic channel: $\frac{1}{\sigma}\frac{d^2\sigma}{dm_{t\bar{t}}d|y_{t\bar{t}}|}$' kinematics_override: identity diff --git a/nnpdf_data/nnpdf_data/new_commondata/CMS_TTBAR_13TEV_LJ_DIF/metadata.yaml b/nnpdf_data/nnpdf_data/new_commondata/CMS_TTBAR_13TEV_LJ_DIF/metadata.yaml index 8ccf38a40c..473890ebd9 100644 --- a/nnpdf_data/nnpdf_data/new_commondata/CMS_TTBAR_13TEV_LJ_DIF/metadata.yaml +++ b/nnpdf_data/nnpdf_data/new_commondata/CMS_TTBAR_13TEV_LJ_DIF/metadata.yaml @@ -140,7 +140,7 @@ implemented_observables: data_central: data_d2Sig_dyttBar_dmttBar.yaml data_uncertainties: - 
uncertainties_d2Sig_dyttBar_dmttBar.yaml - kinematic_coverage: [y_ttBar, m_ttBar, sqrts] + kinematic_coverage: [y_ttBar, m_ttBar, m_t2] plotting: dataset_label: 'CMS 13 TeV top quark pair l+j channel: $\frac{d^2\sigma}{dm_{t\bar{t}}d|y_{t\bar{t}}|}$' kinematics_override: identity @@ -170,7 +170,7 @@ implemented_observables: data_central: data_d2Sig_dyttBar_dmttBar_norm.yaml data_uncertainties: - uncertainties_d2Sig_dyttBar_dmttBar_norm.yaml - kinematic_coverage: [y_ttBar, m_ttBar, sqrts] + kinematic_coverage: [y_ttBar, m_ttBar, m_t2] plotting: dataset_label: 'CMS 13 TeV top quark pair l+j channel: $\frac{1}{\sigma}\frac{d^2\sigma}{dm_{t\bar{t}}d|y_{t\bar{t}}|}$' kinematics_override: identity diff --git a/nnpdf_data/nnpdf_data/new_commondata/CMS_TTBAR_8TEV_2L_DIF/metadata.yaml b/nnpdf_data/nnpdf_data/new_commondata/CMS_TTBAR_8TEV_2L_DIF/metadata.yaml index 8ccc833e2c..165913f197 100644 --- a/nnpdf_data/nnpdf_data/new_commondata/CMS_TTBAR_8TEV_2L_DIF/metadata.yaml +++ b/nnpdf_data/nnpdf_data/new_commondata/CMS_TTBAR_8TEV_2L_DIF/metadata.yaml @@ -47,7 +47,7 @@ implemented_observables: kinematic_coverage: - pT_t - y_t - - sqrts + - m_t2 plotting: dataset_label: 'CMS TTB 8 TeV: $\frac{1}{\sigma}\frac{d^2\sigma}{d|y_{t}|dpT_{t}}$' kinematics_override: identity @@ -95,7 +95,7 @@ implemented_observables: kinematic_coverage: - y_t - m_ttBar - - sqrts + - m_t2 plotting: dataset_label: 'CMS TTB 8 TeV: $\frac{1}{\sigma}\frac{d^2\sigma}{d|y_{t}|dm_{t\bar{t}}}$' kinematics_override: identity @@ -154,7 +154,7 @@ implemented_observables: kinematic_coverage: - y_ttBar - m_ttBar - - sqrts + - m_t2 plotting: dataset_label: 'CMS TTB 8 TeV: $\frac{1}{\sigma}\frac{d^2\sigma}{dm_{t\bar{t}}d|y_{t\bar{t}}|}$' kinematics_override: identity diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 7e5647d874..a0f523bc04 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -286,7 +286,7 
@@ def _dyncpt_xq2map(kin_info): HQP_MQQ = _Process( "HQP_MQQ", "Invariant mass of top quark pair in top pair production", - accepted_variables=(_Vars.m_ttBar, _Vars.y_ttBar, _Vars.sqrts, _Vars.m_t2), + accepted_variables=(_Vars.m_ttBar, _Vars.y_t, _Vars.y_ttBar, _Vars.sqrts, _Vars.m_t2), xq2map_function=_hqp_mqq_xq2map, ) From c068eaee02b39db64845bdb86544154ed253b075 Mon Sep 17 00:00:00 2001 From: Tanishq Sharma <99052355+t7phy@users.noreply.github.com> Date: Wed, 29 May 2024 16:36:44 +0200 Subject: [PATCH 11/30] Update validphys2/src/validphys/process_options.py Co-authored-by: Juan M. Cruz-Martinez --- validphys2/src/validphys/process_options.py | 1 - 1 file changed, 1 deletion(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index a0f523bc04..4d1aed2a5e 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -193,7 +193,6 @@ def _inc_xq2map(kin_info): # Compute x, Q2 if {"k1", "k2", "k3"} <= kin_info.keys(): mass2 = kin_info["k2"] - kin_info[_Vars.sqrts] = kin_info["k3"] else: mass2 = kin_info.get_one_of("m_W2", "m_Z2", "m_t2") From 5eca1d00d3a3b328c574029603548ec0ed3ab85a Mon Sep 17 00:00:00 2001 From: Tanishq Sharma <99052355+t7phy@users.noreply.github.com> Date: Thu, 30 May 2024 12:30:09 +0200 Subject: [PATCH 12/30] Update validphys2/src/validphys/process_options.py Co-authored-by: Juan M. 
Cruz-Martinez --- validphys2/src/validphys/process_options.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 4d1aed2a5e..1eca6e6179 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -191,10 +191,8 @@ def _hqp_mqq_xq2map(kin_info): def _inc_xq2map(kin_info): # Compute x, Q2 - if {"k1", "k2", "k3"} <= kin_info.keys(): - mass2 = kin_info["k2"] - else: - mass2 = kin_info.get_one_of("m_W2", "m_Z2", "m_t2") + # k2 necessary to take the mass for DY inclusive cross sections still not migrated + mass2 = kin_info.get_one_of(_Vars.m_W2, _Vars.m_Z2, _Vars.m_t2, "k2") return np.sqrt(mass2) / kin_info[_Vars.sqrts], mass2 From 594c90716381057324c92533acb0b40e1c5a38eb Mon Sep 17 00:00:00 2001 From: Tanishq Sharma <99052355+t7phy@users.noreply.github.com> Date: Sun, 2 Jun 2024 12:42:39 +0200 Subject: [PATCH 13/30] Update validphys2/src/validphys/process_options.py Co-authored-by: Juan M. Cruz-Martinez --- validphys2/src/validphys/process_options.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 1eca6e6179..2e0ec2da4a 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -155,7 +155,8 @@ def _dijets_xq2map(kin_info): def _hqp_yq_xq2map(kin_info): # Compute x, Q2 - # Theory predictions computed with HT/4 see 1906.06535 + # Theory predictions computed with HT/4 ~ mt/2 for rapidity distr. 
+ # see section 3 from 1906.06535 mass2 = kin_info[_Vars.m_t2] ratio = np.sqrt(mass2) / kin_info[_Vars.sqrts] x1 = ratio * np.exp(kin_info[_Vars.y_t]) From c85c2969d145445c758ca048a8b164b0b30b68c4 Mon Sep 17 00:00:00 2001 From: t7phy Date: Mon, 3 Jun 2024 21:39:34 +0200 Subject: [PATCH 14/30] impl suggestions --- validphys2/src/validphys/process_options.py | 34 +++++++-------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index 2e0ec2da4a..deb7ef87ec 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -155,24 +155,14 @@ def _dijets_xq2map(kin_info): def _hqp_yq_xq2map(kin_info): # Compute x, Q2 + # # Theory predictions computed with HT/4 ~ mt/2 for rapidity distr. # see section 3 from 1906.06535 - mass2 = kin_info[_Vars.m_t2] + # HT defined in Eqn. (1) of 1611.08609 + rapidity = kin_info.get_one_of(_Vars.y_t, _Vars.y_ttBar) ratio = np.sqrt(mass2) / kin_info[_Vars.sqrts] - x1 = ratio * np.exp(kin_info[_Vars.y_t]) - x2 = ratio * np.exp(-kin_info[_Vars.y_t]) - q2 = mass2 - x = np.concatenate((x1, x2)) - return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((q2, q2)) / 4 - - -def _hqp_yqq_xq2map(kin_info): - # Compute x, Q2 - # Theory predictions computed with HT/4 see 1906.06535 - mass2 = kin_info[_Vars.m_t2] - ratio = np.sqrt(mass2) / kin_info[_Vars.sqrts] - x1 = ratio * np.exp(kin_info[_Vars.y_ttBar]) - x2 = ratio * np.exp(-kin_info[_Vars.y_ttBar]) + x1 = ratio * np.exp(rapidity) + x2 = ratio * np.exp(-rapidity) q2 = kin_info[_Vars.m_t2] x = np.concatenate((x1, x2)) return np.clip(x, a_min=None, a_max=1, out=x), np.concatenate((q2, q2)) / 4 @@ -180,12 +170,17 @@ def _hqp_yqq_xq2map(kin_info): def _hqp_ptq_xq2map(kin_info): # Compute x, Q2 + # + # At LO pt ~ ptb + # ht = 2.*sqrt(m_t2 + pT_t2) Q = (kin_info[_Vars.m_t2] + kin_info[_Vars.pT_t] * kin_info[_Vars.pT_t]) ** 0.5 / 2 return Q / 
kin_info[_Vars.sqrts], Q * Q def _hqp_mqq_xq2map(kin_info): # Compute x, Q2 + # + # Theory predictions computed with HT/4 ~ m_ttbar/4 Q = kin_info[_Vars.m_ttBar] / 4 return Q / kin_info[_Vars.sqrts], Q * Q @@ -267,13 +262,6 @@ def _dyncpt_xq2map(kin_info): xq2map_function=_hqp_yq_xq2map, ) -HQP_YQQ = _Process( - "HQP_YQQ", - "(absolute) rapidity of top quark pair in top pair production", - accepted_variables=(_Vars.y_ttBar, _Vars.m_t2, _Vars.sqrts, _Vars.m_ttBar), - xq2map_function=_hqp_yqq_xq2map, -) - HQP_PTQ = _Process( "HQP_PTQ", "Transverse momentum of top quark in top pair production", @@ -325,7 +313,7 @@ def _dyncpt_xq2map(kin_info): "JET": JET, "DIJET": DIJET, "HQP_YQ": HQP_YQ, - "HQP_YQQ": HQP_YQQ, + "HQP_YQQ": dataclasses.replace(HQP_YQ, name="HQP_YQQ"), "HQP_PTQ": HQP_PTQ, "HQP_MQQ": HQP_MQQ, "INC": INC, From 7797d0fa4da90590c6d5af2c785ae87e243f588d Mon Sep 17 00:00:00 2001 From: t7phy Date: Mon, 3 Jun 2024 21:43:13 +0200 Subject: [PATCH 15/30] accepted vars modified --- validphys2/src/validphys/process_options.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/process_options.py b/validphys2/src/validphys/process_options.py index deb7ef87ec..a06ec15baa 100644 --- a/validphys2/src/validphys/process_options.py +++ b/validphys2/src/validphys/process_options.py @@ -258,14 +258,14 @@ def _dyncpt_xq2map(kin_info): HQP_YQ = _Process( "HQP_YQ", "(absolute) rapidity of top quark in top pair production", - accepted_variables=(_Vars.y_t, _Vars.m_t2, _Vars.sqrts, _Vars.m_ttBar), + accepted_variables=(_Vars.y_t, _Vars.y_ttBar, _Vars.m_t2, _Vars.sqrts, _Vars.m_ttBar, _Vars.pT_t), xq2map_function=_hqp_yq_xq2map, ) HQP_PTQ = _Process( "HQP_PTQ", "Transverse momentum of top quark in top pair production", - accepted_variables=(_Vars.pT_t, _Vars.y_t, _Vars.sqrts, _Vars.m_t2), + accepted_variables=(_Vars.pT_t, _Vars.y_t, _Vars.y_ttBar, _Vars.sqrts, _Vars.m_t2), xq2map_function=_hqp_ptq_xq2map, ) From 
7505b5f4181f7114a810311e94980dbc4b320af0 Mon Sep 17 00:00:00 2001 From: "Juan M. Cruz-Martinez" Date: Tue, 4 Jun 2024 09:37:01 +0200 Subject: [PATCH 16/30] remove special case from tests --- validphys2/src/validphys/tests/test_datafiles.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/validphys2/src/validphys/tests/test_datafiles.py b/validphys2/src/validphys/tests/test_datafiles.py index 05b3246a89..37bebeaa20 100644 --- a/validphys2/src/validphys/tests/test_datafiles.py +++ b/validphys2/src/validphys/tests/test_datafiles.py @@ -49,10 +49,7 @@ def test_all_datasets(dataset_name): # and if so, check that the process_type is not simply a string kin_override = main_cd.metadata.plotting.kinematics_override if isinstance(kin_override, kintransform_identity) and isinstance(process_type, str): - # Skip for the time being the processes for which there is no implementation but have been - # merged to master: issue #1991 - if process_type not in ("HQP_MQQ", "INC"): - raise NotImplementedError(f"The {process_type=} is not implemented in process_options") + raise NotImplementedError(f"The {process_type=} is not implemented in process_options") elif not isinstance(process_type, str): if not process_type.are_accepted_variables(kin_cov): From ea77eaa1490e1a4fe46d7669996b4c9b40a54a27 Mon Sep 17 00:00:00 2001 From: Aron Date: Wed, 20 Mar 2024 11:12:12 +0100 Subject: [PATCH 17/30] Change default replica statistic from average to average over 90% best replicas --- n3fit/src/n3fit/hyper_optimization/rewards.py | 42 ++++++++++++++++--- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/n3fit/src/n3fit/hyper_optimization/rewards.py b/n3fit/src/n3fit/hyper_optimization/rewards.py index b51cf7f629..8494874740 100644 --- a/n3fit/src/n3fit/hyper_optimization/rewards.py +++ b/n3fit/src/n3fit/hyper_optimization/rewards.py @@ -44,6 +44,28 @@ log = logging.getLogger(__name__) +def _average_best(fold_losses: np.ndarray, percentage: float = 0.9, axis: int = 0) 
-> float: + """ + Compute the average of the input array along the specified axis, among the best `percentage` + of replicas. + + Parameters + ---------- + fold_losses: np.ndarray + Input array. + float: The percentage of best replicas to take into account. + axis: int, optional + Axis along which the mean is computed. Default is 0. + + Returns + ------- + float: The average along the specified axis. + """ + sorted_losses = np.sort(fold_losses, axis=axis) + best_losses = sorted_losses[: int(percentage * len(sorted_losses))] + return np.average(best_losses, axis=axis).item() + + def _average(fold_losses: np.ndarray, axis: int = 0) -> float: """ Compute the average of the input array along the specified axis. @@ -98,7 +120,12 @@ def _std(fold_losses: np.ndarray, axis: int = 0) -> float: return np.std(fold_losses, axis=axis).item() -IMPLEMENTED_STATS = {"average": _average, "best_worst": _best_worst, "std": _std} +IMPLEMENTED_STATS = { + "average": _average, + "average_best": _average_best, + "best_worst": _best_worst, + "std": _std, +} IMPLEMENTED_LOSSES = ["chi2", "phi2"] @@ -130,12 +157,15 @@ class HyperLoss: def __init__( self, loss_type: str = None, replica_statistic: str = None, fold_statistic: str = None ): - self._default_statistic = "average" self._default_loss = "chi2" self.loss_type = self._parse_loss(loss_type) - self.reduce_over_replicas = self._parse_statistic(replica_statistic, "replica_statistic") - self.reduce_over_folds = self._parse_statistic(fold_statistic, "fold_statistic") + self.reduce_over_replicas = self._parse_statistic( + replica_statistic, "replica_statistic", default="average_best" + ) + self.reduce_over_folds = self._parse_statistic( + fold_statistic, "fold_statistic", default="average" + ) self.phi_vector = [] self.chi2_matrix = [] @@ -280,7 +310,7 @@ def _parse_loss(self, loss_type: str) -> str: return loss_type - def _parse_statistic(self, statistic: str, name: str) -> Callable: + def _parse_statistic(self, statistic: str, name: str, 
default: str) -> Callable: """ Parse the statistic and return the default if None. @@ -304,7 +334,7 @@ def _parse_statistic(self, statistic: str, name: str) -> Callable: For loss type equal to phi2, the applied fold statistics is always the reciprocal of the selected stats. """ if statistic is None: - statistic = self._default_statistic + statistic = default log.warning(f"No {name} selected in HyperLoss, defaulting to {statistic}") else: if statistic not in IMPLEMENTED_STATS: From 761b5f1f39065225a4bb9d8828130951f861904c Mon Sep 17 00:00:00 2001 From: Aron Date: Tue, 2 Apr 2024 11:47:21 +0200 Subject: [PATCH 18/30] Round up rather than down for number of best replicas --- n3fit/src/n3fit/hyper_optimization/rewards.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/hyper_optimization/rewards.py b/n3fit/src/n3fit/hyper_optimization/rewards.py index 8494874740..e820cd5993 100644 --- a/n3fit/src/n3fit/hyper_optimization/rewards.py +++ b/n3fit/src/n3fit/hyper_optimization/rewards.py @@ -62,7 +62,8 @@ def _average_best(fold_losses: np.ndarray, percentage: float = 0.9, axis: int = float: The average along the specified axis. 
""" sorted_losses = np.sort(fold_losses, axis=axis) - best_losses = sorted_losses[: int(percentage * len(sorted_losses))] + num_best = int(np.ceil(percentage * len(sorted_losses))) + best_losses = sorted_losses[:num_best] return np.average(best_losses, axis=axis).item() From 666b95e355d51f86d587d69336be9d40d50ae14a Mon Sep 17 00:00:00 2001 From: Aron Date: Tue, 2 Apr 2024 11:48:03 +0200 Subject: [PATCH 19/30] Make sure seed is int --- n3fit/src/n3fit/backends/keras_backend/multi_dense.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/backends/keras_backend/multi_dense.py b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py index df579cc846..6d789de90e 100644 --- a/n3fit/src/n3fit/backends/keras_backend/multi_dense.py +++ b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py @@ -187,7 +187,7 @@ def __call__(self, shape, dtype=None, **kwargs): per_replica_weights = [] for replica_seed in self.replica_seeds: if "seed" in self.initializer_config: - self.initializer_config["seed"] = self.base_seed + replica_seed + self.initializer_config["seed"] = int(self.base_seed + replica_seed) single_initializer = self.initializer_class.from_config(self.initializer_config) per_replica_weights.append(single_initializer(shape, dtype, **kwargs)) From 8ba460fabd1e342e6df39f62b2ec48d16f4f8625 Mon Sep 17 00:00:00 2001 From: Aron Date: Tue, 2 Apr 2024 13:01:02 +0200 Subject: [PATCH 20/30] Add warnings on replicas not passing --- n3fit/src/n3fit/model_trainer.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 0074a5692f..884733787a 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -744,15 +744,23 @@ def _train_and_fit(self, training_model, stopping_object, epochs=100) -> bool: update_freq=PUSH_INTEGRABILITY_EACH, ) - training_model.perform_fit( + loss_dict = training_model.perform_fit( epochs=epochs, 
verbose=False, callbacks=self.callbacks + [callback_st, callback_pos, callback_integ], ) + training_losses = loss_dict['loss'] + + if np.isnan(training_losses).any(): + log.warning(f"{np.isnan(training_losses).sum()} replicas have NaN losses") + + passed_replicas = [bool(i) for i in stopping_object.e_best_chi2] + if not all(passed_replicas): + log.warning(f"{len(passed_replicas) - sum(passed_replicas)} replicas have not passed") # TODO: in order to use multireplica in hyperopt is is necessary to define what "passing" means # for now consider the run as good if any replica passed - fit_has_passed = any(bool(i) for i in stopping_object.e_best_chi2) + fit_has_passed = any(passed_replicas) return fit_has_passed def _hyperopt_override(self, params): From 029f869216a433d02b4bcc5c03ed1f81c7c65988 Mon Sep 17 00:00:00 2001 From: Aron Date: Wed, 3 Apr 2024 12:24:10 +0200 Subject: [PATCH 21/30] Add selection based on average best loss --- n3fit/src/n3fit/hyper_optimization/rewards.py | 10 ++++-- n3fit/src/n3fit/model_trainer.py | 32 ++++++++----------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/n3fit/src/n3fit/hyper_optimization/rewards.py b/n3fit/src/n3fit/hyper_optimization/rewards.py index e820cd5993..fd9a0d965f 100644 --- a/n3fit/src/n3fit/hyper_optimization/rewards.py +++ b/n3fit/src/n3fit/hyper_optimization/rewards.py @@ -52,8 +52,9 @@ def _average_best(fold_losses: np.ndarray, percentage: float = 0.9, axis: int = Parameters ---------- fold_losses: np.ndarray - Input array. - float: The percentage of best replicas to take into account. + Per replica losses for a single fold. + percentage: float + The percentage of best replicas to take into account (rounded up). axis: int, optional Axis along which the mean is computed. Default is 0. @@ -61,8 +62,11 @@ def _average_best(fold_losses: np.ndarray, percentage: float = 0.9, axis: int = ------- float: The average along the specified axis. 
""" - sorted_losses = np.sort(fold_losses, axis=axis) num_best = int(np.ceil(percentage * len(sorted_losses))) + + if np.isnan(fold_losses).any(): + log.warning(f"{np.isnan(fold_losses).sum()} replicas have NaNs losses") + sorted_losses = np.sort(fold_losses, axis=axis) best_losses = sorted_losses[:num_best] return np.average(best_losses, axis=axis).item() diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 884733787a..65b148f0a8 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -744,24 +744,11 @@ def _train_and_fit(self, training_model, stopping_object, epochs=100) -> bool: update_freq=PUSH_INTEGRABILITY_EACH, ) - loss_dict = training_model.perform_fit( + training_model.perform_fit( epochs=epochs, verbose=False, callbacks=self.callbacks + [callback_st, callback_pos, callback_integ], ) - training_losses = loss_dict['loss'] - - if np.isnan(training_losses).any(): - log.warning(f"{np.isnan(training_losses).sum()} replicas have NaN losses") - - passed_replicas = [bool(i) for i in stopping_object.e_best_chi2] - if not all(passed_replicas): - log.warning(f"{len(passed_replicas) - sum(passed_replicas)} replicas have not passed") - - # TODO: in order to use multireplica in hyperopt is is necessary to define what "passing" means - # for now consider the run as good if any replica passed - fit_has_passed = any(passed_replicas) - return fit_has_passed def _hyperopt_override(self, params): """Unrolls complicated hyperopt structures into very simple dictionaries""" @@ -992,13 +979,9 @@ def hyperparametrizable(self, params): for model in models.values(): model.compile(**params["optimizer"]) - passed = self._train_and_fit(models["training"], stopping_object, epochs=epochs) + self._train_and_fit(models["training"], stopping_object, epochs=epochs) if self.mode_hyperopt: - if not passed: - log.info("Hyperparameter combination fail to find a good fit, breaking") - break - validation_loss = 
stopping_object.vl_chi2 # number of active points in this fold @@ -1018,6 +1001,15 @@ def hyperparametrizable(self, params): for penalty in self.hyper_penalties } + hyper_loss_per_replica = experimental_loss + sum(penalties.values()) + fold_loss = self._hyper_loss.reduce_over_replicas(hyper_loss_per_replica) + passed = fold_loss < self.hyper_threshold + if not passed: + log.info( + f"Hyperparameter combination failed to find a good fit (loss={fold_loss} > {self.hyper_threshold})" + ) + break + # Extracting the necessary data to compute phi # First, create a list of `validphys.core.DataGroupSpec` # containing only exp datasets within the held out fold @@ -1105,5 +1097,7 @@ def hyperparametrizable(self, params): # In a normal run, the only information we need to output is the stopping object # (which contains metadata about the stopping) # and the pdf model (which are used to generate the PDF grids and compute arclengths) + if not self.mode_hyperopt: + passed = any(bool(i) for i in stopping_object.e_best_chi2) dict_out = {"status": passed, "stopping_object": stopping_object, "pdf_model": pdf_model} return dict_out From abefcea9a3a6f6eca6e3b0c1e22c3cf6765befe7 Mon Sep 17 00:00:00 2001 From: Aron Date: Wed, 3 Apr 2024 13:06:54 +0200 Subject: [PATCH 22/30] bugfix --- n3fit/src/n3fit/hyper_optimization/rewards.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/hyper_optimization/rewards.py b/n3fit/src/n3fit/hyper_optimization/rewards.py index fd9a0d965f..ae49a4559c 100644 --- a/n3fit/src/n3fit/hyper_optimization/rewards.py +++ b/n3fit/src/n3fit/hyper_optimization/rewards.py @@ -62,7 +62,7 @@ def _average_best(fold_losses: np.ndarray, percentage: float = 0.9, axis: int = ------- float: The average along the specified axis. 
""" - num_best = int(np.ceil(percentage * len(sorted_losses))) + num_best = int(np.ceil(percentage * len(fold_losses))) if np.isnan(fold_losses).any(): log.warning(f"{np.isnan(fold_losses).sum()} replicas have NaNs losses") From cbce54ad588969904c806d8f603ab5c85b4345d2 Mon Sep 17 00:00:00 2001 From: Aron Date: Wed, 3 Apr 2024 13:57:07 +0200 Subject: [PATCH 23/30] increase hyper threshold in quickcard --- n3fit/src/n3fit/tests/regressions/hyper-quickcard.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/n3fit/src/n3fit/tests/regressions/hyper-quickcard.yml b/n3fit/src/n3fit/tests/regressions/hyper-quickcard.yml index f3387ef211..fe53ccf44b 100644 --- a/n3fit/src/n3fit/tests/regressions/hyper-quickcard.yml +++ b/n3fit/src/n3fit/tests/regressions/hyper-quickcard.yml @@ -62,7 +62,7 @@ kfold: - saturation - patience - integrability - threshold: 2.0 + threshold: 1e3 partitions: - datasets: - NMC_NC_NOTFIXED_P_EM-SIGMARED From 23a765401c64487b8ceba21eb52cda6a371e2410 Mon Sep 17 00:00:00 2001 From: Aron Date: Tue, 23 Apr 2024 09:16:52 +0200 Subject: [PATCH 24/30] Use average_best replica statistic in hyperopt runcard --- n3fit/runcards/hyperopt_studies/renew_hyperopt.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/n3fit/runcards/hyperopt_studies/renew_hyperopt.yml b/n3fit/runcards/hyperopt_studies/renew_hyperopt.yml index f4c5487704..8c17b6ecb9 100644 --- a/n3fit/runcards/hyperopt_studies/renew_hyperopt.yml +++ b/n3fit/runcards/hyperopt_studies/renew_hyperopt.yml @@ -161,7 +161,7 @@ hyperscan_config: kfold: loss_type: chi2 - replica_statistic: average + replica_statistic: average_best fold_statistic: average penalties: - saturation From 23df9b5bea50101b685697f430e0bd73abd4b64b Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 30 May 2024 11:04:18 +0200 Subject: [PATCH 25/30] apply own review comments: remove double check of hyper loss; percentage -> proportion; update docstr --- n3fit/src/n3fit/hyper_optimization/rewards.py | 40 
++++++++++--------- n3fit/src/n3fit/model_trainer.py | 15 ++----- 2 files changed, 26 insertions(+), 29 deletions(-) diff --git a/n3fit/src/n3fit/hyper_optimization/rewards.py b/n3fit/src/n3fit/hyper_optimization/rewards.py index ae49a4559c..50ceabaa85 100644 --- a/n3fit/src/n3fit/hyper_optimization/rewards.py +++ b/n3fit/src/n3fit/hyper_optimization/rewards.py @@ -44,17 +44,17 @@ log = logging.getLogger(__name__) -def _average_best(fold_losses: np.ndarray, percentage: float = 0.9, axis: int = 0) -> float: +def _average_best(fold_losses: np.ndarray, proportion: float = 0.9, axis: int = 0) -> float: """ - Compute the average of the input array along the specified axis, among the best `percentage` + Compute the average of the input array along the specified axis, among the best `proportion` of replicas. Parameters ---------- fold_losses: np.ndarray Per replica losses for a single fold. - percentage: float - The percentage of best replicas to take into account (rounded up). + proportion: float + The proportion of best replicas to take into account (rounded up). axis: int, optional Axis along which the mean is computed. Default is 0. @@ -62,13 +62,14 @@ def _average_best(fold_losses: np.ndarray, percentage: float = 0.9, axis: int = ------- float: The average along the specified axis. """ - num_best = int(np.ceil(percentage * len(fold_losses))) + # TODO: use directly `validphys.fitveto.determine_vetoes` + num_best = int(np.ceil(proportion * len(fold_losses))) if np.isnan(fold_losses).any(): log.warning(f"{np.isnan(fold_losses).sum()} replicas have NaNs losses") sorted_losses = np.sort(fold_losses, axis=axis) best_losses = sorted_losses[:num_best] - return np.average(best_losses, axis=axis).item() + return _average(best_losses, axis=axis) def _average(fold_losses: np.ndarray, axis: int = 0) -> float: @@ -146,6 +147,13 @@ class HyperLoss: Computes the statistic over the replicas and then over the folds, both statistics default to the average. 
+ The ``compute_loss`` method saves intermediate metrics such as the + chi2 of the folds or the phi regardless of the loss type that has been selected. + These metrics are saved in the properties + ``phi_vector``: list of phi per fold + ``chi2_matrix``: list of chi2 per fold, per replica + + Parameters ---------- loss_type: str @@ -304,12 +312,10 @@ def _parse_loss(self, loss_type: str) -> str: if loss_type is None: loss_type = self._default_loss log.warning(f"No loss_type selected in HyperLoss, defaulting to {loss_type}") - else: - if loss_type not in IMPLEMENTED_LOSSES: - valid_options = ", ".join(IMPLEMENTED_LOSSES) - raise ValueError( - f"Invalid loss type '{loss_type}'. Valid options are: {valid_options}" - ) + + if loss_type not in IMPLEMENTED_LOSSES: + valid_options = ", ".join(IMPLEMENTED_LOSSES) + raise ValueError(f"Invalid loss type '{loss_type}'. Valid options are: {valid_options}") log.info(f"Setting '{loss_type}' as the loss type for hyperoptimization") @@ -341,12 +347,10 @@ def _parse_statistic(self, statistic: str, name: str, default: str) -> Callable: if statistic is None: statistic = default log.warning(f"No {name} selected in HyperLoss, defaulting to {statistic}") - else: - if statistic not in IMPLEMENTED_STATS: - valid_options = ", ".join(IMPLEMENTED_STATS.keys()) - raise ValueError( - f"Invalid {name} '{statistic}'. Valid options are: {valid_options}" - ) + + if statistic not in IMPLEMENTED_STATS: + valid_options = ", ".join(IMPLEMENTED_STATS.keys()) + raise ValueError(f"Invalid {name} '{statistic}'. 
Valid options are: {valid_options}") log.info(f"Using '{statistic}' as the {name} for hyperoptimization") diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 65b148f0a8..14a8b5bcd0 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -1001,15 +1001,6 @@ def hyperparametrizable(self, params): for penalty in self.hyper_penalties } - hyper_loss_per_replica = experimental_loss + sum(penalties.values()) - fold_loss = self._hyper_loss.reduce_over_replicas(hyper_loss_per_replica) - passed = fold_loss < self.hyper_threshold - if not passed: - log.info( - f"Hyperparameter combination failed to find a good fit (loss={fold_loss} > {self.hyper_threshold})" - ) - break - # Extracting the necessary data to compute phi # First, create a list of `validphys.core.DataGroupSpec` # containing only exp datasets within the held out fold @@ -1024,8 +1015,6 @@ def hyperparametrizable(self, params): fold_idx=k, ) - log.info("Fold %d finished, loss=%.1f, pass=%s", k + 1, hyper_loss, passed) - # Create another list of `validphys.core.DataGroupSpec` # containing now exp datasets that are included in the training/validation dataset trvl_partitions = list(self.kpartitions) @@ -1054,7 +1043,11 @@ def hyperparametrizable(self, params): # Apply a penalty proportional to the number of folds not computed pen_mul = len(self.kpartitions) - k l_hyper = [i * pen_mul for i in l_hyper] + passed = False break + else: + passed = True + log.info("Fold %d finished, loss=%.1f, pass=%s", k + 1, hyper_loss, passed) # endfor From 011bdf8e9363f0f18c2a00519012188e610fc76a Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 30 May 2024 12:15:36 +0200 Subject: [PATCH 26/30] dont include penalties by default in the calculation of the loss --- n3fit/src/n3fit/hyper_optimization/rewards.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/n3fit/src/n3fit/hyper_optimization/rewards.py 
b/n3fit/src/n3fit/hyper_optimization/rewards.py index 50ceabaa85..b0b4aba767 100644 --- a/n3fit/src/n3fit/hyper_optimization/rewards.py +++ b/n3fit/src/n3fit/hyper_optimization/rewards.py @@ -192,10 +192,14 @@ def compute_loss( pdf_model: MetaModel, experimental_data: List[DataGroupSpec], fold_idx: int = 0, + include_penalties=False, ) -> float: """ Compute the loss, including added penalties, for a single fold. + Save the phi of the assemble and the chi2 of the separate replicas, + and the penalties into the ``phi_vector``, ``chi2_matrix`` and ``penalties`` attributes. + Parameters ---------- penalties: Dict[str, NDArray(replicas)] @@ -210,6 +214,8 @@ def compute_loss( List of tuples containing `validphys.core.DataGroupSpec` instances for each group data set fold_idx: int k-fold index. Defaults to 0. + include_penalties: float + Whether to include the penalties in the returned loss value Returns ------- @@ -238,17 +244,19 @@ def compute_loss( # these are saved in the phi_vector and chi2_matrix attributes, excluding penalties self._save_hyperopt_metrics(phi_per_fold, experimental_loss, penalties, fold_idx) - # include penalties to experimental loss - # this allows introduction of statistics also to penalties - experimental_loss_w_penalties = experimental_loss + sum(penalties.values()) + # Prepare the output loss, including penalties if necessary + + if include_penalties: + # include penalties to experimental loss + experimental_loss += sum(penalties.values()) - # add penalties to phi in the form of a sum of per-replicas averages - phi_per_fold += sum(np.mean(penalty) for penalty in penalties.values()) + # add penalties to phi in the form of a sum of per-replicas averages + phi_per_fold += sum(np.mean(penalty) for penalty in penalties.values()) # define loss for hyperopt according to the chosen loss_type if self.loss_type == "chi2": # calculate statistics of chi2 over replicas for a given k-fold - loss = self.reduce_over_replicas(experimental_loss_w_penalties) + 
loss = self.reduce_over_replicas(experimental_loss) elif self.loss_type == "phi2": loss = phi_per_fold**2 From 456c6cfce0c9625cdbc016c0a36a5e8e33c98d6c Mon Sep 17 00:00:00 2001 From: juacrumar Date: Fri, 31 May 2024 09:33:59 +0200 Subject: [PATCH 27/30] add `kfold::penalties_in_loss: bool` as a runcard option --- n3fit/src/n3fit/hyper_optimization/rewards.py | 13 +++++++++---- n3fit/src/n3fit/model_trainer.py | 1 + 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/n3fit/src/n3fit/hyper_optimization/rewards.py b/n3fit/src/n3fit/hyper_optimization/rewards.py index b0b4aba767..bba8057d89 100644 --- a/n3fit/src/n3fit/hyper_optimization/rewards.py +++ b/n3fit/src/n3fit/hyper_optimization/rewards.py @@ -165,12 +165,19 @@ class HyperLoss: fold_statistic: str the statistic over the folds to use. Options are "average", "best_worst", and "std". + penalties_in_loss: bool + whether the penalties should be included in the output of ``compute_loss`` """ def __init__( - self, loss_type: str = None, replica_statistic: str = None, fold_statistic: str = None + self, + loss_type: str = None, + replica_statistic: str = None, + fold_statistic: str = None, + penalties_in_loss: bool = False, ): self._default_loss = "chi2" + self._penalties_in_loss = penalties_in_loss self.loss_type = self._parse_loss(loss_type) self.reduce_over_replicas = self._parse_statistic( @@ -192,7 +199,6 @@ def compute_loss( pdf_model: MetaModel, experimental_data: List[DataGroupSpec], fold_idx: int = 0, - include_penalties=False, ) -> float: """ Compute the loss, including added penalties, for a single fold. 
@@ -245,8 +251,7 @@ def compute_loss( self._save_hyperopt_metrics(phi_per_fold, experimental_loss, penalties, fold_idx) # Prepare the output loss, including penalties if necessary - - if include_penalties: + if self._penalties_in_loss: # include penalties to experimental loss experimental_loss += sum(penalties.values()) diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 14a8b5bcd0..df2e0be917 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -192,6 +192,7 @@ def __init__( loss_type=loss_type, replica_statistic=replica_statistic, fold_statistic=fold_statistic, + penalties_in_loss=kfold_parameters.get("penalties_in_loss", False), ) # Initialize the dictionaries which contain all fitting information From 335c8351669c53740d3c6b87576c7f188881e7db Mon Sep 17 00:00:00 2001 From: juacrumar Date: Sun, 2 Jun 2024 12:25:41 +0200 Subject: [PATCH 28/30] add docs for the `penalties_in_loss` key --- doc/sphinx/source/n3fit/hyperopt.rst | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/sphinx/source/n3fit/hyperopt.rst b/doc/sphinx/source/n3fit/hyperopt.rst index 7c7ba903e9..7de6c5c894 100644 --- a/doc/sphinx/source/n3fit/hyperopt.rst +++ b/doc/sphinx/source/n3fit/hyperopt.rst @@ -401,8 +401,9 @@ In NNPDF, this hyperoptimisation metrics is selected via the following generic r kfold: loss_type: chi2 - replica_statistic: average + replica_statistic: average_best fold_statistic: average + penalties_in_loss: False partitions: - datasets: ... @@ -411,6 +412,16 @@ In NNPDF, this hyperoptimisation metrics is selected via the following generic r parallel_models: true + +The key ``replica_statistic`` defines how to combine all replicas when perform a multireplica hyperopt. +With ``average`` a simple average will be taken, ``average_best`` instead will take the 90% best replicas, +mimicking what is done in a real post-fit selection. 
+
+The ``fold_statistic`` instead defines how to combine the loss of the different folds.
+While the values for the ``penalties`` are always saved during the hyperopt run, by default they are not
+considered by the hyperoptimization algorithm.
+If they are to be considered, the key ``penalties_in_loss`` needs to be set to ``True``.
+
 By combining the ``average``, ``best_worst``, and ``std`` figures of merit
 discussed in :ref:`hyperkfolding-label`, several alternatives may arise.
 For example, one approach could involve minimizing the maximum value of the set of averaged-over-replicas :math:`\chi^2`,

From 7b977652df98ad880e2ade860854a2ede4bcc0ba Mon Sep 17 00:00:00 2001
From: juacrumar
Date: Mon, 3 Jun 2024 11:58:30 +0200
Subject: [PATCH 29/30] remove comment about gpu not being compatible with hyperopt :)

---
 doc/sphinx/source/n3fit/runcard_detailed.rst | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/doc/sphinx/source/n3fit/runcard_detailed.rst b/doc/sphinx/source/n3fit/runcard_detailed.rst
index 58aa9b94b6..8f31d98af0 100644
--- a/doc/sphinx/source/n3fit/runcard_detailed.rst
+++ b/doc/sphinx/source/n3fit/runcard_detailed.rst
@@ -319,10 +319,9 @@ Running in parallel can be quite hard on memory and it is only advantageous when
 fitting on a GPU, where one can find a speed up equal to the number of models run in parallel
 (each model being a different replica).
 
-Running in parallel leverages the fact that the only difference between two replicas
-is the output data the prediction is compared to.
-In order to ensure this is indeed the case it is necessary to also
-use the `same_trvl_per_replica` flag in the runcard.
+When running in parallel it might be advantageous (e.g., for debugging)
+to set the training validation split to be equal for all replicas,
+this can be done with the `same_trvl_per_replica: true` runcard flag.
In other words, in order to run several replicas in parallel in a machine (be it a big CPU or, most likely, a GPU) @@ -332,7 +331,6 @@ top-level options: .. code-block:: yaml parallel_models: true - same_trvl_per_replica: true And then run ``n3fit`` with a replica range to be parallelized @@ -348,8 +346,6 @@ should run by setting the environment variable ``CUDA_VISIBLE_DEVICES`` to the right index (usually ``0, 1, 2``) or leaving it explicitly empty to avoid running on GPU: ``export CUDA_VISIBLE_DEVICES=""`` -Note that at present it cannot be used together with the ``hyperopt`` module. - .. _otheroptions-label: From 613bfbc651c4439ba6b555e6179e5a7b797dcdcf Mon Sep 17 00:00:00 2001 From: Cmurilochem Date: Tue, 4 Jun 2024 16:44:45 +0200 Subject: [PATCH 30/30] Add 'average_best' and non-default 'penalties_in_loss' in hyperopt_studies runcards --- n3fit/runcards/hyperopt_studies/renew_hyperopt.yml | 1 + .../restricted_search_space_renew_hyperopt.yml | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/n3fit/runcards/hyperopt_studies/renew_hyperopt.yml b/n3fit/runcards/hyperopt_studies/renew_hyperopt.yml index 8c17b6ecb9..30008f22b3 100644 --- a/n3fit/runcards/hyperopt_studies/renew_hyperopt.yml +++ b/n3fit/runcards/hyperopt_studies/renew_hyperopt.yml @@ -163,6 +163,7 @@ kfold: loss_type: chi2 replica_statistic: average_best fold_statistic: average + penalties_in_loss: True penalties: - saturation - patience diff --git a/n3fit/runcards/hyperopt_studies/restricted_search_space_renew_hyperopt.yml b/n3fit/runcards/hyperopt_studies/restricted_search_space_renew_hyperopt.yml index 02b34d7cce..332247100d 100644 --- a/n3fit/runcards/hyperopt_studies/restricted_search_space_renew_hyperopt.yml +++ b/n3fit/runcards/hyperopt_studies/restricted_search_space_renew_hyperopt.yml @@ -129,8 +129,9 @@ hyperscan_config: kfold: loss_type: chi2 - replica_statistic: average + replica_statistic: average_best fold_statistic: average + penalties_in_loss: True penalties: - 
saturation - patience