Added 'HyperLoss' class and phi2 loss evaluation

NNPDF · Mar 4, 2024 · c499725 · c499725
1 parent 625372d
commit c499725
Show file tree

Hide file tree

Showing 10 changed files with 638 additions and 85 deletions.
diff --git a/n3fit/runcards/examples/Basic_hyperopt.yml b/n3fit/runcards/examples/Basic_hyperopt.yml
@@ -81,7 +81,7 @@ hyperscan_config:
       activations: ['sigmoid', 'tanh']
 
 kfold:
-  target: average
+  fold_statistic: average
   penalties:
     - saturation
     - patience

diff --git a/n3fit/src/n3fit/checks.py b/n3fit/src/n3fit/checks.py
@@ -9,6 +9,7 @@
 
 from n3fit.hyper_optimization import penalties as penalties_module
 from n3fit.hyper_optimization import rewards as rewards_module
+from n3fit.hyper_optimization.rewards import HyperLoss
 from reportengine.checks import CheckError, make_argcheck
 from validphys.core import PDF
 from validphys.pdfbases import check_basis
@@ -255,15 +256,33 @@ def check_kfold_options(kfold):
             raise CheckError(
                 f"The penalty '{penalty}' is not recognized, ensure it is implemented in hyper_optimization/penalties.py"
             )
-    loss_target = kfold.get("target")
-    if loss_target is not None:
-        if not hasattr(rewards_module, loss_target):
+
+    loss_type = kfold.get("loss_type")
+    if loss_type is not None:
+        if loss_type not in HyperLoss().implemented_losses:
+            raise CheckError(
+                f"Loss type '{loss_type}' is not recognized, "
+                "ensure it is implemented in the HyperLoss class in hyper_optimization/rewards.py."
+                "Options so far are 'chi2' or 'phi2'."
+            )
+    replica_statistic = kfold.get("replica_statistic")
+    if replica_statistic is not None:
+        if replica_statistic not in HyperLoss().implemented_stats:
             raise CheckError(
-                f"The hyperoptimization target '{loss_target}' loss is not recognized, "
-                "ensure it is implemented in hyper_optimization/rewards.py"
+                f"The replica statistic '{replica_statistic}' is not recognized, "
+                "ensure it is implemented in the HyperLoss class in hyper_optimization/rewards.py"
             )
+    fold_statistic = kfold.get("fold_statistic")
+    if fold_statistic is not None:
+        if fold_statistic not in HyperLoss().implemented_stats:
+            raise CheckError(
+                f"The fold statistic '{fold_statistic}' is not recognized, "
+                "ensure it is implemented in the HyperLoss class in hyper_optimization/rewards.py"
+            )
+
     partitions = kfold["partitions"]
     # Check specific errors for specific targets
+    loss_target = kfold.get("fold_statistic")  # TODO: haven't updated this
     if loss_target == "fit_future_tests":
         if len(partitions) == 1:
             raise CheckError("Cannot use target 'fit_future_tests' with just one partition")

diff --git a/n3fit/src/n3fit/hyper_optimization/hyper_scan.py b/n3fit/src/n3fit/hyper_optimization/hyper_scan.py
@@ -14,6 +14,7 @@
 """
 import copy
 import logging
+from typing import Callable
 
 import hyperopt
 import numpy as np
@@ -23,6 +24,11 @@
 
 log = logging.getLogger(__name__)
 
+# Hyperopt uses these strings for a passed and failed run
+# it also has statusses "new", "running" and "suspended", but we don't use them
+HYPEROPT_STATUSSES = {True: "ok", False: "fail"}
+
+
 HYPEROPT_SEED = 42
 
 
@@ -113,7 +119,7 @@ def hyper_scan_wrapper(replica_path_set, model_trainer, hyperscanner, max_evals=
         parameters of the best trial as found by ``hyperopt``
     """
     # Tell the trainer we are doing hpyeropt
-    model_trainer.set_hyperopt(True, keys=hyperscanner.hyper_keys, status_ok=hyperopt.STATUS_OK)
+    model_trainer.set_hyperopt(True, keys=hyperscanner.hyper_keys)
     # Generate the trials object
     trials = FileTrials(replica_path_set, parameters=hyperscanner.as_dict())
     # Initialize seed for hyperopt
@@ -127,7 +133,7 @@ def hyper_scan_wrapper(replica_path_set, model_trainer, hyperscanner, max_evals=
 
     # Perform the scan
     best = hyperopt.fmin(
-        fn=model_trainer.hyperparametrizable,
+        fn=_status_wrapper(model_trainer.hyperparametrizable),
         space=hyperscanner.as_dict(),
         algo=hyperopt.tpe.suggest,
         max_evals=max_evals,
@@ -139,6 +145,19 @@ def hyper_scan_wrapper(replica_path_set, model_trainer, hyperscanner, max_evals=
     return hyperscanner.space_eval(best)
 
 
+def _status_wrapper(hyperparametrizable: Callable) -> Callable:
+    """
+    Wrapper that just converts the "status" value to hyperopt's conventions.
+    """
+
+    def wrapped(*args, **kwargs):
+        results_dict = hyperparametrizable(*args, **kwargs)
+        results_dict["status"] = HYPEROPT_STATUSSES[results_dict["status"]]
+        return results_dict
+
+    return wrapped
+
+
 class ActivationStr:
     """
     Upon call this class returns an array where the activation function

diff --git a/n3fit/src/n3fit/hyper_optimization/penalties.py b/n3fit/src/n3fit/hyper_optimization/penalties.py
@@ -40,6 +40,11 @@ def saturation(pdf_model=None, n=100, min_x=1e-6, max_x=1e-4, flavors=None, **_k
         flavors: list(int)
             indices of the flavors to inspect
 
+    Returns
+    -------
+        NDArray
+            array of saturation penalties for each replica
+
     Example
     -------
     >>> from n3fit.hyper_optimization.penalties import saturation
@@ -72,8 +77,9 @@ def saturation(pdf_model=None, n=100, min_x=1e-6, max_x=1e-4, flavors=None, **_k
     return extra_loss
 
 
-def patience(stopping_object=None, alpha=1e-4, **_kwargs):
-    """Adds a penalty for fits that have finished too soon, which
+def patience(stopping_object, alpha: float = 1e-4, **_kwargs):
+    """
+    Adds a penalty for fits that have finished too soon, which
     means the number of epochs or its patience is not optimal.
     The penalty is proportional to the validation loss and will be 0
     when the best epoch is exactly at max_epoch - patience
@@ -85,6 +91,11 @@ def patience(stopping_object=None, alpha=1e-4, **_kwargs):
         alpha: float
             dumping factor for the exponent
 
+    Returns
+    -------
+        NDArray
+            patience penalty for each replica
+
     Example
     -------
     >>> from n3fit.hyper_optimization.penalties import patience
@@ -94,11 +105,11 @@ def patience(stopping_object=None, alpha=1e-4, **_kwargs):
     3.434143467595683
 
     """
-    epoch_best = np.take(stopping_object.e_best_chi2, 0)
+    epoch_best = np.array(stopping_object.e_best_chi2)
     patience = stopping_object.stopping_patience
     max_epochs = stopping_object.total_epochs
     diff = abs(max_epochs - patience - epoch_best)
-    vl_loss = np.take(stopping_object.vl_chi2, 0)
+    vl_loss = np.array(stopping_object.vl_chi2)
     return vl_loss * np.exp(alpha * diff)
 
 
@@ -109,6 +120,11 @@ def integrability(pdf_model=None, **_kwargs):
 
     The penalty increases exponentially with the growth of the integrability number
 
+    Returns
+    -------
+        NDArray
+            array of integrability penalties for each replica
+
     Example
     -------
     >>> from n3fit.hyper_optimization.penalties import integrability
@@ -121,8 +137,19 @@ def integrability(pdf_model=None, **_kwargs):
     """
     pdf_instance = N3PDF(pdf_model.split_replicas())
     integ_values = integrability_numbers(pdf_instance)
-    integ_overflow = np.sum(integ_values[integ_values > fitveto.INTEG_THRESHOLD])
-    if integ_overflow > 50.0:
-        # before reaching an overflow, just give a stupidly big number
-        return np.exp(50.0)
+
+    # set components under the threshold to 0
+    integ_values[integ_values <= fitveto.INTEG_THRESHOLD] = 0.0
+
+    # sum over flavours
+    integ_overflow = np.sum(integ_values, axis=-1)  # -1 rather than 1 so it works with 1 replica
+
+    # Limit components to 50 to avoid overflow
+    if isinstance(integ_overflow, np.ndarray):
+        # Case: multi-replica scenario
+        integ_overflow[integ_overflow > 50.0] = 50.0
+    elif isinstance(integ_overflow, (float, np.float64)):
+        # Case: single replica scenario
+        integ_overflow = min(integ_overflow, 50.0)
+
     return np.exp(integ_overflow) - 1.0