Nixtla · kdgutier · Dec 6, 2022 · Dec 6, 2022 · Dec 6, 2022
diff --git a/hierarchicalforecast/_modidx.py b/hierarchicalforecast/_modidx.py
@@ -12,7 +12,9 @@
                                            'hierarchicalforecast.core.HierarchicalReconciliation.reconcile': ( 'core.html#hierarchicalreconciliation.reconcile',
                                                                                                                'hierarchicalforecast/core.py'),
                                            'hierarchicalforecast.core._build_fn_name': ( 'core.html#_build_fn_name',
-                                                                                         'hierarchicalforecast/core.py')},
+                                                                                         'hierarchicalforecast/core.py'),
+                                           'hierarchicalforecast.core._reverse_engineer_sigmah': ( 'core.html#_reverse_engineer_sigmah',
+                                                                                                   'hierarchicalforecast/core.py')},
             'hierarchicalforecast.evaluation': { 'hierarchicalforecast.evaluation.HierarchicalEvaluation': ( 'evaluation.html#hierarchicalevaluation',
                                                                                                              'hierarchicalforecast/evaluation.py'),
                                                  'hierarchicalforecast.evaluation.HierarchicalEvaluation.__init__': ( 'evaluation.html#hierarchicalevaluation.__init__',

diff --git a/hierarchicalforecast/core.py b/hierarchicalforecast/core.py
@@ -35,6 +35,35 @@ def _build_fn_name(fn) -> str:
     return fn_name
 
 # %% ../nbs/core.ipynb 9
+def _reverse_engineer_sigmah(Y_hat_df, y_hat_model, model_name, uids):
+    """
+    This function assumes that the model creates prediction intervals
+    under a normality assumption with the following the Equation:
+    $\hat{y}_{t+h} + c \hat{sigma}_{h}$
+
+    In the future, we might deprecate this function in favor of a 
+    direct usage of an estimated $\hat{sigma}_{h}$
+    """
+    drop_cols = ['ds', 'y'] if 'y' in Y_hat_df.columns else ['ds']
+    model_names = Y_hat_df.drop(columns=drop_cols, axis=1).columns.to_list()
+    pi_model_names = [name for name in model_names if ('-lo' in name or '-hi' in name)]
+    pi_model_name = [pi_name for pi_name in pi_model_names if model_name in pi_name]
+    pi = len(pi_model_name) > 0
+
+    if not pi:
+        raise Exception(f'Please include {model_name} prediction intervals in `Y_hat_df`')
+
+    pi_col = pi_model_name[0]
+    sign = -1 if 'lo' in pi_col else 1
+    level_col = re.findall('[\d]+[.,\d]+|[\d]*[.][\d]+|[\d]+', pi_col)
+    level_col = float(level_col[0])
+    z = norm.ppf(0.5 + level_col / 200)
+    sigmah = Y_hat_df.pivot(columns='ds', values=pi_col).loc[uids].values
+    sigmah = sign * (sigmah - y_hat_model) / z
+
+    return sigmah
+
+# %% ../nbs/core.ipynb 10
 class HierarchicalReconciliation:
     """Hierarchical Reconciliation Class.
 
@@ -138,24 +167,16 @@ def reconcile(self,
             reconcile_fn_name = _build_fn_name(reconcile_fn)
             has_fitted = 'y_hat_insample' in signature(reconcile_fn).parameters
             has_level = 'level' in signature(reconcile_fn).parameters
+
             for model_name in model_names:
-                # should we calculate prediction intervals?
-                pi_model_name = [pi_name for pi_name in pi_model_names if model_name in pi_name]
-                pi = len(pi_model_name) > 0
                 # Remember: pivot sorts uid
                 y_hat_model = Y_hat_df.pivot(columns='ds', values=model_name).loc[uids].values
-                if pi and has_level and level is not None and intervals_method in ['normality', 'permbu']:
-                    # we need to construct sigmah and add it
-                    # to the reconciler_args
-                    # to recover sigmah we only need 
-                    # one prediction intervals
-                    pi_col = pi_model_name[0]
-                    sign = -1 if 'lo' in pi_col else 1
-                    level_col = re.findall('[\d]+[.,\d]+|[\d]*[.][\d]+|[\d]+', pi_col)
-                    level_col = float(level_col[0])
-                    z = norm.ppf(0.5 + level_col / 200)
-                    sigmah = Y_hat_df.pivot(columns='ds', values=pi_col).loc[uids].values
-                    sigmah = sign * (sigmah - y_hat_model) / z
+
+                # Recover sigmah and add it to reconciler_args
+                if has_level and level is not None and intervals_method in ['normality', 'permbu']:
+                    sigmah = _reverse_engineer_sigmah(Y_hat_df=Y_hat_df,
+                                y_hat_model=y_hat_model, model_name=model_name, uids=uids)
+
                     reconciler_args['level'] = level
                     if intervals_method == 'permbu':
                         y_hat_insample = Y_df.pivot(columns='ds', values=model_name).loc[uids].values
@@ -202,7 +223,7 @@ def reconcile(self,
                 # this will require outputs of reconcile_fn to include P, W
 
                 fcsts[f'{model_name}/{reconcile_fn_name}'] = fcsts_model['mean'].flatten()
-                if (pi and has_level and level is not None) or (intervals_method in ['bootstrap'] and level is not None):
+                if intervals_method in ['bootstrap', 'normality', 'permbu'] and level is not None:
                     for lv in level:
                         fcsts[f'{model_name}/{reconcile_fn_name}-lo-{lv}'] = fcsts_model[f'lo-{lv}'].flatten()
                         fcsts[f'{model_name}/{reconcile_fn_name}-hi-{lv}'] = fcsts_model[f'hi-{lv}'].flatten()

diff --git a/hierarchicalforecast/utils.py b/hierarchicalforecast/utils.py
@@ -244,7 +244,7 @@ def plot_hierarchical_predictions_gap(self,
         fig, ax = plt.subplots(figsize=(8, 5))
 
         if 'y' in Y_df.columns:
-            idx_top = S.sum(axis=1).idxmax()
+            idx_top = self.S.sum(axis=1).idxmax()
             y_plot = Y_df.loc[idx_top].y.values
             plt.plot(horizon_dates, y_plot, label='True')
 

diff --git a/nbs/core.ipynb b/nbs/core.ipynb
@@ -121,6 +121,42 @@
     "# <span style=\"color:DarkBlue\"> core.HierarchicalReconciliation </span>"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| exporti\n",
+    "def _reverse_engineer_sigmah(Y_hat_df, y_hat_model, model_name, uids):\n",
+    "    \"\"\"\n",
+    "    This function assumes that the model creates prediction intervals\n",
+    "    under a normality assumption with the following the Equation:\n",
+    "    $\\hat{y}_{t+h} + c \\hat{sigma}_{h}$\n",
+    "\n",
+    "    In the future, we might deprecate this function in favor of a \n",
+    "    direct usage of an estimated $\\hat{sigma}_{h}$\n",
+    "    \"\"\"\n",
+    "    drop_cols = ['ds', 'y'] if 'y' in Y_hat_df.columns else ['ds']\n",
+    "    model_names = Y_hat_df.drop(columns=drop_cols, axis=1).columns.to_list()\n",
+    "    pi_model_names = [name for name in model_names if ('-lo' in name or '-hi' in name)]\n",
+    "    pi_model_name = [pi_name for pi_name in pi_model_names if model_name in pi_name]\n",
+    "    pi = len(pi_model_name) > 0\n",
+    "\n",
+    "    if not pi:\n",
+    "        raise Exception(f'Please include {model_name} prediction intervals in `Y_hat_df`')\n",
+    "\n",
+    "    pi_col = pi_model_name[0]\n",
+    "    sign = -1 if 'lo' in pi_col else 1\n",
+    "    level_col = re.findall('[\\d]+[.,\\d]+|[\\d]*[.][\\d]+|[\\d]+', pi_col)\n",
+    "    level_col = float(level_col[0])\n",
+    "    z = norm.ppf(0.5 + level_col / 200)\n",
+    "    sigmah = Y_hat_df.pivot(columns='ds', values=pi_col).loc[uids].values\n",
+    "    sigmah = sign * (sigmah - y_hat_model) / z\n",
+    "\n",
+    "    return sigmah"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -231,24 +267,16 @@
     "            reconcile_fn_name = _build_fn_name(reconcile_fn)\n",
     "            has_fitted = 'y_hat_insample' in signature(reconcile_fn).parameters\n",
     "            has_level = 'level' in signature(reconcile_fn).parameters\n",
+    "            \n",
     "            for model_name in model_names:\n",
-    "                # should we calculate prediction intervals?\n",
-    "                pi_model_name = [pi_name for pi_name in pi_model_names if model_name in pi_name]\n",
-    "                pi = len(pi_model_name) > 0\n",
     "                # Remember: pivot sorts uid\n",
     "                y_hat_model = Y_hat_df.pivot(columns='ds', values=model_name).loc[uids].values\n",
-    "                if pi and has_level and level is not None and intervals_method in ['normality', 'permbu']:\n",
-    "                    # we need to construct sigmah and add it\n",
-    "                    # to the reconciler_args\n",
-    "                    # to recover sigmah we only need \n",
-    "                    # one prediction intervals\n",
-    "                    pi_col = pi_model_name[0]\n",
-    "                    sign = -1 if 'lo' in pi_col else 1\n",
-    "                    level_col = re.findall('[\\d]+[.,\\d]+|[\\d]*[.][\\d]+|[\\d]+', pi_col)\n",
-    "                    level_col = float(level_col[0])\n",
-    "                    z = norm.ppf(0.5 + level_col / 200)\n",
-    "                    sigmah = Y_hat_df.pivot(columns='ds', values=pi_col).loc[uids].values\n",
-    "                    sigmah = sign * (sigmah - y_hat_model) / z\n",
+    "\n",
+    "                # Recover sigmah and add it to reconciler_args\n",
+    "                if has_level and level is not None and intervals_method in ['normality', 'permbu']:\n",
+    "                    sigmah = _reverse_engineer_sigmah(Y_hat_df=Y_hat_df,\n",
+    "                                y_hat_model=y_hat_model, model_name=model_name, uids=uids)\n",
+    "\n",
     "                    reconciler_args['level'] = level\n",
     "                    if intervals_method == 'permbu':\n",
     "                        y_hat_insample = Y_df.pivot(columns='ds', values=model_name).loc[uids].values\n",
@@ -295,7 +323,7 @@
     "                # this will require outputs of reconcile_fn to include P, W\n",
     "\n",
     "                fcsts[f'{model_name}/{reconcile_fn_name}'] = fcsts_model['mean'].flatten()\n",
-    "                if (pi and has_level and level is not None) or (intervals_method in ['bootstrap'] and level is not None):\n",
+    "                if intervals_method in ['bootstrap', 'normality', 'permbu'] and level is not None:\n",
     "                    for lv in level:\n",
     "                        fcsts[f'{model_name}/{reconcile_fn_name}-lo-{lv}'] = fcsts_model[f'lo-{lv}'].flatten()\n",
     "                        fcsts[f'{model_name}/{reconcile_fn_name}-hi-{lv}'] = fcsts_model[f'hi-{lv}'].flatten()\n",

diff --git a/nbs/utils.ipynb b/nbs/utils.ipynb
@@ -359,7 +359,7 @@
     "        fig, ax = plt.subplots(figsize=(8, 5))\n",
     "        \n",
     "        if 'y' in Y_df.columns:\n",
-    "            idx_top = S.sum(axis=1).idxmax()\n",
+    "            idx_top = self.S.sum(axis=1).idxmax()\n",
     "            y_plot = Y_df.loc[idx_top].y.values\n",
     "            plt.plot(horizon_dates, y_plot, label='True')\n",
     "\n",
@@ -577,9 +577,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "hierarchicalforecast",
    "language": "python",
-   "name": "python3"
+   "name": "hierarchicalforecast"
   }
  },
  "nbformat": 4,