Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEAT] Efficient MinTrace (ols/wls_var/wls_struct/mint_cov/mint_shrink) #264

Merged
merged 5 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,6 @@ Gemfile.lock
_docs/
sidebar.yml
_proc/

# VS Code project settings
.vscode
36 changes: 24 additions & 12 deletions hierarchicalforecast/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,10 @@ def rel_mse(y, y_hat, y_train, mask=None):
Computes Relative mean squared error (RelMSE), as proposed by Hyndman & Koehler (2006)
as an alternative to percentage errors, to avoid measure instability.

$$ \mathrm{RelMSE}(\\mathbf{y}, \\mathbf{\hat{y}}, \\mathbf{\hat{y}}^{naive1}) =
\\frac{\mathrm{MSE}(\\mathbf{y}, \\mathbf{\hat{y}})}{\mathrm{MSE}(\\mathbf{y}, \\mathbf{\hat{y}}^{naive1})} $$
$$
\mathrm{RelMSE}(\\mathbf{y}, \\mathbf{\hat{y}}, \\mathbf{\hat{y}}^{naive1}) =
\\frac{\mathrm{MSE}(\\mathbf{y}, \\mathbf{\hat{y}})}{\mathrm{MSE}(\\mathbf{y}, \\mathbf{\hat{y}}^{naive1})}
$$

**Parameters:**<br>
`y`: numpy array, Actual values of size (`n_series`, `horizon`).<br>
Expand Down Expand Up @@ -151,8 +153,10 @@ def msse(y, y_hat, y_train, mask=None):
Computes Mean squared scaled error (MSSE), as proposed by Hyndman & Koehler (2006)
as an alternative to percentage errors, to avoid measure instability.

$$ \\mathrm{MSSE}(\\mathbf{y}, \\mathbf{\\hat{y}}, \\mathbf{y}^{in-sample}) =
\\frac{\\frac{1}{h} \\sum^{t+h}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^2}{\\frac{1}{t-1} \\sum^{t}_{\\tau=2} (y_{\\tau} - y_{\\tau-1})^2},$$
$$
\\mathrm{MSSE}(\\mathbf{y}, \\mathbf{\\hat{y}}, \\mathbf{y}^{in-sample}) =
\\frac{\\frac{1}{h} \\sum^{t+h}_{\\tau=t+1} (y_{\\tau} - \\hat{y}_{\\tau})^2}{\\frac{1}{t-1} \\sum^{t}_{\\tau=2} (y_{\\tau} - y_{\\tau-1})^2}
$$

where $n$ ($n=$`n`) is the size of the training data, and $h$ is the forecasting horizon ($h=$`horizon`).

Expand Down Expand Up @@ -192,9 +196,11 @@ def scaled_crps(y, y_hat, quantiles):
This metric averages percentual weighted absolute deviations as
defined by the quantile losses.

$$ \mathrm{sCRPS}(\hat{F}_{\\tau}, \mathbf{y}_{\\tau}) = \\frac{2}{N} \sum_{i}
$$
\mathrm{sCRPS}(\hat{F}_{\\tau}, \mathbf{y}_{\\tau}) = \\frac{2}{N} \sum_{i}
\int^{1}_{0}
\\frac{\mathrm{QL}(\hat{F}_{i,\\tau}, y_{i,\\tau})_{q}}{\sum_{i} | y_{i,\\tau} |} dq $$
\\frac{\mathrm{QL}(\hat{F}_{i,\\tau}, y_{i,\\tau})_{q}}{\sum_{i} | y_{i,\\tau} |} dq
$$

where $\hat{F}_{\\tau}$ is an estimated multivariate distribution, and $y_{i,\\tau}$
are its realizations.
Expand Down Expand Up @@ -231,10 +237,12 @@ def energy_score(y, y_sample1, y_sample2, beta=2):
`y` and independent multivariate samples `y_sample1` and `y_sample2`.
The Energy Score generalizes the CRPS (`beta`=1) in the multivariate setting.

$$ \mathrm{ES}(\\mathbf{y}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}')
$$
\mathrm{ES}(\\mathbf{y}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}')
= \\frac{1}{2} \mathbb{E}_{\hat{P}} \\left[ ||\\mathbf{\hat{y}}_{\\tau} - \\mathbf{\hat{y}}_{\\tau}'||^{\\beta} \\right]
- \mathbb{E}_{\hat{P}} \\left[ ||\\mathbf{y}_{\\tau} - \\mathbf{\hat{y}}_{\\tau}||^{\\beta} \\right]
\quad \\beta \in (0,2]$$
\quad \\beta \in (0,2]
$$

where $\\mathbf{\hat{y}}_{\\tau}, \\mathbf{\hat{y}}_{\\tau}'$ are independent samples drawn from $\hat{P}$.

Expand Down Expand Up @@ -274,21 +282,25 @@ def log_score(y, y_hat, cov, allow_singular=True):
One of the simplest multivariate probability scoring rules,
it evaluates the negative logarithm of the density at the value of the realisation.

$$ \mathrm{LS}(\\mathbf{y}_{\\tau}, \\mathbf{P}(\\theta_{\\tau}))
= - \\log(f(\\mathbf{y}_{\\tau}, \\theta_{\\tau}))$$
$$
\mathrm{LS}(\\mathbf{y}_{\\tau}, \\mathbf{P}(\\theta_{\\tau}))
= - \\log(f(\\mathbf{y}_{\\tau}, \\theta_{\\tau}))
$$

where $f$ is the density, $\\mathbf{P}(\\theta_{\\tau})$ is a
parametric distribution and $f(\\mathbf{y}_{\\tau}, \\theta_{\\tau})$
represents its density.
For the moment we only support multivariate normal log score.

$$f(\\mathbf{y}_{\\tau}, \\theta_{\\tau}) =
$$
f(\\mathbf{y}_{\\tau}, \\theta_{\\tau}) =
(2\\pi )^{-k/2}\\det({\\boldsymbol{\Sigma }})^{-1/2}
\,\\exp \\left(
-{\\frac {1}{2}}(\mathbf{y}_{\\tau} -\\hat{\mathbf{y}}_{\\tau})^{\!{\mathsf{T}}}
{\\boldsymbol{\Sigma }}^{-1}
(\mathbf{y}_{\\tau} -\\hat{\mathbf{y}}_{\\tau})
\\right)$$
\\right)
$$

**Parameters:**<br>
`y`: numpy array, Actual values of size (`n_series`, `horizon`).<br>
Expand Down
51 changes: 31 additions & 20 deletions hierarchicalforecast/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,10 +611,18 @@ def _get_PW_matrices(self,
if self.method in res_methods and y_insample is None and y_hat_insample is None:
raise ValueError(f"For methods {', '.join(res_methods)} you need to pass residuals")
n_hiers, n_bottom = S.shape
n_aggs = n_hiers - n_bottom
# Construct J and U.T
J = np.concatenate((np.zeros((n_bottom, n_aggs), dtype=np.float64), S[n_aggs:]), axis=1)
Ut = np.concatenate((np.eye(n_aggs, dtype=np.float64), -S[:n_aggs]), axis=1)
if self.method == 'ols':
W = np.eye(n_hiers)
UtW = Ut
elif self.method == 'wls_struct':
W = np.diag(S @ np.ones((n_bottom,)))
# W = np.diag(S @ np.ones((n_bottom,)))
Wdiag = np.sum(S, axis=1, dtype=np.float64)
UtW = Ut * Wdiag
W = np.diag(Wdiag)
elif self.method in res_methods:
# Residuals with shape (obs, n_hiers)
residuals = (y_insample - y_hat_insample).T
Expand All @@ -627,18 +635,23 @@ def _get_PW_matrices(self,
if zero_residual_prc > .98:
raise Exception(f'Insample residuals close to 0, zero_residual_prc={zero_residual_prc}. Check `Y_df`')

# Protection: cases where data is unavailable/nan
masked_res = np.ma.array(residuals, mask=np.isnan(residuals))
covm = np.ma.cov(masked_res, rowvar=False, allow_masked=True).data

if self.method == 'wls_var':
W = np.diag(np.diag(covm))
Wdiag = np.nansum(residuals**2, axis=0, dtype=np.float64) / residuals.shape[0]
W = np.diag(Wdiag)
UtW = Ut * Wdiag
elif self.method == 'mint_cov':
# Protection: cases where data is unavailable/nan
masked_res = np.ma.array(residuals, mask=np.isnan(residuals))
covm = np.ma.cov(masked_res, rowvar=False, allow_masked=True).data
W = covm
UtW = Ut @ W

elif self.method == 'mint_shrink':
# Schäfer and Strimmer 2005, scale invariant shrinkage
# lasso or ridge might improve numerical stability but
# this version follows https://robjhyndman.com/papers/MinT.pdf

# Protection: cases where data is unavailable/nan
masked_res = np.ma.array(residuals, mask=np.isnan(residuals))
covm = np.ma.cov(masked_res, rowvar=False, allow_masked=True).data
tar = np.diag(np.diag(covm))

# Protections: constant's correlation set to 0
Expand All @@ -661,24 +674,22 @@ def _get_PW_matrices(self,

# Protection: final ridge diagonal protection
W = (lmd * tar + (1 - lmd) * covm) + self.mint_shr_ridge

UtW = Ut @ W
else:
raise ValueError(f'Unknown reconciliation method {self.method}')

if self.method not in diag_only_methods:
eigenvalues, _ = np.linalg.eig(W)
try:
L = np.linalg.cholesky(W)
except np.linalg.LinAlgError:
raise Exception(f'min_trace ({self.method}) needs covariance matrix to be positive definite.')
else:
eigenvalues = np.diag(W)

if any(eigenvalues < 1e-8):
raise Exception(f'min_trace ({self.method}) needs covariance matrix to be positive definite.')

else:
# compute P for free reconciliation
if self.method not in diag_only_methods:
R = S.T @ np.linalg.pinv(W)
else:
R = S.T * np.reciprocal(np.diag(W))
P = np.linalg.pinv(R @ S) @ R
if any(eigenvalues < 1e-8):
raise Exception(f'min_trace ({self.method}) needs covariance matrix to be positive definite.')

P = (J - np.linalg.solve(UtW[:, n_aggs:] @ Ut.T[n_aggs:] + UtW[:, :n_aggs], UtW[:, n_aggs:] @ J.T[n_aggs:]).T @ Ut)

return P, W

Expand Down
7 changes: 5 additions & 2 deletions hierarchicalforecast/probabilistic_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,11 @@ class Normality:
$$\hat{y}_{h} \sim \mathrm{N}(\hat{\\boldsymbol{\\mu}}, \hat{\mathbf{W}}_{h})$$

The reconciled forecasts are also normally distributed:
$$\\tilde{y}_{h} \sim \mathrm{N}(\mathbf{S}\mathbf{P}\hat{\\boldsymbol{\\mu}},
\mathbf{S}\mathbf{P}\hat{\mathbf{W}}_{h} \mathbf{P}^{\intercal} \mathbf{S}^{\intercal})$$

$$
\\tilde{y}_{h} \sim \mathrm{N}(\mathbf{S}\mathbf{P}\hat{\\boldsymbol{\\mu}},
\mathbf{S}\mathbf{P}\hat{\mathbf{W}}_{h} \mathbf{P}^{\intercal} \mathbf{S}^{\intercal})
$$

**Parameters:**<br>
`S`: np.array, summing matrix of size (`base`, `bottom`).<br>
Expand Down
2 changes: 1 addition & 1 deletion nbs/core.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -875,7 +875,7 @@
" tags=tags_strict,\n",
" is_balanced=True,\n",
")\n",
"test_eq(reconciled, reconciled_balanced)"
"test_close(reconciled.drop(columns='ds').values, reconciled_balanced.drop(columns='ds').values, eps=1e-10)"
]
},
{
Expand Down
51 changes: 31 additions & 20 deletions nbs/methods.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1065,10 +1065,18 @@
" if self.method in res_methods and y_insample is None and y_hat_insample is None:\n",
" raise ValueError(f\"For methods {', '.join(res_methods)} you need to pass residuals\")\n",
" n_hiers, n_bottom = S.shape\n",
" n_aggs = n_hiers - n_bottom\n",
" # Construct J and U.T\n",
" J = np.concatenate((np.zeros((n_bottom, n_aggs), dtype=np.float64), S[n_aggs:]), axis=1)\n",
" Ut = np.concatenate((np.eye(n_aggs, dtype=np.float64), -S[:n_aggs]), axis=1)\n",
" if self.method == 'ols':\n",
" W = np.eye(n_hiers)\n",
" UtW = Ut\n",
" elif self.method == 'wls_struct':\n",
" W = np.diag(S @ np.ones((n_bottom,)))\n",
" # W = np.diag(S @ np.ones((n_bottom,)))\n",
" Wdiag = np.sum(S, axis=1, dtype=np.float64)\n",
" UtW = Ut * Wdiag\n",
" W = np.diag(Wdiag)\n",
" elif self.method in res_methods:\n",
" # Residuals with shape (obs, n_hiers)\n",
" residuals = (y_insample - y_hat_insample).T\n",
Expand All @@ -1081,18 +1089,23 @@
" if zero_residual_prc > .98:\n",
" raise Exception(f'Insample residuals close to 0, zero_residual_prc={zero_residual_prc}. Check `Y_df`')\n",
"\n",
" # Protection: cases where data is unavailable/nan\n",
" masked_res = np.ma.array(residuals, mask=np.isnan(residuals))\n",
" covm = np.ma.cov(masked_res, rowvar=False, allow_masked=True).data\n",
"\n",
" if self.method == 'wls_var':\n",
" W = np.diag(np.diag(covm))\n",
" Wdiag = np.nansum(residuals**2, axis=0, dtype=np.float64) / residuals.shape[0]\n",
" W = np.diag(Wdiag)\n",
" UtW = Ut * Wdiag\n",
" elif self.method == 'mint_cov':\n",
" # Protection: cases where data is unavailable/nan\n",
" masked_res = np.ma.array(residuals, mask=np.isnan(residuals))\n",
" covm = np.ma.cov(masked_res, rowvar=False, allow_masked=True).data\n",
" W = covm\n",
" UtW = Ut @ W\n",
"\n",
" elif self.method == 'mint_shrink':\n",
" # Schäfer and Strimmer 2005, scale invariant shrinkage\n",
" # lasso or ridge might improve numerical stability but\n",
" # this version follows https://robjhyndman.com/papers/MinT.pdf\n",
"\n",
" # Protection: cases where data is unavailable/nan\n",
" masked_res = np.ma.array(residuals, mask=np.isnan(residuals))\n",
" covm = np.ma.cov(masked_res, rowvar=False, allow_masked=True).data\n",
" tar = np.diag(np.diag(covm))\n",
"\n",
" # Protections: constant's correlation set to 0\n",
Expand All @@ -1115,24 +1128,22 @@
"\n",
" # Protection: final ridge diagonal protection\n",
" W = (lmd * tar + (1 - lmd) * covm) + self.mint_shr_ridge\n",
"\n",
" UtW = Ut @ W\n",
" else:\n",
" raise ValueError(f'Unknown reconciliation method {self.method}')\n",
"\n",
" if self.method not in diag_only_methods:\n",
" eigenvalues, _ = np.linalg.eig(W)\n",
" try:\n",
" L = np.linalg.cholesky(W)\n",
" except np.linalg.LinAlgError:\n",
" raise Exception(f'min_trace ({self.method}) needs covariance matrix to be positive definite.')\n",
" else:\n",
" eigenvalues = np.diag(W)\n",
"\n",
" if any(eigenvalues < 1e-8):\n",
" raise Exception(f'min_trace ({self.method}) needs covariance matrix to be positive definite.')\n",
"\n",
" else:\n",
" # compute P for free reconciliation\n",
" if self.method not in diag_only_methods:\n",
" R = S.T @ np.linalg.pinv(W)\n",
" else:\n",
" R = S.T * np.reciprocal(np.diag(W))\n",
" P = np.linalg.pinv(R @ S) @ R\n",
" if any(eigenvalues < 1e-8):\n",
" raise Exception(f'min_trace ({self.method}) needs covariance matrix to be positive definite.')\n",
" \n",
" P = (J - np.linalg.solve(UtW[:, n_aggs:] @ Ut.T[n_aggs:] + UtW[:, :n_aggs], UtW[:, n_aggs:] @ J.T[n_aggs:]).T @ Ut)\n",
"\n",
" return P, W\n",
"\n",
Expand Down
Loading