ENH: Add sparse top-down reconciliation via TopDownSparse #277

Merged
9 commits merged on Aug 15, 2024
4 changes: 4 additions & 0 deletions hierarchicalforecast/_modidx.py
@@ -112,6 +112,10 @@
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.TopDown.fit_predict': ( 'methods.html#topdown.fit_predict',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.TopDownSparse': ( 'methods.html#topdownsparse',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.TopDownSparse._get_PW_matrices': ( 'methods.html#topdownsparse._get_pw_matrices',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods._get_child_nodes': ( 'methods.html#_get_child_nodes',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods._reconcile_fcst_proportions': ( 'methods.html#_reconcile_fcst_proportions',
84 changes: 75 additions & 9 deletions hierarchicalforecast/methods.py
@@ -1,7 +1,8 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/methods.ipynb.

# %% auto 0
__all__ = ['BottomUp', 'BottomUpSparse', 'TopDown', 'MiddleOut', 'MinTrace', 'MinTraceSparse', 'OptimalCombination', 'ERM']
__all__ = ['BottomUp', 'BottomUpSparse', 'TopDown', 'TopDownSparse', 'MiddleOut', 'MinTrace', 'MinTraceSparse',
'OptimalCombination', 'ERM']

# %% ../nbs/methods.ipynb 3
import warnings
@@ -255,7 +256,9 @@ def _get_PW_matrices(self, S, idx_bottom):
return P, W

# %% ../nbs/methods.ipynb 22
def _get_child_nodes(S: np.ndarray, tags: Dict[str, np.ndarray]):
def _get_child_nodes(S: Union[np.ndarray, sparse.csr_matrix], tags: Dict[str, np.ndarray]):
if isinstance(S, sparse.spmatrix):
S = S.toarray()
level_names = list(tags.keys())
nodes = OrderedDict()
for i_level, level in enumerate(level_names[:-1]):
@@ -368,6 +371,7 @@ def fit(self,
**Returns:**<br>
`self`: object, fitted reconciler.
"""
self.intervals_method = intervals_method
self.P, self.W = self._get_PW_matrices(S=S, y_hat=y_hat,
tags=tags, y_insample=y_insample)
self.sampler = self._get_sampler(S=S,
@@ -440,7 +444,69 @@ def fit_predict(self,

__call__ = fit_predict

# %% ../nbs/methods.ipynb 32
# %% ../nbs/methods.ipynb 25
class TopDownSparse(TopDown):
"""TopDownSparse Reconciliation Class.

This is an implementation of top-down reconciliation using the sparse matrix
approach. It works much more efficiently on data sets with many time series.

See the parent class for more details.
"""

is_sparse_method = True

def _get_PW_matrices(
self,
S: sparse.csr_matrix,
y_hat: np.ndarray,
tags: Dict[str, np.ndarray],
y_insample: Optional[np.ndarray] = None,
):
# Check if the data structure is strictly hierarchical.
if not is_strictly_hierarchical(S, tags):
raise ValueError(
"Top-down reconciliation requires strictly hierarchical structures."
)

# Get the dimensions of the "summing" matrix.
n_hiers, n_bottom = S.shape

# Get the in-sample values of the top node and bottom nodes.
y_top = y_insample[0]
y_btm = y_insample[(n_hiers - n_bottom) :]

# Calculate the disaggregation proportions.
if self.method == "average_proportions":
prop = np.mean(y_btm / y_top, 1)
elif self.method == "proportion_averages":
prop = np.mean(y_btm, 1) / np.mean(y_top)
elif self.method == "forecast_proportions":
raise Exception(f"Fit method not yet implemented for {self.method}.")
else:
raise Exception(f"{self.method} is an unknown disaggregation method.")

# Instantiate and allocate the "projection" matrix to distribute the
# disaggregated base forecast of the top node to the bottom nodes.
P = sparse.csr_matrix(
(
prop,
np.zeros_like(prop, np.uint8),
np.arange(len(prop) + 1, dtype=np.min_scalar_type(n_bottom)),
),
shape=(n_bottom, n_hiers),
dtype=np.float64,
)

# Instantiate and allocate the "weight" matrix.
if getattr(self, "intervals_method", False) is None:
W = None
else:
W = sparse.eye(n_hiers, dtype=np.float32, format="csr")

return P, W
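
A rough usage sketch (not part of the diff): the two in-sample options compute the proportions as the mean of historical bottom/top ratios (`average_proportions`) or as the ratio of the historical means (`proportion_averages`), and `P` places those proportions in the column of the top node so that `S @ P @ y_hat` yields coherent forecasts. The toy hierarchy, tags, and arrays below are purely illustrative; the call signature mirrors the tests added in `nbs/methods.ipynb`.

    import numpy as np
    from scipy import sparse
    from hierarchicalforecast.methods import TopDownSparse

    # Toy strictly hierarchical structure: total = leaf_a + leaf_b (illustrative only).
    S = np.array([[1.0, 1.0],
                  [1.0, 0.0],
                  [0.0, 1.0]])
    tags = {"total": np.array([0]), "total/leaf": np.array([1, 2])}

    # Synthetic in-sample bottom-level history and base bottom-level forecasts.
    y_bottom = np.array([[10.0, 12.0, 11.0],
                         [30.0, 28.0, 29.0]])
    y_hat_bottom = np.array([[11.0],
                             [29.0]])

    reconciler = TopDownSparse(method="proportion_averages")
    result = reconciler(
        S=sparse.csr_matrix(S),     # sparse "summing" matrix
        y_hat=S @ y_hat_bottom,     # base forecasts for every node
        y_insample=S @ y_bottom,    # in-sample values for every node
        tags=tags,
    )
    print(result["mean"])           # coherent forecasts, shape (3, 1)

Swapping in `method="average_proportions"` works the same way; `forecast_proportions` raises the exception above because it is not yet implemented for this class.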

# %% ../nbs/methods.ipynb 35
class MiddleOut(HReconciler):
"""Middle Out Reconciliation Class.

@@ -556,11 +622,11 @@ def fit_predict(self,

__call__ = fit_predict

# %% ../nbs/methods.ipynb 37
# %% ../nbs/methods.ipynb 40
def crossprod(x):
return x.T @ x

# %% ../nbs/methods.ipynb 38
# %% ../nbs/methods.ipynb 41
class MinTrace(HReconciler):
"""MinTrace Reconciliation Class.

@@ -829,7 +895,7 @@ def fit_predict(self,

__call__ = fit_predict

# %% ../nbs/methods.ipynb 39
# %% ../nbs/methods.ipynb 42
class MinTraceSparse(MinTrace):
"""MinTraceSparse Reconciliation Class.

@@ -939,7 +1005,7 @@ def get_P_action(y):

return P, W

# %% ../nbs/methods.ipynb 49
# %% ../nbs/methods.ipynb 52
class OptimalCombination(MinTrace):
"""Optimal Combination Reconciliation Class.

@@ -973,7 +1039,7 @@ def __init__(self,
super().__init__(method=method, nonnegative=nonnegative, num_threads=num_threads)
self.insample = False

# %% ../nbs/methods.ipynb 58
# %% ../nbs/methods.ipynb 61
@njit
def lasso(X: np.ndarray, y: np.ndarray,
lambda_reg: float, max_iters: int = 1_000,
@@ -1005,7 +1071,7 @@ def lasso(X: np.ndarray, y: np.ndarray,
#print(it)
return beta

# %% ../nbs/methods.ipynb 59
# %% ../nbs/methods.ipynb 62
class ERM(HReconciler):
"""Optimal Combination Reconciliation Class.

141 changes: 141 additions & 0 deletions nbs/methods.ipynb
@@ -524,6 +524,8 @@
"source": [
"#| exporti\n",
"def _get_child_nodes(S: np.ndarray, tags: Dict[str, np.ndarray]):\n",
" if isinstance(S, sparse.spmatrix):\n",
" S = S.toarray()\n",
" level_names = list(tags.keys())\n",
" nodes = OrderedDict()\n",
" for i_level, level in enumerate(level_names[:-1]):\n",
@@ -650,6 +652,7 @@
" **Returns:**<br>\n",
" `self`: object, fitted reconciler.\n",
" \"\"\"\n",
" self.intervals_method = intervals_method\n",
" self.P, self.W = self._get_PW_matrices(S=S, y_hat=y_hat, \n",
" tags=tags, y_insample=y_insample)\n",
" self.sampler = self._get_sampler(S=S,\n",
@@ -723,6 +726,75 @@
" __call__ = fit_predict"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"class TopDownSparse(TopDown):\n",
" \"\"\"TopDownSparse Reconciliation Class.\n",
"\n",
" This is an implementation of top-down reconciliation using the sparse matrix\n",
" approach. It works much more efficiently on data sets with many time series.\n",
"\n",
" See the parent class for more details.\n",
" \"\"\"\n",
"\n",
" is_sparse_method = True\n",
"\n",
" def _get_PW_matrices(\n",
" self,\n",
" S: sparse.csr_matrix,\n",
" y_hat: np.ndarray,\n",
" tags: Dict[str, np.ndarray],\n",
" y_insample: Optional[np.ndarray] = None,\n",
" ):\n",
" # Check if the data structure is strictly hierarchical.\n",
" if not is_strictly_hierarchical(S, tags):\n",
" raise ValueError(\n",
" \"Top-down reconciliation requires strictly hierarchical structures.\"\n",
" )\n",
"\n",
" # Get the dimensions of the \"summing\" matrix.\n",
" n_hiers, n_bottom = S.shape\n",
"\n",
" # Get the in-sample values of the top node and bottom nodes.\n",
" y_top = y_insample[0]\n",
" y_btm = y_insample[(n_hiers - n_bottom) :]\n",
"\n",
" # Calculate the disaggregation proportions.\n",
" if self.method == \"average_proportions\":\n",
" prop = np.mean(y_btm / y_top, 1)\n",
" elif self.method == \"proportion_averages\":\n",
" prop = np.mean(y_btm, 1) / np.mean(y_top)\n",
" elif self.method == \"forecast_proportions\":\n",
" raise Exception(f\"Fit method not yet implemented for {self.method}.\")\n",
" else:\n",
" raise Exception(f\"{self.method} is an unknown disaggregation method.\")\n",
"\n",
" # Instantiate and allocate the \"projection\" matrix to distribute the\n",
" # disaggregated base forecast of the top node to the bottom nodes.\n",
" P = sparse.csr_matrix(\n",
" (\n",
" prop,\n",
" np.zeros_like(prop, np.uint8),\n",
" np.arange(len(prop) + 1, dtype=np.min_scalar_type(n_bottom)),\n",
" ),\n",
" shape=(n_bottom, n_hiers),\n",
" dtype=np.float64,\n",
" )\n",
"\n",
" # Instantiate and allocate the \"weight\" matrix.\n",
" if getattr(self, \"intervals_method\", False) is None:\n",
" W = None\n",
" else:\n",
" W = sparse.eye(n_hiers, dtype=np.float32, format=\"csr\")\n",
" \n",
" return P, W"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -805,6 +877,75 @@
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"for method in [\"forecast_proportions\", \"average_proportions\", \"proportion_averages\"]:\n",
" cls_top_down = TopDownSparse(method=method)\n",
" if cls_top_down.insample:\n",
" assert method in [\"average_proportions\", \"proportion_averages\"]\n",
" test_close(\n",
" cls_top_down(\n",
" S=sparse.csr_matrix(S),\n",
" y_hat=S @ y_hat_bottom,\n",
" y_insample=S @ y_bottom,\n",
" tags=tags,\n",
" )[\"mean\"],\n",
" S @ y_hat_bottom,\n",
" )\n",
" else:\n",
" test_close(\n",
" cls_top_down(S=sparse.csr_matrix(S), y_hat=S @ y_hat_bottom, tags=tags)[\n",
" \"mean\"\n",
" ],\n",
" S @ y_hat_bottom,\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"for method in [\"forecast_proportions\", \"average_proportions\", \"proportion_averages\"]:\n",
" cls_top_down = TopDown(method=method)\n",
" cls_top_down_sparse = TopDownSparse(method=method)\n",
" if cls_top_down.insample:\n",
" assert method in [\"average_proportions\", \"proportion_averages\"]\n",
" test_close(\n",
" cls_top_down(\n",
" S=S,\n",
" y_hat=S @ y_hat_bottom,\n",
" y_insample=S @ y_bottom,\n",
" tags=tags,\n",
" )[\"mean\"],\n",
" cls_top_down_sparse(\n",
" S=sparse.csr_matrix(S),\n",
" y_hat=S @ y_hat_bottom,\n",
" y_insample=S @ y_bottom,\n",
" tags=tags,\n",
" )[\"mean\"],\n",
" np.finfo(np.float64).eps,\n",
" )\n",
" else:\n",
" test_close(\n",
" cls_top_down(S=S, y_hat=S @ y_hat_bottom, tags=tags)[\"mean\"],\n",
" cls_top_down_sparse(\n",
" S=sparse.csr_matrix(S),\n",
" y_hat=S @ y_hat_bottom,\n",
" y_insample=S @ y_bottom,\n",
" tags=tags,\n",
" )[\"mean\"],\n",
" np.finfo(np.float64).eps,\n",
" )"
]
},
{
"cell_type": "markdown",
"metadata": {},