ENH: Add sparse top-down reconciliation via TopDownSparse #277

Merged
9 commits merged on Aug 15, 2024
4 changes: 4 additions & 0 deletions hierarchicalforecast/_modidx.py
@@ -112,6 +112,10 @@
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.TopDown.fit_predict': ( 'methods.html#topdown.fit_predict',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.TopDownSparse': ( 'methods.html#topdownsparse',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.TopDownSparse._get_PW_matrices': ( 'methods.html#topdownsparse._get_pw_matrices',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods._get_child_nodes': ( 'methods.html#_get_child_nodes',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods._reconcile_fcst_proportions': ( 'methods.html#_reconcile_fcst_proportions',
84 changes: 75 additions & 9 deletions hierarchicalforecast/methods.py
@@ -1,7 +1,8 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/methods.ipynb.

# %% auto 0
__all__ = ['BottomUp', 'BottomUpSparse', 'TopDown', 'MiddleOut', 'MinTrace', 'MinTraceSparse', 'OptimalCombination', 'ERM']
__all__ = ['BottomUp', 'BottomUpSparse', 'TopDown', 'TopDownSparse', 'MiddleOut', 'MinTrace', 'MinTraceSparse',
'OptimalCombination', 'ERM']

# %% ../nbs/methods.ipynb 3
import warnings
@@ -255,7 +256,9 @@ def _get_PW_matrices(self, S, idx_bottom):
return P, W

# %% ../nbs/methods.ipynb 22
def _get_child_nodes(S: np.ndarray, tags: Dict[str, np.ndarray]):
def _get_child_nodes(S: Union[np.ndarray, sparse.csr_matrix], tags: Dict[str, np.ndarray]):
if isinstance(S, sparse.spmatrix):
S = S.toarray()
level_names = list(tags.keys())
nodes = OrderedDict()
for i_level, level in enumerate(level_names[:-1]):
@@ -368,6 +371,7 @@ def fit(self,
**Returns:**<br>
`self`: object, fitted reconciler.
"""
self.intervals_method = intervals_method
self.P, self.W = self._get_PW_matrices(S=S, y_hat=y_hat,
tags=tags, y_insample=y_insample)
self.sampler = self._get_sampler(S=S,
@@ -440,7 +444,69 @@ def fit_predict(self,

__call__ = fit_predict

# %% ../nbs/methods.ipynb 32
# %% ../nbs/methods.ipynb 25
class TopDownSparse(TopDown):
"""TopDownSparse Reconciliation Class.

This is an implementation of top-down reconciliation using the sparse matrix
approach. It works much more efficiently on data sets with many time series.

See the parent class for more details.
"""

is_sparse_method = True

def _get_PW_matrices(
self,
S: sparse.csr_matrix,
y_hat: np.ndarray,
tags: Dict[str, np.ndarray],
y_insample: Optional[np.ndarray] = None,
):
# Check if the data structure is strictly hierarchical.
if not is_strictly_hierarchical(S, tags):
raise ValueError(
"Top-down reconciliation requires strictly hierarchical structures."
)

# Get the dimensions of the "summing" matrix.
n_hiers, n_bottom = S.shape

# Get the in-sample values of the top node and bottom nodes.
y_top = y_insample[0]
y_btm = y_insample[(n_hiers - n_bottom) :]

# Calculate the disaggregation proportions.
if self.method == "average_proportions":
prop = np.mean(y_btm / y_top, 1)
elif self.method == "proportion_averages":
prop = np.mean(y_btm, 1) / np.mean(y_top)
elif self.method == "forecast_proportions":
raise Exception(f"Fit method not yet implemented for {self.method}.")
else:
raise Exception(f"{self.method} is an unknown disaggregation method.")

# Instantiate and allocate the "projection" matrix to distribute the
# disaggregated base forecast of the top node to the bottom nodes.
P = sparse.csr_matrix(
(
prop,
np.zeros_like(prop, np.uint8),
np.arange(len(prop) + 1, dtype=np.min_scalar_type(n_bottom)),
),
shape=(n_bottom, n_hiers),
dtype=np.float64,
)

# Instantiate and allocate the "weight" matrix.
if getattr(self, "intervals_method", False) is None:
W = None
else:
W = sparse.eye(n_hiers, dtype=np.float32, format="csr")

return P, W
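
A rough usage sketch (not part of the diff): the two in-sample options compute the proportions as the mean of historical bottom/top ratios (`average_proportions`) or as the ratio of the historical means (`proportion_averages`), and `P` places those proportions in the column of the top node so that `S @ P @ y_hat` yields coherent forecasts. The toy hierarchy, tags, and arrays below are purely illustrative; the call signature mirrors the tests added in `nbs/methods.ipynb`.

    import numpy as np
    from scipy import sparse
    from hierarchicalforecast.methods import TopDownSparse

    # Toy strictly hierarchical structure: total = leaf_a + leaf_b (illustrative only).
    S = np.array([[1.0, 1.0],
                  [1.0, 0.0],
                  [0.0, 1.0]])
    tags = {"total": np.array([0]), "total/leaf": np.array([1, 2])}

    # Synthetic in-sample bottom-level history and base bottom-level forecasts.
    y_bottom = np.array([[10.0, 12.0, 11.0],
                         [30.0, 28.0, 29.0]])
    y_hat_bottom = np.array([[11.0],
                             [29.0]])

    reconciler = TopDownSparse(method="proportion_averages")
    result = reconciler(
        S=sparse.csr_matrix(S),     # sparse "summing" matrix
        y_hat=S @ y_hat_bottom,     # base forecasts for every node
        y_insample=S @ y_bottom,    # in-sample values for every node
        tags=tags,
    )
    print(result["mean"])           # coherent forecasts, shape (3, 1)

Swapping in `method="average_proportions"` works the same way; `forecast_proportions` raises the exception above because it is not yet implemented for this class.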

# %% ../nbs/methods.ipynb 35
class MiddleOut(HReconciler):
"""Middle Out Reconciliation Class.

@@ -556,11 +622,11 @@ def fit_predict(self,

__call__ = fit_predict

# %% ../nbs/methods.ipynb 37
# %% ../nbs/methods.ipynb 40
def crossprod(x):
return x.T @ x

# %% ../nbs/methods.ipynb 38
# %% ../nbs/methods.ipynb 41
class MinTrace(HReconciler):
"""MinTrace Reconciliation Class.

@@ -829,7 +895,7 @@ def fit_predict(self,

__call__ = fit_predict

# %% ../nbs/methods.ipynb 39
# %% ../nbs/methods.ipynb 42
class MinTraceSparse(MinTrace):
"""MinTraceSparse Reconciliation Class.

@@ -939,7 +1005,7 @@ def get_P_action(y):

return P, W

# %% ../nbs/methods.ipynb 49
# %% ../nbs/methods.ipynb 52
class OptimalCombination(MinTrace):
"""Optimal Combination Reconciliation Class.

@@ -973,7 +1039,7 @@ def __init__(self,
super().__init__(method=method, nonnegative=nonnegative, num_threads=num_threads)
self.insample = False

# %% ../nbs/methods.ipynb 58
# %% ../nbs/methods.ipynb 61
@njit
def lasso(X: np.ndarray, y: np.ndarray,
lambda_reg: float, max_iters: int = 1_000,
@@ -1005,7 +1071,7 @@ def lasso(X: np.ndarray, y: np.ndarray,
#print(it)
return beta

# %% ../nbs/methods.ipynb 59
# %% ../nbs/methods.ipynb 62
class ERM(HReconciler):
"""Optimal Combination Reconciliation Class.

141 changes: 141 additions & 0 deletions nbs/methods.ipynb
@@ -524,6 +524,8 @@
"source": [
"#| exporti\n",
"def _get_child_nodes(S: np.ndarray, tags: Dict[str, np.ndarray]):\n",
" if isinstance(S, sparse.spmatrix):\n",
" S = S.toarray()\n",
" level_names = list(tags.keys())\n",
" nodes = OrderedDict()\n",
" for i_level, level in enumerate(level_names[:-1]):\n",
@@ -650,6 +652,7 @@
" **Returns:**<br>\n",
" `self`: object, fitted reconciler.\n",
" \"\"\"\n",
" self.intervals_method = intervals_method\n",
" self.P, self.W = self._get_PW_matrices(S=S, y_hat=y_hat, \n",
" tags=tags, y_insample=y_insample)\n",
" self.sampler = self._get_sampler(S=S,\n",
@@ -723,6 +726,75 @@
" __call__ = fit_predict"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"class TopDownSparse(TopDown):\n",
" \"\"\"TopDownSparse Reconciliation Class.\n",
"\n",
" This is an implementation of top-down reconciliation using the sparse matrix\n",
" approach. It works much more efficiently on data sets with many time series.\n",
"\n",
" See the parent class for more details.\n",
" \"\"\"\n",
"\n",
" is_sparse_method = True\n",
"\n",
" def _get_PW_matrices(\n",
" self,\n",
" S: sparse.csr_matrix,\n",
" y_hat: np.ndarray,\n",
" tags: Dict[str, np.ndarray],\n",
" y_insample: Optional[np.ndarray] = None,\n",
" ):\n",
" # Check if the data structure is strictly hierarchical.\n",
" if not is_strictly_hierarchical(S, tags):\n",
" raise ValueError(\n",
" \"Top-down reconciliation requires strictly hierarchical structures.\"\n",
" )\n",
"\n",
" # Get the dimensions of the \"summing\" matrix.\n",
" n_hiers, n_bottom = S.shape\n",
"\n",
" # Get the in-sample values of the top node and bottom nodes.\n",
" y_top = y_insample[0]\n",
" y_btm = y_insample[(n_hiers - n_bottom) :]\n",
"\n",
" # Calculate the disaggregation proportions.\n",
" if self.method == \"average_proportions\":\n",
" prop = np.mean(y_btm / y_top, 1)\n",
" elif self.method == \"proportion_averages\":\n",
" prop = np.mean(y_btm, 1) / np.mean(y_top)\n",
" elif self.method == \"forecast_proportions\":\n",
" raise Exception(f\"Fit method not yet implemented for {self.method}.\")\n",
" else:\n",
" raise Exception(f\"{self.method} is an unknown disaggregation method.\")\n",
"\n",
" # Instantiate and allocate the \"projection\" matrix to distribute the\n",
" # disaggregated base forecast of the top node to the bottom nodes.\n",
" P = sparse.csr_matrix(\n",
" (\n",
" prop,\n",
" np.zeros_like(prop, np.uint8),\n",
" np.arange(len(prop) + 1, dtype=np.min_scalar_type(n_bottom)),\n",
" ),\n",
" shape=(n_bottom, n_hiers),\n",
" dtype=np.float64,\n",
" )\n",
"\n",
" # Instantiate and allocate the \"weight\" matrix.\n",
" if getattr(self, \"intervals_method\", False) is None:\n",
" W = None\n",
" else:\n",
" W = sparse.eye(n_hiers, dtype=np.float32, format=\"csr\")\n",
" \n",
" return P, W"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -805,6 +877,75 @@
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"for method in [\"forecast_proportions\", \"average_proportions\", \"proportion_averages\"]:\n",
" cls_top_down = TopDownSparse(method=method)\n",
" if cls_top_down.insample:\n",
" assert method in [\"average_proportions\", \"proportion_averages\"]\n",
" test_close(\n",
" cls_top_down(\n",
" S=sparse.csr_matrix(S),\n",
" y_hat=S @ y_hat_bottom,\n",
" y_insample=S @ y_bottom,\n",
" tags=tags,\n",
" )[\"mean\"],\n",
" S @ y_hat_bottom,\n",
" )\n",
" else:\n",
" test_close(\n",
" cls_top_down(S=sparse.csr_matrix(S), y_hat=S @ y_hat_bottom, tags=tags)[\n",
" \"mean\"\n",
" ],\n",
" S @ y_hat_bottom,\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"for method in [\"forecast_proportions\", \"average_proportions\", \"proportion_averages\"]:\n",
" cls_top_down = TopDown(method=method)\n",
" cls_top_down_sparse = TopDownSparse(method=method)\n",
" if cls_top_down.insample:\n",
" assert method in [\"average_proportions\", \"proportion_averages\"]\n",
" test_close(\n",
" cls_top_down(\n",
" S=S,\n",
" y_hat=S @ y_hat_bottom,\n",
" y_insample=S @ y_bottom,\n",
" tags=tags,\n",
" )[\"mean\"],\n",
" cls_top_down_sparse(\n",
" S=sparse.csr_matrix(S),\n",
" y_hat=S @ y_hat_bottom,\n",
" y_insample=S @ y_bottom,\n",
" tags=tags,\n",
" )[\"mean\"],\n",
" np.finfo(np.float64).eps,\n",
" )\n",
" else:\n",
" test_close(\n",
" cls_top_down(S=S, y_hat=S @ y_hat_bottom, tags=tags)[\"mean\"],\n",
" cls_top_down_sparse(\n",
" S=sparse.csr_matrix(S),\n",
" y_hat=S @ y_hat_bottom,\n",
" y_insample=S @ y_bottom,\n",
" tags=tags,\n",
" )[\"mean\"],\n",
" np.finfo(np.float64).eps,\n",
" )"
]
},
{
"cell_type": "markdown",
"metadata": {},