Skip to content

Commit

Permalink
Fix/comp lags feat order (#2272)
Browse files Browse the repository at this point in the history
* fix: reorder lagged features per lags when they are provided component-wise

* fix: parametrize lagged_features_names test

* feat: added tests for lagged_features_names when lags are component-specific

* fix: create_lagged_name is not affected by lags order different than the components

* fix: improve comment

* feat: tests verify that list and dict lags yield the same result

* fix: remove staticmethod for the tests to pass on python 3.9

* feat: properly reorder features during autoregression, added corresponding test

* update changelog

* fix: addressing review comments

* fix: moved autoregression lags extraction to tabularization

* fix: refactor tests to reduce code duplication

* fix: address review comment

* fix: remove usage of strict argument in zip, not supported in python 3.9

* further refactor lagged data extraction for autoregression

* allow coverage diffs for codecov upload

* use codecov v3

* precompute lagged and ordered feature indices

---------

Co-authored-by: Dennis Bader <dennis.bader@gmx.ch>
  • Loading branch information
madtoinou and dennisbader authored Apr 12, 2024
1 parent e597998 commit 78d39ad
Show file tree
Hide file tree
Showing 5 changed files with 1,450 additions and 673 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
**Fixed**
- Fixed a bug in `quantile_loss`, where the loss was computed on all samples rather than only on the predicted quantiles. [#2284](https://github.com/unit8co/darts/pull/2284) by [Dennis Bader](https://github.com/dennisbader).
- Fixed type hint warning "Unexpected argument" when calling `historical_forecasts()` caused by the `_with_sanity_checks` decorator. The type hinting is now properly configured to expect any input arguments and return the output type of the method for which the sanity checks are performed. [#2286](https://github.com/unit8co/darts/pull/2286) by [Dennis Bader](https://github.com/dennisbader).
- Fixed the order of the features when using component-wise lags so that they are grouped by values, then by components (previously, they were grouped by components, then by values). [#2272](https://github.com/unit8co/darts/pull/2272) by [Antoine Madrona](https://github.com/madtoinou).
- Fixed a segmentation fault that some users were facing when importing a `LightGBMModel`. [#2304](https://github.com/unit8co/darts/pull/2304) by [Dennis Bader](https://github.com/dennisbader).
- Fixed a bug when using a dropout with a `TorchForecasting` and pytorch lightning versions >= 2.2.0, where the dropout was not properly activated during training. [#2312](https://github.com/unit8co/darts/pull/2312) by [Dennis Bader](https://github.com/dennisbader).

Expand Down
94 changes: 18 additions & 76 deletions darts/models/forecasting/regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
from darts.models.forecasting.forecasting_model import GlobalForecastingModel
from darts.timeseries import TimeSeries
from darts.utils.data.tabularization import (
add_static_covariates_to_lagged_data,
_create_lagged_data_autoregression,
create_lagged_component_names,
create_lagged_training_data,
)
Expand Down Expand Up @@ -1019,83 +1019,25 @@ def predict(
last_step_shift = t_pred - (n - step)
t_pred = n - step

np_X = []
# retrieve target lags
if "target" in self.lags:
if predictions:
series_matrix = np.concatenate(
[series_matrix, predictions[-1]], axis=1
)
# component-wise lags
if "target" in self.component_lags:
tmp_X = [
series_matrix[
:,
[lag - (shift + last_step_shift) for lag in comp_lags],
comp_i,
]
for comp_i, (comp, comp_lags) in enumerate(
self.component_lags["target"].items()
)
]
# values are grouped by component
np_X.append(
np.concatenate(tmp_X, axis=1).reshape(
len(series) * num_samples, -1
)
)
else:
# values are grouped by lags
np_X.append(
series_matrix[
:,
[
lag - (shift + last_step_shift)
for lag in self.lags["target"]
],
].reshape(len(series) * num_samples, -1)
)
# retrieve covariate lags, enforce order (dict only preserves insertion order for python 3.6+)
for cov_type in ["past", "future"]:
if cov_type in covariate_matrices:
# component-wise lags
if cov_type in self.component_lags:
tmp_X = [
covariate_matrices[cov_type][
:,
np.array(comp_lags) - self.lags[cov_type][0] + t_pred,
comp_i,
]
for comp_i, (comp, comp_lags) in enumerate(
self.component_lags[cov_type].items()
)
]
np_X.append(
np.concatenate(tmp_X, axis=1).reshape(
len(series) * num_samples, -1
)
)
else:
np_X.append(
covariate_matrices[cov_type][
:, relative_cov_lags[cov_type] + t_pred
].reshape(len(series) * num_samples, -1)
)

# concatenate retrieved lags
X = np.concatenate(np_X, axis=1)
# Need to split up `X` into three equally-sized sub-blocks
# corresponding to each timeseries in `series`, so that
# static covariates can be added to each block; valid since
# each block contains same number of observations:
X_blocks = np.split(X, len(series), axis=0)
X_blocks, _ = add_static_covariates_to_lagged_data(
X_blocks,
series,
# concatenate previous iteration forecasts
if "target" in self.lags and predictions:
series_matrix = np.concatenate([series_matrix, predictions[-1]], axis=1)

# extract and concatenate lags from target and covariates series
X = _create_lagged_data_autoregression(
target_series=series,
t_pred=t_pred,
shift=shift,
last_step_shift=last_step_shift,
series_matrix=series_matrix,
covariate_matrices=covariate_matrices,
lags=self.lags,
component_lags=self.component_lags,
relative_cov_lags=relative_cov_lags,
num_samples=num_samples,
uses_static_covariates=self.uses_static_covariates,
last_shape=self._static_covariates_shape,
last_static_covariates_shape=self._static_covariates_shape,
)
X = np.concatenate(X_blocks, axis=0)

# X has shape (n_series * n_samples, n_regression_features)
prediction = self._predict_and_sample(
Expand Down
7 changes: 6 additions & 1 deletion darts/tests/models/forecasting/test_regression_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1991,7 +1991,7 @@ def test_component_specific_lags(self, config):
)

# n > output_chunk_length
model.predict(
pred = model.predict(
7,
series=series[0] if multiple_series else None,
past_covariates=(
Expand All @@ -2005,6 +2005,11 @@ def test_component_specific_lags(self, config):
else None
),
)
# check that lagged features are properly extracted during auto-regression
if multivar_target:
np.testing.assert_array_almost_equal(
tg.sine_timeseries(length=27)[-7:].values(), pred["sine"].values()
)

@pytest.mark.parametrize(
"config",
Expand Down
Loading

0 comments on commit 78d39ad

Please sign in to comment.