diff --git a/mikeio/dataset/_dataarray.py b/mikeio/dataset/_dataarray.py index d539d443a..f78dbde9b 100644 --- a/mikeio/dataset/_dataarray.py +++ b/mikeio/dataset/_dataarray.py @@ -1228,15 +1228,15 @@ def interp_like( @staticmethod def concat( - dataarrays: Sequence["DataArray"], keep: Literal["last"] = "last" + dataarrays: Sequence["DataArray"], keep: Literal["last", "first"] = "last" ) -> "DataArray": """Concatenate DataArrays along the time axis Parameters --------- dataarrays: sequence of DataArrays - keep: str, optional - TODO Yet to be implemented, default: last + keep: 'first' or 'last', optional + which values to keep in case of overlap, by default 'last' Returns ------- @@ -1506,7 +1506,9 @@ def aggregate( if "name" in kwargs: item.name = kwargs.pop("name") - with warnings.catch_warnings(): # there might be all-Nan slices, it is ok, so we ignore them! + with ( + warnings.catch_warnings() + ): # there might be all-Nan slices, it is ok, so we ignore them! warnings.simplefilter("ignore", category=RuntimeWarning) data = func(self.to_numpy(), axis=axis, keepdims=False, **kwargs) diff --git a/mikeio/dataset/_dataset.py b/mikeio/dataset/_dataset.py index 4296e41cc..d8f23ead5 100644 --- a/mikeio/dataset/_dataset.py +++ b/mikeio/dataset/_dataset.py @@ -1169,15 +1169,16 @@ def interp_like( @staticmethod def concat( - datasets: Sequence["Dataset"], keep: Literal["last"] = "last" + datasets: Sequence["Dataset"], keep: Literal["last", "first"] = "last" ) -> "Dataset": """Concatenate Datasets along the time axis Parameters --------- datasets: sequence of Datasets - keep: str, optional - TODO Yet to be implemented, default: last + keep: 'first' or 'last', optional + which values to keep in case of overlap, by default 'last' + Returns ------- @@ -1195,14 +1196,9 @@ def concat( >>> ds3.n_timesteps 4 """ - - if keep != "last": - raise NotImplementedError( - "Last values is the only available option at the moment." 
- ) ds = datasets[0].copy() for dsj in datasets[1:]: - ds = ds._concat_time(dsj, copy=False) + ds = ds._concat_time(dsj, copy=False, keep=keep) return ds @@ -1237,7 +1233,12 @@ def merge(datasets: Sequence["Dataset"]) -> "Dataset": return ds - def _concat_time(self, other: "Dataset", copy: bool = True) -> "Dataset": + def _concat_time( + self, + other: "Dataset", + copy: bool = True, + keep: Literal["last", "first"] = "last", + ) -> "Dataset": self._check_all_items_match(other) # assuming time is always first dimension we can skip / keep it by bool start_dim = int("time" in self.dims) @@ -1264,16 +1265,23 @@ def _concat_time(self, other: "Dataset", copy: bool = True) -> "Dataset": idx1 = np.where(~df12["idx1"].isna()) idx2 = np.where(~df12["idx2"].isna()) for j in range(ds.n_items): - # # if there is an overlap "other" data will be used! - newdata[j][idx1] = ds[j].to_numpy() - newdata[j][idx2] = other[j].to_numpy() + if keep == "last": + newdata[j][idx1] = ds[j].to_numpy() + newdata[j][idx2] = other[j].to_numpy() + else: + newdata[j][idx2] = other[j].to_numpy() + newdata[j][idx1] = ds[j].to_numpy() zn = None if self._zn is not None: zshape = (len(newtime), self._zn.shape[start_dim]) zn = np.zeros(shape=zshape, dtype=self._zn.dtype) - zn[idx1, :] = self._zn - zn[idx2, :] = other._zn + if keep == "last": + zn[idx1, :] = self._zn + zn[idx2, :] = other._zn + else: + zn[idx2, :] = other._zn + zn[idx1, :] = self._zn return Dataset( newdata, time=newtime, items=ds.items, geometry=ds.geometry, zn=zn @@ -2048,11 +2056,17 @@ def _parse_items( eum_type = items.type eum_unit = items.unit eum_data_value_type = items.data_value_type - item_list = [ItemInfo(name, eum_type, eum_unit, eum_data_value_type) for name in column_names] + item_list = [ + ItemInfo(name, eum_type, eum_unit, eum_data_value_type) + for name in column_names + ] elif isinstance(items, Mapping): item_list = [ - ItemInfo(name, items[name].type, items[name].unit, items[name].data_value_type) for name in 
column_names + ItemInfo( + name, items[name].type, items[name].unit, items[name].data_value_type + ) + for name in column_names ] elif isinstance(items, Sequence): item_list = [ diff --git a/mikeio/generic.py b/mikeio/generic.py index c7a4ad587..b047a7328 100644 --- a/mikeio/generic.py +++ b/mikeio/generic.py @@ -452,7 +452,7 @@ def concat( """ # fast path for Dfs0 suffix = pathlib.Path(infilenames[0]).suffix - if suffix == ".dfs0" and keep == "last": + if suffix == ".dfs0": dss = [mikeio.read(f) for f in infilenames] ds = mikeio.Dataset.concat(dss, keep=keep) # type: ignore ds.to_dfs(outfilename) diff --git a/tests/test_dataset.py b/tests/test_dataset.py index a5ac411a8..94a92ed27 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -1265,6 +1265,21 @@ def test_concat_dataarray_by_time(): assert da3.is_equidistant +def test_concat_dataarray_keep_first() -> None: + da1 = mikeio.DataArray( + data=np.array([1.0, 2.0, 3.0]), time=pd.date_range("2000-01-01", periods=3) + ) + # another dataarray with partly overlapping time + da2 = mikeio.DataArray( + data=np.array([4.0, 5.0]), time=pd.date_range("2000-01-02", periods=2) + ) + + da3 = mikeio.DataArray.concat([da1, da2], keep="first") + + assert da3.n_timesteps == 3 + assert da3.to_numpy()[2] == 3.0 + + def test_concat_by_time(): ds1 = mikeio.read("tests/testdata/tide1.dfs1") ds2 = mikeio.read("tests/testdata/tide2.dfs1") + 0.5 # add offset