Merge pull request #719 from DHI/fix_dfs0_concat
Special case for generic.concat dfs0
ecomodeller committed Aug 22, 2024
2 parents b14b862 + e168611 commit 0124639
Showing 3 changed files with 52 additions and 21 deletions.
10 changes: 6 additions & 4 deletions mikeio/dataset/_dataarray.py
@@ -1228,15 +1228,15 @@ def interp_like(

@staticmethod
def concat(
dataarrays: Sequence["DataArray"], keep: Literal["last"] = "last"
dataarrays: Sequence["DataArray"], keep: Literal["last", "first"] = "last"
) -> "DataArray":
"""Concatenate DataArrays along the time axis
Parameters
---------
dataarrays: sequence of DataArrays
keep: str, optional
TODO Yet to be implemented, default: last
keep: 'first' or 'last', optional
default: last
Returns
-------
@@ -1506,7 +1506,9 @@ def aggregate(
if "name" in kwargs:
item.name = kwargs.pop("name")

with warnings.catch_warnings(): # there might be all-Nan slices, it is ok, so we ignore them!
with (
warnings.catch_warnings()
): # there might be all-Nan slices, it is ok, so we ignore them!
warnings.simplefilter("ignore", category=RuntimeWarning)
data = func(self.to_numpy(), axis=axis, keepdims=False, **kwargs)
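The comment in this hunk explains why RuntimeWarnings are silenced; outside the diff, the same pattern looks roughly like this (illustrative arrays, not repo data):

import warnings
import numpy as np

data = np.array([[np.nan, np.nan], [1.0, 2.0]])  # first row is an all-NaN slice
with warnings.catch_warnings():
    # nanmean over the all-NaN row would emit a "Mean of empty slice" RuntimeWarning
    warnings.simplefilter("ignore", category=RuntimeWarning)
    row_means = np.nanmean(data, axis=1, keepdims=False)  # array([nan, 1.5])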

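A minimal usage sketch of the new keep option on DataArray.concat, mirroring the test added at the bottom of this diff (the expected arrays are inferred from the overlap handling introduced in this PR):

import numpy as np
import pandas as pd
import mikeio

da1 = mikeio.DataArray(
    data=np.array([1.0, 2.0, 3.0]), time=pd.date_range("2000-01-01", periods=3)
)
da2 = mikeio.DataArray(
    data=np.array([4.0, 5.0]), time=pd.date_range("2000-01-02", periods=2)
)

# keep="last" (default): the later DataArray wins on the two overlapping steps
mikeio.DataArray.concat([da1, da2]).to_numpy()                # [1., 4., 5.]
# keep="first": values already present in da1 are preserved
mikeio.DataArray.concat([da1, da2], keep="first").to_numpy()  # [1., 2., 3.]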
48 changes: 31 additions & 17 deletions mikeio/dataset/_dataset.py
@@ -1169,15 +1169,16 @@ def interp_like(

@staticmethod
def concat(
datasets: Sequence["Dataset"], keep: Literal["last"] = "last"
datasets: Sequence["Dataset"], keep: Literal["last", "first"] = "last"
) -> "Dataset":
"""Concatenate Datasets along the time axis
Parameters
---------
datasets: sequence of Datasets
keep: str, optional
TODO Yet to be implemented, default: last
keep: 'first' or 'last', optional
which values to keep in case of overlap, by default 'last'
Returns
-------
@@ -1195,14 +1196,9 @@ def concat(
>>> ds3.n_timesteps
4
"""

if keep != "last":
raise NotImplementedError(
"Last values is the only available option at the moment."
)
ds = datasets[0].copy()
for dsj in datasets[1:]:
ds = ds._concat_time(dsj, copy=False)
ds = ds._concat_time(dsj, copy=False, keep=keep)

return ds

@@ -1237,7 +1233,12 @@ def merge(datasets: Sequence["Dataset"]) -> "Dataset":

return ds

def _concat_time(self, other: "Dataset", copy: bool = True) -> "Dataset":
def _concat_time(
self,
other: "Dataset",
copy: bool = True,
keep: Literal["last", "first"] = "last",
) -> "Dataset":
self._check_all_items_match(other)
# assuming time is always first dimension we can skip / keep it by bool
start_dim = int("time" in self.dims)
@@ -1264,16 +1265,23 @@ def _concat_time(self, other: "Dataset", copy: bool = True) -> "Dataset":
idx1 = np.where(~df12["idx1"].isna())
idx2 = np.where(~df12["idx2"].isna())
for j in range(ds.n_items):
# # if there is an overlap "other" data will be used!
newdata[j][idx1] = ds[j].to_numpy()
newdata[j][idx2] = other[j].to_numpy()
if keep == "last":
newdata[j][idx1] = ds[j].to_numpy()
newdata[j][idx2] = other[j].to_numpy()
else:
newdata[j][idx2] = other[j].to_numpy()
newdata[j][idx1] = ds[j].to_numpy()

zn = None
if self._zn is not None:
zshape = (len(newtime), self._zn.shape[start_dim])
zn = np.zeros(shape=zshape, dtype=self._zn.dtype)
zn[idx1, :] = self._zn
zn[idx2, :] = other._zn
if keep == "last":
zn[idx1, :] = self._zn
zn[idx2, :] = other._zn
else:
zn[idx2, :] = other._zn
zn[idx1, :] = self._zn

return Dataset(
newdata, time=newtime, items=ds.items, geometry=ds.geometry, zn=zn
@@ -2048,11 +2056,17 @@ def _parse_items(
eum_type = items.type
eum_unit = items.unit
eum_data_value_type = items.data_value_type
item_list = [ItemInfo(name, eum_type, eum_unit, eum_data_value_type) for name in column_names]
item_list = [
ItemInfo(name, eum_type, eum_unit, eum_data_value_type)
for name in column_names
]

elif isinstance(items, Mapping):
item_list = [
ItemInfo(name, items[name].type, items[name].unit, items[name].data_value_type) for name in column_names
ItemInfo(
name, items[name].type, items[name].unit, items[name].data_value_type
)
for name in column_names
]
elif isinstance(items, Sequence):
item_list = [
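The same option is exposed on Dataset.concat; a sketch based on the dfs1 test files already used by test_concat_by_time (assuming the repo's tests/testdata files are on disk):

import mikeio

ds1 = mikeio.read("tests/testdata/tide1.dfs1")
ds2 = mikeio.read("tests/testdata/tide2.dfs1") + 0.5  # offset so the overlap is visible

ds_last = mikeio.Dataset.concat([ds1, ds2])                 # overlap taken from ds2
ds_first = mikeio.Dataset.concat([ds1, ds2], keep="first")  # overlap taken from ds1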
15 changes: 15 additions & 0 deletions tests/test_dataset.py
@@ -1265,6 +1265,21 @@ def test_concat_dataarray_by_time():
assert da3.is_equidistant


def test_concat_dataarray_keep_first() -> None:
da1 = mikeio.DataArray(
data=np.array([1.0, 2.0, 3.0]), time=pd.date_range("2000-01-01", periods=3)
)
# another dataarray with partly overlapping time
da2 = mikeio.DataArray(
data=np.array([4.0, 5.0]), time=pd.date_range("2000-01-02", periods=2)
)

da3 = mikeio.DataArray.concat([da1, da2], keep="first")

assert da3.n_timesteps == 3
assert da3.to_numpy()[2] == 3.0


def test_concat_by_time():
ds1 = mikeio.read("tests/testdata/tide1.dfs1")
ds2 = mikeio.read("tests/testdata/tide2.dfs1") + 0.5 # add offset
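The keep handling in _concat_time above comes down to NumPy assignment order on the overlapping time indices; a standalone illustration with made-up index sets and values:

import numpy as np

idx1, idx2 = [0, 1, 2], [1, 2]            # time steps covered by ds and by other
vals1, vals2 = np.array([1.0, 2.0, 3.0]), np.array([4.0, 5.0])

keep_last = np.zeros(3)
keep_last[idx1] = vals1
keep_last[idx2] = vals2                   # other written last -> [1., 4., 5.]

keep_first = np.zeros(3)
keep_first[idx2] = vals2
keep_first[idx1] = vals1                  # ds written last    -> [1., 2., 3.]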
