Special case for generic.concat dfs0 #719

Merged · 2 commits · Aug 22, 2024
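The PR title refers to mikeio.generic.concat: for dfs0 files, the file-level concatenation presumably routes through the Dataset.concat path changed below. A hedged usage sketch (file names are illustrative; the keep forwarding for dfs0 is this PR's subject, not confirmed by the visible diff):

```python
import mikeio.generic

# hypothetical dfs0 files with overlapping time axes
mikeio.generic.concat(
    infilenames=["part1.dfs0", "part2.dfs0"],
    outfilename="combined.dfs0",
    keep="last",  # assumption: overlap resolved from the later file, as in Dataset.concat
)
```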
10 changes: 6 additions & 4 deletions mikeio/dataset/_dataarray.py
@@ -1228,15 +1228,15 @@ def interp_like(

     @staticmethod
     def concat(
-        dataarrays: Sequence["DataArray"], keep: Literal["last"] = "last"
+        dataarrays: Sequence["DataArray"], keep: Literal["last", "first"] = "last"
     ) -> "DataArray":
         """Concatenate DataArrays along the time axis

         Parameters
         ---------
         dataarrays: sequence of DataArrays
-        keep: str, optional
-            TODO Yet to be implemented, default: last
+        keep: 'first' or 'last', optional
+            default: last

         Returns
         -------
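The new keep option decides which source wins on overlapping timesteps. A minimal sketch mirroring the test added in tests/test_dataset.py in this PR (dates and values taken from that test; the expected outputs are inferred from the keep semantics):

```python
import numpy as np
import pandas as pd

import mikeio

# the time axes overlap on 2000-01-02 and 2000-01-03
da1 = mikeio.DataArray(
    data=np.array([1.0, 2.0, 3.0]), time=pd.date_range("2000-01-01", periods=3)
)
da2 = mikeio.DataArray(
    data=np.array([4.0, 5.0]), time=pd.date_range("2000-01-02", periods=2)
)

# keep="last" (default): overlapping values come from da2
mikeio.DataArray.concat([da1, da2]).to_numpy()  # [1.0, 4.0, 5.0]

# keep="first": overlapping values come from da1
mikeio.DataArray.concat([da1, da2], keep="first").to_numpy()  # [1.0, 2.0, 3.0]
```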
@@ -1506,7 +1506,9 @@ def aggregate(
         if "name" in kwargs:
             item.name = kwargs.pop("name")

-        with warnings.catch_warnings(): # there might be all-Nan slices, it is ok, so we ignore them!
+        with (
+            warnings.catch_warnings()
+        ): # there might be all-Nan slices, it is ok, so we ignore them!
             warnings.simplefilter("ignore", category=RuntimeWarning)
             data = func(self.to_numpy(), axis=axis, keepdims=False, **kwargs)

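The with block above changes formatting only; its purpose is to silence numpy's warning for all-NaN slices during aggregation. A standalone sketch of the same pattern (data and aggregation function are illustrative):

```python
import warnings

import numpy as np

data = np.array([[np.nan, np.nan], [1.0, 2.0]])

# the first row is all-NaN; np.nanmax would emit
# "RuntimeWarning: All-NaN slice encountered", which is expected here
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)
    result = np.nanmax(data, axis=1, keepdims=False)

print(result)  # [nan  2.]
```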
48 changes: 31 additions & 17 deletions mikeio/dataset/_dataset.py
@@ -1169,15 +1169,16 @@ def interp_like(

     @staticmethod
     def concat(
-        datasets: Sequence["Dataset"], keep: Literal["last"] = "last"
+        datasets: Sequence["Dataset"], keep: Literal["last", "first"] = "last"
     ) -> "Dataset":
         """Concatenate Datasets along the time axis

         Parameters
         ---------
         datasets: sequence of Datasets
-        keep: str, optional
-            TODO Yet to be implemented, default: last
+        keep: 'first' or 'last', optional
+            which values to keep in case of overlap, by default 'last'
+

         Returns
         -------
@@ -1195,14 +1196,9 @@ def concat(
         >>> ds3.n_timesteps
         4
         """
-
-        if keep != "last":
-            raise NotImplementedError(
-                "Last values is the only available option at the moment."
-            )
         ds = datasets[0].copy()
         for dsj in datasets[1:]:
-            ds = ds._concat_time(dsj, copy=False)
+            ds = ds._concat_time(dsj, copy=False, keep=keep)

         return ds

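With the NotImplementedError removed, keep is now forwarded to _concat_time. A usage sketch built from the docstring example and the tide files referenced in tests/test_dataset.py (assuming, as the tests do, that the two files overlap in time):

```python
import mikeio

ds1 = mikeio.read("tests/testdata/tide1.dfs1")
ds2 = mikeio.read("tests/testdata/tide2.dfs1")

ds_last = mikeio.Dataset.concat([ds1, ds2])                 # overlap taken from ds2
ds_first = mikeio.Dataset.concat([ds1, ds2], keep="first")  # overlap taken from ds1
```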
@@ -1237,7 +1233,12 @@ def merge(datasets: Sequence["Dataset"]) -> "Dataset":

         return ds

-    def _concat_time(self, other: "Dataset", copy: bool = True) -> "Dataset":
+    def _concat_time(
+        self,
+        other: "Dataset",
+        copy: bool = True,
+        keep: Literal["last", "first"] = "last",
+    ) -> "Dataset":
         self._check_all_items_match(other)
         # assuming time is always first dimension we can skip / keep it by bool
         start_dim = int("time" in self.dims)
@@ -1264,16 +1265,23 @@ def _concat_time(self, other: "Dataset", copy: bool = True) -> "Dataset":
         idx1 = np.where(~df12["idx1"].isna())
         idx2 = np.where(~df12["idx2"].isna())
         for j in range(ds.n_items):
-            # # if there is an overlap "other" data will be used!
-            newdata[j][idx1] = ds[j].to_numpy()
-            newdata[j][idx2] = other[j].to_numpy()
+            if keep == "last":
+                newdata[j][idx1] = ds[j].to_numpy()
+                newdata[j][idx2] = other[j].to_numpy()
+            else:
+                newdata[j][idx2] = other[j].to_numpy()
+                newdata[j][idx1] = ds[j].to_numpy()

         zn = None
         if self._zn is not None:
             zshape = (len(newtime), self._zn.shape[start_dim])
             zn = np.zeros(shape=zshape, dtype=self._zn.dtype)
-            zn[idx1, :] = self._zn
-            zn[idx2, :] = other._zn
+            if keep == "last":
+                zn[idx1, :] = self._zn
+                zn[idx2, :] = other._zn
+            else:
+                zn[idx2, :] = other._zn
+                zn[idx1, :] = self._zn

         return Dataset(
             newdata, time=newtime, items=ds.items, geometry=ds.geometry, zn=zn
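The branch above implements keep purely through write order: both index sets are always written, and whichever assignment runs last wins wherever idx1 and idx2 overlap. A small numpy illustration of the trick (index values made up):

```python
import numpy as np

new = np.zeros(4)
idx1 = np.array([0, 1, 2])  # timesteps present in self
idx2 = np.array([2, 3])     # timesteps present in other; index 2 overlaps

# keep="last": self written first, other second, so other wins at index 2
new[idx1] = [1.0, 2.0, 3.0]
new[idx2] = [9.0, 4.0]
# new == [1.0, 2.0, 9.0, 4.0]

# keep="first": other written first, self second, so self wins at index 2
new[idx2] = [9.0, 4.0]
new[idx1] = [1.0, 2.0, 3.0]
# new == [1.0, 2.0, 3.0, 4.0]
```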
@@ -2048,11 +2056,17 @@ def _parse_items(
         eum_type = items.type
         eum_unit = items.unit
         eum_data_value_type = items.data_value_type
-        item_list = [ItemInfo(name, eum_type, eum_unit, eum_data_value_type) for name in column_names]
+        item_list = [
+            ItemInfo(name, eum_type, eum_unit, eum_data_value_type)
+            for name in column_names
+        ]

     elif isinstance(items, Mapping):
         item_list = [
-            ItemInfo(name, items[name].type, items[name].unit, items[name].data_value_type) for name in column_names
+            ItemInfo(
+                name, items[name].type, items[name].unit, items[name].data_value_type
+            )
+            for name in column_names
         ]
     elif isinstance(items, Sequence):
         item_list = [
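The _parse_items change is formatting only, but the Mapping branch is easy to misread: it expects a dict-like of ItemInfo keyed by column name. A hedged sketch of that input (names and EUM types are illustrative; units fall back to each type's default):

```python
from mikeio import EUMType, ItemInfo

# hypothetical mapping from column name to item metadata
items = {
    "WL": ItemInfo("WL", EUMType.Water_Level),
    "Q": ItemInfo("Q", EUMType.Discharge),
}
column_names = ["WL", "Q"]

# what the Mapping branch builds from it
item_list = [
    ItemInfo(name, items[name].type, items[name].unit, items[name].data_value_type)
    for name in column_names
]
```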
15 changes: 15 additions & 0 deletions tests/test_dataset.py
@@ -1265,6 +1265,21 @@ def test_concat_dataarray_by_time():
     assert da3.is_equidistant


+def test_concat_dataarray_keep_first() -> None:
+    da1 = mikeio.DataArray(
+        data=np.array([1.0, 2.0, 3.0]), time=pd.date_range("2000-01-01", periods=3)
+    )
+    # another dataarray with partly overlapping time
+    da2 = mikeio.DataArray(
+        data=np.array([4.0, 5.0]), time=pd.date_range("2000-01-02", periods=2)
+    )
+
+    da3 = mikeio.DataArray.concat([da1, da2], keep="first")
+
+    assert da3.n_timesteps == 3
+    assert da3.to_numpy()[2] == 3.0
+
+
 def test_concat_by_time():
     ds1 = mikeio.read("tests/testdata/tide1.dfs1")
     ds2 = mikeio.read("tests/testdata/tide2.dfs1") + 0.5  # add offset