Merge pull request #719 from DHI/fix_dfs0_concat
Special case for generic.concat dfs0
ecomodeller committed Aug 22, 2024
2 parents b14b862 + e168611 commit 0124639
Showing 3 changed files with 52 additions and 21 deletions.
10 changes: 6 additions & 4 deletions mikeio/dataset/_dataarray.py
@@ -1228,15 +1228,15 @@ def interp_like(

@staticmethod
def concat(
dataarrays: Sequence["DataArray"], keep: Literal["last"] = "last"
dataarrays: Sequence["DataArray"], keep: Literal["last", "first"] = "last"
) -> "DataArray":
"""Concatenate DataArrays along the time axis
Parameters
---------
dataarrays: sequence of DataArrays
keep: str, optional
TODO Yet to be implemented, default: last
keep: 'first' or 'last', optional
default: last
Returns
-------
@@ -1506,7 +1506,9 @@ def aggregate(
if "name" in kwargs:
item.name = kwargs.pop("name")

with warnings.catch_warnings(): # there might be all-Nan slices, it is ok, so we ignore them!
with (
warnings.catch_warnings()
): # there might be all-Nan slices, it is ok, so we ignore them!
warnings.simplefilter("ignore", category=RuntimeWarning)
data = func(self.to_numpy(), axis=axis, keepdims=False, **kwargs)
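The comment in this hunk explains why RuntimeWarnings are silenced; outside the diff, the same pattern looks roughly like this (illustrative arrays, not repo data):

import warnings
import numpy as np

data = np.array([[np.nan, np.nan], [1.0, 2.0]])  # first row is an all-NaN slice
with warnings.catch_warnings():
    # nanmean over the all-NaN row would emit a "Mean of empty slice" RuntimeWarning
    warnings.simplefilter("ignore", category=RuntimeWarning)
    row_means = np.nanmean(data, axis=1, keepdims=False)  # array([nan, 1.5])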

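A minimal usage sketch of the new keep option on DataArray.concat, mirroring the test added at the bottom of this diff (the expected arrays are inferred from the overlap handling introduced in this PR):

import numpy as np
import pandas as pd
import mikeio

da1 = mikeio.DataArray(
    data=np.array([1.0, 2.0, 3.0]), time=pd.date_range("2000-01-01", periods=3)
)
da2 = mikeio.DataArray(
    data=np.array([4.0, 5.0]), time=pd.date_range("2000-01-02", periods=2)
)

# keep="last" (default): the later DataArray wins on the two overlapping steps
mikeio.DataArray.concat([da1, da2]).to_numpy()                # [1., 4., 5.]
# keep="first": values already present in da1 are preserved
mikeio.DataArray.concat([da1, da2], keep="first").to_numpy()  # [1., 2., 3.]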
48 changes: 31 additions & 17 deletions mikeio/dataset/_dataset.py
@@ -1169,15 +1169,16 @@ def interp_like(

@staticmethod
def concat(
datasets: Sequence["Dataset"], keep: Literal["last"] = "last"
datasets: Sequence["Dataset"], keep: Literal["last", "first"] = "last"
) -> "Dataset":
"""Concatenate Datasets along the time axis
Parameters
---------
datasets: sequence of Datasets
keep: str, optional
TODO Yet to be implemented, default: last
keep: 'first' or 'last', optional
which values to keep in case of overlap, by default 'last'
Returns
-------
@@ -1195,14 +1196,9 @@ def concat(
>>> ds3.n_timesteps
4
"""

if keep != "last":
raise NotImplementedError(
"Last values is the only available option at the moment."
)
ds = datasets[0].copy()
for dsj in datasets[1:]:
ds = ds._concat_time(dsj, copy=False)
ds = ds._concat_time(dsj, copy=False, keep=keep)

return ds

@@ -1237,7 +1233,12 @@ def merge(datasets: Sequence["Dataset"]) -> "Dataset":

return ds

def _concat_time(self, other: "Dataset", copy: bool = True) -> "Dataset":
def _concat_time(
self,
other: "Dataset",
copy: bool = True,
keep: Literal["last", "first"] = "last",
) -> "Dataset":
self._check_all_items_match(other)
# assuming time is always first dimension we can skip / keep it by bool
start_dim = int("time" in self.dims)
@@ -1264,16 +1265,23 @@ def _concat_time(self, other: "Dataset", copy: bool = True) -> "Dataset":
idx1 = np.where(~df12["idx1"].isna())
idx2 = np.where(~df12["idx2"].isna())
for j in range(ds.n_items):
# # if there is an overlap "other" data will be used!
newdata[j][idx1] = ds[j].to_numpy()
newdata[j][idx2] = other[j].to_numpy()
if keep == "last":
newdata[j][idx1] = ds[j].to_numpy()
newdata[j][idx2] = other[j].to_numpy()
else:
newdata[j][idx2] = other[j].to_numpy()
newdata[j][idx1] = ds[j].to_numpy()

zn = None
if self._zn is not None:
zshape = (len(newtime), self._zn.shape[start_dim])
zn = np.zeros(shape=zshape, dtype=self._zn.dtype)
zn[idx1, :] = self._zn
zn[idx2, :] = other._zn
if keep == "last":
zn[idx1, :] = self._zn
zn[idx2, :] = other._zn
else:
zn[idx2, :] = other._zn
zn[idx1, :] = self._zn

return Dataset(
newdata, time=newtime, items=ds.items, geometry=ds.geometry, zn=zn
@@ -2048,11 +2056,17 @@ def _parse_items(
eum_type = items.type
eum_unit = items.unit
eum_data_value_type = items.data_value_type
item_list = [ItemInfo(name, eum_type, eum_unit, eum_data_value_type) for name in column_names]
item_list = [
ItemInfo(name, eum_type, eum_unit, eum_data_value_type)
for name in column_names
]

elif isinstance(items, Mapping):
item_list = [
ItemInfo(name, items[name].type, items[name].unit, items[name].data_value_type) for name in column_names
ItemInfo(
name, items[name].type, items[name].unit, items[name].data_value_type
)
for name in column_names
]
elif isinstance(items, Sequence):
item_list = [
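The same option is exposed on Dataset.concat; a sketch based on the dfs1 test files already used by test_concat_by_time (assuming the repo's tests/testdata files are on disk):

import mikeio

ds1 = mikeio.read("tests/testdata/tide1.dfs1")
ds2 = mikeio.read("tests/testdata/tide2.dfs1") + 0.5  # offset so the overlap is visible

ds_last = mikeio.Dataset.concat([ds1, ds2])                 # overlap taken from ds2
ds_first = mikeio.Dataset.concat([ds1, ds2], keep="first")  # overlap taken from ds1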
15 changes: 15 additions & 0 deletions tests/test_dataset.py
@@ -1265,6 +1265,21 @@ def test_concat_dataarray_by_time():
assert da3.is_equidistant


def test_concat_dataarray_keep_first() -> None:
da1 = mikeio.DataArray(
data=np.array([1.0, 2.0, 3.0]), time=pd.date_range("2000-01-01", periods=3)
)
# another dataarray with partly overlapping time
da2 = mikeio.DataArray(
data=np.array([4.0, 5.0]), time=pd.date_range("2000-01-02", periods=2)
)

da3 = mikeio.DataArray.concat([da1, da2], keep="first")

assert da3.n_timesteps == 3
assert da3.to_numpy()[2] == 3.0


def test_concat_by_time():
ds1 = mikeio.read("tests/testdata/tide1.dfs1")
ds2 = mikeio.read("tests/testdata/tide2.dfs1") + 0.5 # add offset
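The keep handling in _concat_time above comes down to NumPy assignment order on the overlapping time indices; a standalone illustration with made-up index sets and values:

import numpy as np

idx1, idx2 = [0, 1, 2], [1, 2]            # time steps covered by ds and by other
vals1, vals2 = np.array([1.0, 2.0, 3.0]), np.array([4.0, 5.0])

keep_last = np.zeros(3)
keep_last[idx1] = vals1
keep_last[idx2] = vals2                   # other written last -> [1., 4., 5.]

keep_first = np.zeros(3)
keep_first[idx2] = vals2
keep_first[idx1] = vals1                  # ds written last    -> [1., 2., 3.]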
