Issue #197 add basic pg based test of UDF on VectorCube

Open-EO · Aug 3, 2023 · 21bc21d · 21bc21d
1 parent a25f3ab
commit 21bc21d
Show file tree

Hide file tree

Showing 6 changed files with 171 additions and 16 deletions.
diff --git a/openeo_driver/ProcessGraphDeserializer.py b/openeo_driver/ProcessGraphDeserializer.py
@@ -677,10 +677,10 @@ def apply_neighborhood(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
 
 @process
 def apply_dimension(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
-    data_cube = args.get_required("data", expected_type=DriverDataCube)
+    data_cube = args.get_required("data", expected_type=(DriverDataCube, DriverVectorCube))
     process = args.get_deep("process", "process_graph", expected_type=dict)
     dimension = args.get_required(
-        "dimension", expected_type=str, validator=ProcessArgs.validator_one_of(data_cube.metadata.dimension_names())
+        "dimension", expected_type=str, validator=ProcessArgs.validator_one_of(data_cube.get_dimension_names())
     )
     target_dimension = args.get_optional("target_dimension", default=None, expected_type=str)
     context = args.get_optional("context", default=None)
@@ -748,7 +748,7 @@ def reduce_dimension(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
     data_cube: DriverDataCube = args.get_required("data", expected_type=DriverDataCube)
     reduce_pg = args.get_deep("reducer", "process_graph", expected_type=dict)
     dimension = args.get_required(
-        "dimension", expected_type=str, validator=ProcessArgs.validator_one_of(data_cube.metadata.dimension_names())
+        "dimension", expected_type=str, validator=ProcessArgs.validator_one_of(data_cube.get_dimension_names())
     )
     context = args.get_optional("context", default=None)
     return data_cube.reduce_dimension(reducer=reduce_pg, dimension=dimension, context=context, env=env)
@@ -924,7 +924,7 @@ def aggregate_temporal(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
     dimension = args.get_optional(
         "dimension",
         default=lambda: data_cube.metadata.temporal_dimension.name,
-        validator=ProcessArgs.validator_one_of(data_cube.metadata.dimension_names()),
+        validator=ProcessArgs.validator_one_of(data_cube.get_dimension_names()),
     )
     context = args.get_optional("context", default=None)
 
@@ -941,7 +941,7 @@ def aggregate_temporal_period(args: ProcessArgs, env: EvalEnv) -> DriverDataCube
     dimension = args.get_optional(
         "dimension",
         default=lambda: data_cube.metadata.temporal_dimension.name,
-        validator=ProcessArgs.validator_one_of(data_cube.metadata.dimension_names()),
+        validator=ProcessArgs.validator_one_of(data_cube.get_dimension_names()),
     )
     context = args.get_optional("context", default=None)
 

diff --git a/openeo_driver/datacube.py b/openeo_driver/datacube.py
@@ -61,6 +61,9 @@ def __eq__(self, o: object) -> bool:
                 return True
         return False
 
+    def get_dimension_names(self) -> List[str]:
+        return self.metadata.dimension_names()
+
     def _not_implemented(self):
         """Helper to raise a NotImplemented exception containing method name"""
         raise NotImplementedError("DataCube method not implemented: {m!r}".format(m=inspect.stack()[1].function))
@@ -511,6 +514,12 @@ def to_legacy_save_result(self) -> Union["AggregatePolygonResult", "JSONResult"]
             f"Unsupported cube configuration {cube.dims} for _write_legacy_aggregate_polygon_result_json"
         )
 
+    def get_dimension_names(self) -> List[str]:
+        if self._cube is None:
+            return [self.DIM_GEOMETRIES]
+        else:
+            return list(self._cube.dims)
+
     def get_bounding_box(self) -> Tuple[float, float, float, float]:
         # TODO: cache bounding box?
         # TODO #114 #141 Open-EO/openeo-geopyspark-driver#239: option to buffer point geometries (if any)

diff --git a/openeo_driver/dummy/dummy_backend.py b/openeo_driver/dummy/dummy_backend.py
@@ -181,10 +181,12 @@ def __init__(self, metadata: CollectionMetadata = None):
         self.apply_tiles = Mock(name="apply_tiles", return_value=self)
         self.apply_tiles_spatiotemporal = Mock(name="apply_tiles_spatiotemporal", return_value=self)
 
-        # Create mock methods for remaining data cube methods that are not yet defined
-        already_defined = set(DummyDataCube.__dict__.keys()).union(self.__dict__.keys())
+        # Create mock methods for remaining DriverDataCube methods that are not yet defined directly by DummyDataCube
+        to_keep = set(DummyDataCube.__dict__.keys()).union(self.__dict__.keys())
+        to_keep.update(m for m in DriverDataCube.__dict__.keys() if m.startswith("_"))
+        to_keep.update(["get_dimension_names"])
         for name, method in DriverDataCube.__dict__.items():
-            if not name.startswith('_') and name not in already_defined and callable(method):
+            if not name in to_keep and callable(method):
                 setattr(self, name, Mock(name=name, return_value=self))
 
         for name in [n for n, m in DummyDataCube.__dict__.items() if getattr(m, '_mock_side_effect', False)]:

diff --git a/openeo_driver/testing.py b/openeo_driver/testing.py
@@ -10,7 +10,7 @@
 import re
 import urllib.request
 from pathlib import Path
-from typing import Any, Callable, Dict, Optional, Pattern, Tuple, Union
+from typing import Any, Callable, Dict, Optional, Pattern, Tuple, Union, Collection
 from unittest import mock
 
 import pytest
@@ -532,9 +532,48 @@ def to_geojson_feature_collection(self) -> dict:
         return approxify(result, rel=self.rel, abs=self.abs)
 
 
-def caplog_with_custom_formatter(
-    caplog: pytest.LogCaptureFixture, format: Union[str, logging.Formatter]
-):
+class ApproxGeoJSONByBounds:
+    """pytest assert helper to build a matcher to check if a certain GeoJSON construct is within expected bounds"""
+
+    def __init__(
+        self,
+        *args,
+        types: Collection[str] = ("Polygon", "MultiPolygon"),
+        rel: Optional[float] = None,
+        abs: Optional[float] = None,
+    ):
+        bounds = args[0] if len(args) == 1 else args
+        assert isinstance(bounds, (list, tuple)) and len(bounds) == 4
+        self.expected_bounds = [float(b) for b in bounds]
+        self.rel = rel
+        self.abs = abs
+        self.expected_types = set(types)
+        self.actual_info = []
+
+    def __eq__(self, other):
+        try:
+            assert isinstance(other, dict), "Not a dict"
+            assert "type" in other, "No 'type' field"
+            assert other["type"] in self.expected_types, f"Wrong type {other['type']!r}"
+            assert "coordinates" in other, "No 'coordinates' field"
+
+            actual_bounds = shapely.geometry.shape(other).bounds
+            matching = actual_bounds == pytest.approx(self.expected_bounds, rel=self.rel, abs=self.abs)
+            if not matching:
+                self.actual_info.append(f"expected bounds {self.expected_bounds} != actual bounds: {actual_bounds}")
+            return matching
+        except Exception as e:
+            self.actual_info.append(str(e))
+        return False
+
+    def __repr__(self):
+        msg = f"<{type(self).__name__} types={self.expected_types} bounds={self.expected_bounds} rel={self.rel}, abs={self.abs}>"
+        if self.actual_info:
+            msg += "\n" + "\n".join(f"    # {i}" for i in self.actual_info)
+        return msg
+
+
+def caplog_with_custom_formatter(caplog: pytest.LogCaptureFixture, format: Union[str, logging.Formatter]):
     """
     Context manager to set a custom formatter on the caplog fixture.
 

diff --git a/tests/test_testing.py b/tests/test_testing.py
@@ -21,6 +21,7 @@
     caplog_with_custom_formatter,
     ephemeral_fileserver,
     preprocess_check_and_replace,
+    ApproxGeoJSONByBounds,
 )
 
 
@@ -284,3 +285,38 @@ def test_caplog_with_custom_formatter(caplog, format):
         "[WARNING] still not good (root)",
         "WARNING  root:test_testing.py:XXX hmm bad times",
     ]
+
+
+class TestApproxGeoJSONByBounds:
+    def test_basic(self):
+        geometry = {"type": "Polygon", "coordinates": [[[1, 2], [3, 1], [2, 4], [1, 2]]]}
+        assert geometry == ApproxGeoJSONByBounds(1, 1, 3, 4, abs=0.1)
+
+    @pytest.mark.parametrize(
+        ["data", "expected_message"],
+        [
+            ("nope", "# Not a dict"),
+            ({"foo": "bar"}, "    # No 'type' field"),
+            ({"type": "Polygommm", "coordinates": [[[1, 2], [3, 1], [2, 4], [1, 2]]]}, "    # Wrong type 'Polygommm'"),
+            ({"type": "Polygon"}, "    # No 'coordinates' field"),
+        ],
+    )
+    def test_invalid_construct(self, data, expected_message):
+        expected = ApproxGeoJSONByBounds(1, 2, 3, 4)
+        assert data != expected
+        assert expected_message in repr(expected)
+
+    def test_out_of_bounds(self):
+        geometry = {"type": "Polygon", "coordinates": [[[1, 2], [3, 1], [2, 4], [1, 2]]]}
+        expected = ApproxGeoJSONByBounds(11, 22, 33, 44, abs=0.1)
+        assert geometry != expected
+        assert "# expected bounds [11.0, 22.0, 33.0, 44.0] != actual bounds: (1.0, 1.0, 3.0, 4.0)" in repr(expected)
+
+    def test_types(self):
+        geometry = {"type": "Polygon", "coordinates": [[[1, 2], [3, 1], [2, 4], [1, 2]]]}
+        assert geometry == ApproxGeoJSONByBounds(1, 1, 3, 4, types=["Polygon"], abs=0.1)
+        assert geometry == ApproxGeoJSONByBounds(1, 1, 3, 4, types=["Polygon", "Point"], abs=0.1)
+
+        expected = ApproxGeoJSONByBounds(1, 1, 3, 4, types=["MultiPolygon"], abs=0.1)
+        assert geometry != expected
+        assert "Wrong type 'Polygon'" in repr(expected)
diff --git a/tests/test_views_execute.py b/tests/test_views_execute.py
@@ -33,6 +33,7 @@
     generate_unique_test_process_id,
     preprocess_check_and_replace,
     preprocess_regex_check_and_replace,
+    ApproxGeoJSONByBounds,
 )
 from openeo_driver.util.geometry import as_geojson_feature, as_geojson_feature_collection
 from openeo_driver.util.ioformats import IOFORMATS
@@ -1310,7 +1311,7 @@ def test_run_udf_on_vector_read_vector(api100, udf_code):
                 "udf": udf_code,
                 "runtime": "Python",
             },
-            "result": "true",
+            "result": True,
         },
     }
     resp = api100.check_result(process_graph)
@@ -1355,8 +1356,8 @@ def test_run_udf_on_vector_get_geometries(api100, udf_code):
                 "udf": udf_code,
                 "runtime": "Python",
             },
-            "result": "true"
-        }
+            "result": True,
+        },
     }
     resp = api100.check_result(process_graph)
     assert resp.json == [
@@ -1401,7 +1402,7 @@ def test_run_udf_on_vector_load_uploaded_files(api100, udf_code):
                 "udf": udf_code,
                 "runtime": "Python",
             },
-            "result": "true",
+            "result": True,
         },
     }
     resp = api100.check_result(process_graph)
@@ -3522,3 +3523,71 @@ def test_request_costs_for_failed_request(api, backend_implementation):
     assert env["correlation_id"] == "r-abc123"
 
     get_request_costs.assert_called_with(TEST_USER, "r-abc123", False)
+
+
+class TestVectorCubeRunUDF:
+    """
+    Related to:
+    - https://github.com/Open-EO/openeo-python-driver/issues/197
+    - https://github.com/Open-EO/openeo-python-driver/pull/200
+    - https://github.com/Open-EO/openeo-geopyspark-driver/issues/437
+    """
+
+    def test_apply_dimension_run_udf_change_geometry(self, api100):
+        udf_code = """
+            from openeo.udf import UdfData, FeatureCollection
+            def process_geometries(udf_data: UdfData) -> UdfData:
+                [feature_collection] = udf_data.get_feature_collection_list()
+                gdf = feature_collection.data
+                gdf["geometry"] = gdf["geometry"].buffer(distance=1, resolution=2)
+                udf_data.set_feature_collection_list([
+                    FeatureCollection(id="_", data=gdf),
+                ])
+            """
+        udf_code = textwrap.dedent(udf_code)
+        process_graph = {
+            "get_vector_data": {
+                "process_id": "load_uploaded_files",
+                "arguments": {"paths": [str(get_path("geojson/FeatureCollection02.json"))], "format": "GeoJSON"},
+            },
+            "apply_dimension": {
+                "process_id": "apply_dimension",
+                "arguments": {
+                    "data": {"from_node": "get_vector_data"},
+                    "dimension": "properties",
+                    "process": {
+                        "process_graph": {
+                            "runudf1": {
+                                "process_id": "run_udf",
+                                "arguments": {
+                                    "data": {"from_node": "get_vector_data"},
+                                    "udf": udf_code,
+                                    "runtime": "Python",
+                                },
+                                "result": True,
+                            }
+                        },
+                    },
+                },
+                "result": True,
+            },
+        }
+        resp = api100.check_result(process_graph)
+        # DictSubSet=dict
+        assert resp.json == DictSubSet(
+            {
+                "type": "FeatureCollection",
+                "features": [
+                    {
+                        "type": "Feature",
+                        "geometry": ApproxGeoJSONByBounds(0, 0, 4, 4, types=["Polygon"], abs=0.1),
+                        "properties": {"id": "first", "pop": 1234},
+                    },
+                    {
+                        "type": "Feature",
+                        "geometry": ApproxGeoJSONByBounds(2, 1, 6, 5, types=["Polygon"], abs=0.1),
+                        "properties": {"id": "second", "pop": 5678},
+                    },
+                ],
+            }
+        )