From f0954805b768eedcca79bd1ff4c42820c79709a5 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Mon, 7 Aug 2023 12:12:14 +0200 Subject: [PATCH] Issue #197/#211/#213 use load_geojson in TestVectorCubeRunUDF --- openeo_driver/ProcessGraphDeserializer.py | 1 + openeo_driver/datacube.py | 8 +- tests/test_views_execute.py | 213 ++++++++++------------ 3 files changed, 104 insertions(+), 118 deletions(-) diff --git a/openeo_driver/ProcessGraphDeserializer.py b/openeo_driver/ProcessGraphDeserializer.py index 51fdfdb9..6c978cdb 100644 --- a/openeo_driver/ProcessGraphDeserializer.py +++ b/openeo_driver/ProcessGraphDeserializer.py @@ -1598,6 +1598,7 @@ def load_geojson(args: ProcessArgs, env: EvalEnv) -> DriverVectorCube: allowed_types=["Point", "MultiPoint", "Polygon", "MultiPolygon", "Feature", "FeatureCollection"] ), ) + # TODO: better default value for `properties`? https://github.com/Open-EO/openeo-processes/issues/448 properties = args.get_optional("properties", default=[], expected_type=(list, tuple)) vector_cube = env.backend_implementation.vector_cube_cls.from_geojson(data, columns_for_cube=properties) return vector_cube diff --git a/openeo_driver/datacube.py b/openeo_driver/datacube.py index 6bed712f..8d4103b1 100644 --- a/openeo_driver/datacube.py +++ b/openeo_driver/datacube.py @@ -21,7 +21,7 @@ from pyproj import CRS from openeo_driver.datastructs import ResolutionMergeArgs, SarBackscatterArgs, StacAsset -from openeo_driver.errors import FeatureUnsupportedException, InternalException +from openeo_driver.errors import FeatureUnsupportedException, InternalException, ProcessGraphInvalidException from openeo_driver.util.geometry import GeometryBufferer, validate_geojson_coordinates from openeo_driver.util.ioformats import IOFORMATS from openeo_driver.util.pgparsing import SingleRunUDFProcessGraph @@ -638,6 +638,10 @@ def apply_dimension( if single_run_udf: # Process with single "run_udf" node + if single_run_udf.data != {"from_parameter": "data"}: + raise ProcessGraphInvalidException( + message="Vector cube `apply_dimension` process does not reference `data` parameter." + ) if ( dimension == self.DIM_GEOMETRIES or (dimension in {self.DIM_BANDS, self.DIM_PROPERTIES}.intersection(self.get_dimension_names())) @@ -651,7 +655,7 @@ def apply_dimension( feature_collection = openeo.udf.FeatureCollection(id="_", data=gdf) # TODO: dedicated UDF signature to indicate to work on vector cube through a feature collection based API udf_data = openeo.udf.UdfData( - proj={"EPSG": self._geometries.crs.to_epsg()}, + proj={"EPSG": self._geometries.crs.to_epsg()} if self._geometries.crs else None, feature_collection_list=[feature_collection], user_context=context, ) diff --git a/tests/test_views_execute.py b/tests/test_views_execute.py index a43f4580..d1596a1c 100644 --- a/tests/test_views_execute.py +++ b/tests/test_views_execute.py @@ -3628,6 +3628,22 @@ class TestVectorCubeRunUDF: - https://github.com/Open-EO/openeo-geopyspark-driver/issues/437 """ + def _build_run_udf_callback(self, udf_code: str) -> dict: + udf_code = textwrap.dedent(udf_code) + return { + "process_graph": { + "runudf1": { + "process_id": "run_udf", + "arguments": { + "data": {"from_parameter": "data"}, + "udf": udf_code, + "runtime": "Python", + }, + "result": True, + } + }, + } + @pytest.mark.parametrize( "dimension", [ @@ -3636,40 +3652,32 @@ class TestVectorCubeRunUDF: ], ) def test_apply_dimension_run_udf_change_geometry(self, api100, dimension): - udf_code = """ - from openeo.udf import UdfData, FeatureCollection - def process_geometries(udf_data: UdfData) -> UdfData: - [feature_collection] = udf_data.get_feature_collection_list() - gdf = feature_collection.data - gdf["geometry"] = gdf["geometry"].buffer(distance=1, resolution=2) - udf_data.set_feature_collection_list([ - FeatureCollection(id="_", data=gdf), - ]) - """ - udf_code = textwrap.dedent(udf_code) + """VectorCube + apply_dimension + UDF (changing geometry)""" process_graph = { - "get_vector_data": { - "process_id": "load_uploaded_files", - "arguments": {"paths": [str(get_path("geojson/FeatureCollection02.json"))], "format": "GeoJSON"}, + "load": { + "process_id": "load_geojson", + "arguments": { + "data": load_json("geojson/FeatureCollection02.json"), + "properties": ["pop"], + }, }, "apply_dimension": { "process_id": "apply_dimension", "arguments": { - "data": {"from_node": "get_vector_data"}, + "data": {"from_node": "load"}, "dimension": dimension, - "process": { - "process_graph": { - "runudf1": { - "process_id": "run_udf", - "arguments": { - "data": {"from_node": "get_vector_data"}, - "udf": udf_code, - "runtime": "Python", - }, - "result": True, - } - }, - }, + "process": self._build_run_udf_callback( + """ + from openeo.udf import UdfData, FeatureCollection + def process_geometries(udf_data: UdfData) -> UdfData: + [feature_collection] = udf_data.get_feature_collection_list() + gdf = feature_collection.data + gdf["geometry"] = gdf["geometry"].buffer(distance=1, resolution=2) + udf_data.set_feature_collection_list([ + FeatureCollection(id="_", data=gdf), + ]) + """ + ), }, "result": True, }, @@ -3708,42 +3716,33 @@ def test_apply_dimension_run_udf_filter_on_geometries(self, api100, dimension): Test to use `apply_dimension(dimension="...", process=UDF)` to filter out certain entries from geometries dimension based on geometry (e.g. intersection with another geometry) """ - udf_code = """ - from openeo.udf import UdfData, FeatureCollection - import shapely.geometry - def process_geometries(udf_data: UdfData) -> UdfData: - [feature_collection] = udf_data.get_feature_collection_list() - gdf = feature_collection.data - to_intersect = shapely.geometry.box(4, 3, 8, 4) - gdf = gdf[gdf["geometry"].intersects(to_intersect)] - udf_data.set_feature_collection_list([ - FeatureCollection(id="_", data=gdf), - ]) - """ - udf_code = textwrap.dedent(udf_code) process_graph = { - "get_vector_data": { - "process_id": "load_uploaded_files", - "arguments": {"paths": [str(get_path("geojson/FeatureCollection10.json"))], "format": "GeoJSON"}, + "load": { + "process_id": "load_geojson", + "arguments": { + "data": load_json("geojson/FeatureCollection10.json"), + "properties": ["pop"], + }, }, "apply_dimension": { "process_id": "apply_dimension", "arguments": { - "data": {"from_node": "get_vector_data"}, + "data": {"from_node": "load"}, "dimension": dimension, - "process": { - "process_graph": { - "runudf1": { - "process_id": "run_udf", - "arguments": { - "data": {"from_node": "get_vector_data"}, - "udf": udf_code, - "runtime": "Python", - }, - "result": True, - } - }, - }, + "process": self._build_run_udf_callback( + """ + from openeo.udf import UdfData, FeatureCollection + import shapely.geometry + def process_geometries(udf_data: UdfData) -> UdfData: + [feature_collection] = udf_data.get_feature_collection_list() + gdf = feature_collection.data + to_intersect = shapely.geometry.box(4, 3, 8, 4) + gdf = gdf[gdf["geometry"].intersects(to_intersect)] + udf_data.set_feature_collection_list([ + FeatureCollection(id="_", data=gdf), + ]) + """ + ), }, "result": True, }, @@ -3787,41 +3786,32 @@ def test_apply_dimension_run_udf_filter_on_properties(self, api100, dimension): as apply_dimension only allows changing the cardinality of the provided dimension ("properties" in this case), not any other dimension (like "geometries" in this case). """ - udf_code = """ - from openeo.udf import UdfData, FeatureCollection - import shapely.geometry - def process_geometries(udf_data: UdfData) -> UdfData: - [feature_collection] = udf_data.get_feature_collection_list() - gdf = feature_collection.data - gdf = gdf[gdf["pop"] > 500] - udf_data.set_feature_collection_list([ - FeatureCollection(id="_", data=gdf), - ]) - """ - udf_code = textwrap.dedent(udf_code) process_graph = { - "get_vector_data": { - "process_id": "load_uploaded_files", - "arguments": {"paths": [str(get_path("geojson/FeatureCollection10.json"))], "format": "GeoJSON"}, + "load": { + "process_id": "load_geojson", + "arguments": { + "data": load_json("geojson/FeatureCollection10.json"), + "properties": ["pop"], + }, }, "apply_dimension": { "process_id": "apply_dimension", "arguments": { - "data": {"from_node": "get_vector_data"}, + "data": {"from_node": "load"}, "dimension": dimension, - "process": { - "process_graph": { - "runudf1": { - "process_id": "run_udf", - "arguments": { - "data": {"from_node": "get_vector_data"}, - "udf": udf_code, - "runtime": "Python", - }, - "result": True, - } - }, - }, + "process": self._build_run_udf_callback( + """ + from openeo.udf import UdfData, FeatureCollection + import shapely.geometry + def process_geometries(udf_data: UdfData) -> UdfData: + [feature_collection] = udf_data.get_feature_collection_list() + gdf = feature_collection.data + gdf = gdf[gdf["pop"] > 500] + udf_data.set_feature_collection_list([ + FeatureCollection(id="_", data=gdf), + ]) + """ + ), }, "result": True, }, @@ -3859,41 +3849,32 @@ def test_apply_dimension_run_udf_add_properties(self, api100, dimension): """ Test to use `apply_dimension(dimension="...", process=UDF)` to add properties """ - udf_code = """ - from openeo.udf import UdfData, FeatureCollection - import shapely.geometry - def process_geometries(udf_data: UdfData) -> UdfData: - [feature_collection] = udf_data.get_feature_collection_list() - gdf = feature_collection.data - gdf["poppop"] = gdf["pop"] ** 2 - udf_data.set_feature_collection_list([ - FeatureCollection(id="_", data=gdf), - ]) - """ - udf_code = textwrap.dedent(udf_code) process_graph = { - "get_vector_data": { - "process_id": "load_uploaded_files", - "arguments": {"paths": [str(get_path("geojson/FeatureCollection02.json"))], "format": "GeoJSON"}, + "load": { + "process_id": "load_geojson", + "arguments": { + "data": load_json("geojson/FeatureCollection02.json"), + "properties": ["pop"], + }, }, "apply_dimension": { "process_id": "apply_dimension", "arguments": { - "data": {"from_node": "get_vector_data"}, + "data": {"from_node": "load"}, "dimension": dimension, - "process": { - "process_graph": { - "runudf1": { - "process_id": "run_udf", - "arguments": { - "data": {"from_node": "get_vector_data"}, - "udf": udf_code, - "runtime": "Python", - }, - "result": True, - } - }, - }, + "process": self._build_run_udf_callback( + """ + from openeo.udf import UdfData, FeatureCollection + import shapely.geometry + def process_geometries(udf_data: UdfData) -> UdfData: + [feature_collection] = udf_data.get_feature_collection_list() + gdf = feature_collection.data + gdf["poppop"] = gdf["pop"] ** 2 + udf_data.set_feature_collection_list([ + FeatureCollection(id="_", data=gdf), + ]) + """ + ), }, "result": True, },