diff --git a/openeo_driver/ProcessGraphDeserializer.py b/openeo_driver/ProcessGraphDeserializer.py index 046ff821..51fdfdb9 100644 --- a/openeo_driver/ProcessGraphDeserializer.py +++ b/openeo_driver/ProcessGraphDeserializer.py @@ -1581,17 +1581,23 @@ def load_uploaded_files(args: dict, env: EvalEnv) -> Union[DriverVectorCube,Driv .returns("vector-cube", schema={"type": "object", "subtype": "vector-cube"}) ) def to_vector_cube(args: Dict, env: EvalEnv): - # TODO: standardization of something like this? https://github.com/Open-EO/openeo-processes/issues/346 + _log.warning("Experimental process `to_vector_cube` is deprecated, use `load_geojson` instead") + # TODO: remove this experimental/deprecated process data = extract_arg(args, "data", process_id="to_vector_cube") if isinstance(data, dict) and data.get("type") in {"Polygon", "MultiPolygon", "Feature", "FeatureCollection"}: return env.backend_implementation.vector_cube_cls.from_geojson(data) - # TODO: support more inputs: string with geojson, string with WKT, list of WKT, string with URL to GeoJSON, ... raise FeatureUnsupportedException(f"Converting {type(data)} to vector cube is not supported") @process_registry_100.add_function(spec=read_spec("openeo-processes/2.x/proposals/load_geojson.json")) def load_geojson(args: ProcessArgs, env: EvalEnv) -> DriverVectorCube: - data = args.get_required("data", validator=ProcessArgs.validator_geojson_dict()) + data = args.get_required( + "data", + validator=ProcessArgs.validator_geojson_dict( + # TODO: also allow LineString and MultiLineString? + allowed_types=["Point", "MultiPoint", "Polygon", "MultiPolygon", "Feature", "FeatureCollection"] + ), + ) properties = args.get_optional("properties", default=[], expected_type=(list, tuple)) vector_cube = env.backend_implementation.vector_cube_cls.from_geojson(data, columns_for_cube=properties) return vector_cube diff --git a/openeo_driver/datacube.py b/openeo_driver/datacube.py index a9a1afcc..b0744e72 100644 --- a/openeo_driver/datacube.py +++ b/openeo_driver/datacube.py @@ -8,6 +8,7 @@ import geopandas as gpd import numpy +import pandas import pyproj import shapely.geometry import shapely.geometry.base @@ -284,14 +285,21 @@ def from_geodataframe( elif columns_for_cube == cls.COLUMN_SELECTION_ALL: columns_for_cube = available_columns elif isinstance(columns_for_cube, list): - # TODO #114 limit to subset with available columns (and automatically fill in missing columns with nodata)? columns_for_cube = columns_for_cube else: raise ValueError(columns_for_cube) assert isinstance(columns_for_cube, list) if columns_for_cube: - cube_df = data[columns_for_cube] + existing = [c for c in columns_for_cube if c in available_columns] + to_add = [c for c in columns_for_cube if c not in available_columns] + if existing: + cube_df = data[existing] + if to_add: + cube_df.loc[:, to_add] = numpy.nan + else: + cube_df = pandas.DataFrame(index=data.index, columns=to_add) + # TODO: remove `columns_for_cube` from geopandas data frame? # Enabling that triggers failure of som existing tests that use `aggregate_spatial` # to "enrich" a vector cube with pre-existing properties @@ -311,6 +319,7 @@ def from_geodataframe( return cls(geometries=geometries_df, cube=cube) else: + # TODO: add a dummy 1D no-data cube? return cls(geometries=data) @classmethod @@ -429,6 +438,16 @@ def to_wkt(self) -> List[str]: wkts = [str(g) for g in self._geometries.geometry] return wkts + def to_internal_json(self) -> dict: + """ + Export to an internal JSON-style representation. + Subject to change any time: not intended for public consumption, just for (unit) test purposes. + """ + return { + "geometries": shapely.geometry.mapping(self._geometries), + "cube": self._cube.to_dict(data="array") if self._cube is not None else None, + } + def get_crs(self) -> pyproj.CRS: return self._geometries.crs or pyproj.CRS.from_epsg(4326) diff --git a/openeo_driver/testing.py b/openeo_driver/testing.py index d1524f58..68f6bf7f 100644 --- a/openeo_driver/testing.py +++ b/openeo_driver/testing.py @@ -6,6 +6,7 @@ import http.server import json import logging +import math import multiprocessing import re import urllib.request @@ -494,6 +495,11 @@ def approxify(x: Any, rel: Optional = None, abs: Optional[float] = None) -> Any: raise ValueError(x) +class IsNan: + def __eq__(self, other): + return isinstance(other, float) and math.isnan(other) + + class ApproxGeometry: """Helper to compactly and approximately compare geometries.""" diff --git a/tests/test_testing.py b/tests/test_testing.py index 23ba81d4..efd8ca27 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -1,6 +1,8 @@ import logging import re import subprocess + +import numpy import sys import textwrap import urllib.error @@ -22,6 +24,7 @@ ephemeral_fileserver, preprocess_check_and_replace, ApproxGeoJSONByBounds, + IsNan, ) @@ -261,6 +264,18 @@ def test_approxify_tolerance_rel(): assert {"a": [10.1, 2.1]} != approxify({"a": [10, 2.3]}, rel=0.01) +@pytest.mark.parametrize("other", [float("nan"), numpy.nan]) +def test_is_nan(other): + assert other == IsNan() + assert IsNan() == other + + +@pytest.mark.parametrize("other", [0, 123, False, True, None, "dfd", [], {}, ()]) +def test_is_not_nan(other): + assert other != IsNan() + assert IsNan() != other + + @pytest.mark.parametrize( "format", [ diff --git a/tests/test_vectorcube.py b/tests/test_vectorcube.py index c3ce62ba..e07ed2e5 100644 --- a/tests/test_vectorcube.py +++ b/tests/test_vectorcube.py @@ -10,7 +10,7 @@ from openeo_driver.errors import OpenEOApiException from openeo_driver.datacube import DriverVectorCube -from openeo_driver.testing import DictSubSet, ApproxGeometry +from openeo_driver.testing import DictSubSet, ApproxGeometry, IsNan from openeo_driver.util.geometry import as_geojson_feature_collection from openeo_driver.utils import EvalEnv @@ -83,6 +83,144 @@ def test_to_wkt(self, gdf): ['POLYGON ((1 1, 3 1, 2 3, 1 1))', 'POLYGON ((4 2, 5 4, 3 4, 4 2))'] ) + def test_to_internal_json_defaults(self, gdf): + vc = DriverVectorCube(gdf) + assert vc.to_internal_json() == { + "geometries": DictSubSet( + { + "type": "FeatureCollection", + "features": [ + DictSubSet( + { + "type": "Feature", + "geometry": { + "type": "Polygon", + "coordinates": (((1.0, 1.0), (3.0, 1.0), (2.0, 3.0), (1.0, 1.0)),), + }, + "properties": {"id": "first", "pop": 1234}, + } + ), + DictSubSet( + { + "type": "Feature", + "geometry": { + "type": "Polygon", + "coordinates": (((4.0, 2.0), (5.0, 4.0), (3.0, 4.0), (4.0, 2.0)),), + }, + "properties": {"id": "second", "pop": 5678}, + } + ), + ], + } + ), + "cube": None, + } + + @pytest.mark.parametrize( + ["columns_for_cube", "expected_cube"], + [ + ( + "numerical", + { + "name": None, + "dims": ("geometries", "properties"), + "coords": { + "geometries": {"attrs": {}, "data": [0, 1], "dims": ("geometries",)}, + "properties": {"attrs": {}, "data": ["pop"], "dims": ("properties",)}, + }, + "data": [[1234], [5678]], + "attrs": {}, + }, + ), + ( + "all", + { + "name": None, + "dims": ("geometries", "properties"), + "coords": { + "geometries": {"attrs": {}, "data": [0, 1], "dims": ("geometries",)}, + "properties": {"attrs": {}, "data": ["id", "pop"], "dims": ("properties",)}, + }, + "data": [["first", 1234], ["second", 5678]], + "attrs": {}, + }, + ), + ([], None), + ( + ["pop", "id"], + { + "name": None, + "dims": ("geometries", "properties"), + "coords": { + "geometries": {"attrs": {}, "data": [0, 1], "dims": ("geometries",)}, + "properties": {"attrs": {}, "data": ["pop", "id"], "dims": ("properties",)}, + }, + "data": [[1234, "first"], [5678, "second"]], + "attrs": {}, + }, + ), + ( + ["pop", "color"], + { + "name": None, + "dims": ("geometries", "properties"), + "coords": { + "geometries": {"attrs": {}, "data": [0, 1], "dims": ("geometries",)}, + "properties": {"attrs": {}, "data": ["pop", "color"], "dims": ("properties",)}, + }, + "data": [[1234.0, IsNan()], [5678.0, IsNan()]], + "attrs": {}, + }, + ), + ( + ["color"], + { + "name": None, + "dims": ("geometries", "properties"), + "coords": { + "geometries": {"attrs": {}, "data": [0, 1], "dims": ("geometries",)}, + "properties": {"attrs": {}, "data": ["color"], "dims": ("properties",)}, + }, + "data": [[IsNan()], [IsNan()]], + "attrs": {}, + }, + ), + ], + ) + def test_to_internal_json_columns_for_cube(self, gdf, columns_for_cube, expected_cube): + vc = DriverVectorCube.from_geodataframe(gdf, columns_for_cube=columns_for_cube) + internal = vc.to_internal_json() + assert internal == { + "geometries": DictSubSet( + { + "type": "FeatureCollection", + "features": [ + DictSubSet( + { + "type": "Feature", + "geometry": { + "type": "Polygon", + "coordinates": (((1.0, 1.0), (3.0, 1.0), (2.0, 3.0), (1.0, 1.0)),), + }, + "properties": {"id": "first", "pop": 1234}, + } + ), + DictSubSet( + { + "type": "Feature", + "geometry": { + "type": "Polygon", + "coordinates": (((4.0, 2.0), (5.0, 4.0), (3.0, 4.0), (4.0, 2.0)),), + }, + "properties": {"id": "second", "pop": 5678}, + } + ), + ], + } + ), + "cube": expected_cube, + } + def test_get_crs(self, gdf): vc = DriverVectorCube(gdf) assert vc.get_crs() == pyproj.CRS.from_epsg(4326) @@ -193,53 +331,123 @@ def test_from_geodataframe_default(self, gdf): assert {k: list(v.values) for k, v in cube.coords.items()} == {"geometries": [0, 1], "properties": ["pop"]} @pytest.mark.parametrize( - ["columns_for_cube", "expected"], + ["columns_for_cube", "expected_cube"], [ - ("numerical", {"shape": (2, 1), "coords": {"geometries": [0, 1], "properties": ["pop"]}}), - ("all", {"shape": (2, 2), "coords": {"geometries": [0, 1], "properties": ["id", "pop"]}}), + ( + "numerical", + { + "name": None, + "dims": ("geometries", "properties"), + "coords": { + "geometries": {"attrs": {}, "data": [0, 1], "dims": ("geometries",)}, + "properties": {"attrs": {}, "data": ["pop"], "dims": ("properties",)}, + }, + "data": [[1234], [5678]], + "attrs": {}, + }, + ), + ( + "all", + { + "name": None, + "dims": ("geometries", "properties"), + "coords": { + "geometries": {"attrs": {}, "data": [0, 1], "dims": ("geometries",)}, + "properties": {"attrs": {}, "data": ["id", "pop"], "dims": ("properties",)}, + }, + "data": [["first", 1234], ["second", 5678]], + "attrs": {}, + }, + ), ([], None), - (["id"], {"shape": (2, 1), "coords": {"geometries": [0, 1], "properties": ["id"]}}), - (["pop", "id"], {"shape": (2, 2), "coords": {"geometries": [0, 1], "properties": ["pop", "id"]}}), - # TODO: test specifying non-existent column (to be filled with no-data): - # (["pop", "nopenope"], {"shape": (2, 2), "coords": {"geometries": [0, 1], "properties": ["pop", "nopenope"]}}), + ( + ["id"], + { + "name": None, + "dims": ("geometries", "properties"), + "coords": { + "geometries": {"attrs": {}, "data": [0, 1], "dims": ("geometries",)}, + "properties": {"attrs": {}, "data": ["id"], "dims": ("properties",)}, + }, + "data": [["first"], ["second"]], + "attrs": {}, + }, + ), + ( + ["pop", "id"], + { + "name": None, + "dims": ("geometries", "properties"), + "coords": { + "geometries": {"attrs": {}, "data": [0, 1], "dims": ("geometries",)}, + "properties": {"attrs": {}, "data": ["pop", "id"], "dims": ("properties",)}, + }, + "data": [[1234, "first"], [5678, "second"]], + "attrs": {}, + }, + ), + ( + ["color"], + { + "name": None, + "dims": ("geometries", "properties"), + "coords": { + "geometries": {"attrs": {}, "data": [0, 1], "dims": ("geometries",)}, + "properties": {"attrs": {}, "data": ["color"], "dims": ("properties",)}, + }, + "data": [[IsNan()], [IsNan()]], + "attrs": {}, + }, + ), + ( + ["pop", "color"], + { + "name": None, + "dims": ("geometries", "properties"), + "coords": { + "geometries": {"attrs": {}, "data": [0, 1], "dims": ("geometries",)}, + "properties": {"attrs": {}, "data": ["pop", "color"], "dims": ("properties",)}, + }, + "data": [[1234, IsNan()], [5678, IsNan()]], + "attrs": {}, + }, + ), ], ) - def test_from_geodataframe_columns_for_cube(self, gdf, columns_for_cube, expected): + def test_from_geodataframe_columns_for_cube(self, gdf, columns_for_cube, expected_cube): vc = DriverVectorCube.from_geodataframe(gdf, columns_for_cube=columns_for_cube) - assert vc.to_geojson() == DictSubSet( - { - "type": "FeatureCollection", - "features": [ - DictSubSet( - { - "type": "Feature", - "properties": {"id": "first", "pop": 1234}, - "geometry": { - "coordinates": (((1.0, 1.0), (3.0, 1.0), (2.0, 3.0), (1.0, 1.0)),), - "type": "Polygon", - }, - } - ), - DictSubSet( - { - "type": "Feature", - "properties": {"id": "second", "pop": 5678}, - "geometry": { - "coordinates": (((4.0, 2.0), (5.0, 4.0), (3.0, 4.0), (4.0, 2.0)),), - "type": "Polygon", - }, - } - ), - ], - } - ) - cube = vc.get_cube() - if expected is None: - assert cube is None - else: - assert cube.dims == ("geometries", "properties") - assert cube.shape == expected["shape"] - assert {k: list(v.values) for k, v in cube.coords.items()} == expected["coords"] + + assert vc.to_internal_json() == { + "geometries": DictSubSet( + { + "type": "FeatureCollection", + "features": [ + DictSubSet( + { + "type": "Feature", + "geometry": { + "type": "Polygon", + "coordinates": (((1.0, 1.0), (3.0, 1.0), (2.0, 3.0), (1.0, 1.0)),), + }, + "properties": {"id": "first", "pop": 1234}, + } + ), + DictSubSet( + { + "type": "Feature", + "geometry": { + "type": "Polygon", + "coordinates": (((4.0, 2.0), (5.0, 4.0), (3.0, 4.0), (4.0, 2.0)),), + }, + "properties": {"id": "second", "pop": 5678}, + } + ), + ], + } + ), + "cube": expected_cube, + } + @pytest.mark.parametrize(["geojson", "expected"], [ ( diff --git a/tests/test_views_execute.py b/tests/test_views_execute.py index 8f23aa77..48cbb82c 100644 --- a/tests/test_views_execute.py +++ b/tests/test_views_execute.py @@ -1801,6 +1801,175 @@ def test_vector_save_result(self, api, output_format, content_type, data_prefix, assert isinstance(geometry, shapely.geometry.Polygon) assert geometry.bounds == expected + @pytest.mark.parametrize( + ["geojson", "expected"], + [ + ( + {"type": "Polygon", "coordinates": [[(1, 1), (3, 1), (2, 3), (1, 1)]]}, + [ + { + "type": "Feature", + "geometry": {"type": "Polygon", "coordinates": [[[1, 1], [3, 1], [2, 3], [1, 1]]]}, + "properties": {}, + }, + ], + ), + ( + {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]}, + [ + { + "type": "Feature", + "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]}, + "properties": {}, + }, + ], + ), + ( + { + "type": "Feature", + "geometry": {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]}, + "properties": {"id": "12_3"}, + }, + [ + { + "type": "Feature", + "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]}, + "properties": {"id": "12_3"}, + }, + ], + ), + ( + { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": {"type": "Polygon", "coordinates": [[(1, 1), (3, 1), (2, 3), (1, 1)]]}, + "properties": {"id": 1}, + }, + { + "type": "Feature", + "geometry": {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]}, + "properties": {"id": 2}, + }, + ], + }, + [ + { + "type": "Feature", + "geometry": {"type": "Polygon", "coordinates": [[[1, 1], [3, 1], [2, 3], [1, 1]]]}, + "properties": {"id": 1}, + }, + { + "type": "Feature", + "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]}, + "properties": {"id": 2}, + }, + ], + ), + ], + ) + def test_to_vector_cube(self, api100, geojson, expected): + res = api100.check_result( + { + "vc": { + "process_id": "to_vector_cube", + "arguments": {"data": geojson}, + "result": True, + } + } + ) + assert res.json == DictSubSet( + { + "type": "FeatureCollection", + "features": expected, + } + ) + + @pytest.mark.parametrize( + ["geojson", "expected"], + [ + ( + {"type": "Point", "coordinates": (1, 2)}, + [ + { + "type": "Feature", + "geometry": {"type": "Point", "coordinates": [1, 2]}, + "properties": {}, + }, + ], + ), + ( + {"type": "Polygon", "coordinates": [[(1, 1), (3, 1), (2, 3), (1, 1)]]}, + [ + { + "type": "Feature", + "geometry": {"type": "Polygon", "coordinates": [[[1, 1], [3, 1], [2, 3], [1, 1]]]}, + "properties": {}, + }, + ], + ), + ( + {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]}, + [ + { + "type": "Feature", + "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]}, + "properties": {}, + }, + ], + ), + ( + { + "type": "Feature", + "geometry": {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]}, + "properties": {"id": "12_3"}, + }, + [ + { + "type": "Feature", + "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]}, + "properties": {"id": "12_3"}, + }, + ], + ), + ( + { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": {"type": "Polygon", "coordinates": [[(1, 1), (3, 1), (2, 3), (1, 1)]]}, + "properties": {"id": 1}, + }, + { + "type": "Feature", + "geometry": {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]}, + "properties": {"id": 2}, + }, + ], + }, + [ + { + "type": "Feature", + "geometry": {"type": "Polygon", "coordinates": [[[1, 1], [3, 1], [2, 3], [1, 1]]]}, + "properties": {"id": 1}, + }, + { + "type": "Feature", + "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]}, + "properties": {"id": 2}, + }, + ], + ), + ], + ) + def test_load_geojson(self, api100, geojson, expected): + res = api100.check_result( + {"vc": {"process_id": "load_geojson", "arguments": {"data": geojson}, "result": True}} + ) + assert res.json == DictSubSet({"type": "FeatureCollection", "features": expected}) + def test_no_nested_JSONResult(api): api.set_auth_bearer_token() @@ -3365,82 +3534,6 @@ def test_if_merge_cubes(api100): }) -@pytest.mark.parametrize(["geojson", "expected"], [ - ( - {"type": "Polygon", "coordinates": [[(1, 1), (3, 1), (2, 3), (1, 1)]]}, - [ - { - "type": "Feature", - "geometry": {"type": "Polygon", "coordinates": [[[1, 1], [3, 1], [2, 3], [1, 1]]]}, - "properties": {}, - }, - ], - ), - ( - {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]}, - [ - { - "type": "Feature", - "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]}, - "properties": {}, - }, - ], - ), - ( - { - "type": "Feature", - "geometry": {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]}, - "properties": {"id": "12_3"}, - }, - [ - { - "type": "Feature", - "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]}, - "properties": {"id": "12_3"}, - }, - ], - ), - ( - { - "type": "FeatureCollection", - "features": [ - { - "type": "Feature", - "geometry": {"type": "Polygon", "coordinates": [[(1, 1), (3, 1), (2, 3), (1, 1)]]}, - "properties": {"id": 1}, - }, - { - "type": "Feature", - "geometry": {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]}, - "properties": {"id": 2}, - }, - ]}, - [ - { - "type": "Feature", - "geometry": {"type": "Polygon", "coordinates": [[[1, 1], [3, 1], [2, 3], [1, 1]]]}, - "properties": {"id": 1}, - }, - { - "type": "Feature", - "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]}, - "properties": {"id": 2}, - }, - ], - ), -]) -def test_to_vector_cube(api100, geojson, expected): - res = api100.check_result({ - "vc": { - "process_id": "to_vector_cube", - "arguments": {"data": geojson}, - "result": True, - } - }) - assert res.json == DictSubSet({ - "type": "FeatureCollection", - "features": expected, - }) def test_vector_buffer_returns_error_on_empty_result_geometry(api):