diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d344bea..f7e44bd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,10 @@ and start a new "In Progress" section above it. ## In progress +## 0.104.0 + +- Expose CSV/GeoParquet output assets as STAC items ([Open-EO/openeo-geopyspark-driver#787](https://github.com/Open-EO/openeo-geopyspark-driver/issues/787)) + ## 0.103.2 - Start warning about deprecated `evaluate_process_from_url` usage (eu-cdse/openeo-cdse-infra#167) diff --git a/openeo_driver/_version.py b/openeo_driver/_version.py index e2e25bbd..371175ee 100644 --- a/openeo_driver/_version.py +++ b/openeo_driver/_version.py @@ -1 +1 @@ -__version__ = "0.103.2a1" +__version__ = "0.104.0a1" diff --git a/openeo_driver/dummy/dummy_backend.py b/openeo_driver/dummy/dummy_backend.py index 4359fb2b..8884f8da 100644 --- a/openeo_driver/dummy/dummy_backend.py +++ b/openeo_driver/dummy/dummy_backend.py @@ -693,6 +693,35 @@ def get_result_assets(self, job_id: str, user_id: str) -> Dict[str, dict]: != JOB_STATUS.FINISHED ): raise JobNotFinishedException + + if job_id == 'j-2406047c20fc4966ab637d387502728f': + return { + "timeseries.csv": { + "roles": ["data"], + "type": "text/csv", + "href": "s3://OpenEO-data/batch_jobs/j-2406047c20fc4966ab637d387502728f/timeseries.csv", + "bands": [Band(name="S2-L2A-EVI_t0"), Band(name="S2-L2A-EVI_t1"), Band(name="S2-L2A-EVI_t2")], + "geometry": {"type": "Polygon", + "coordinates": [[[2.70964374625748, 51.00377983772219], + [2.70964374625748, 51.10589339112414], + [2.777548414187305, 51.10589339112414], + [2.777548414187305, 51.00377983772219], + [2.70964374625748, 51.00377983772219]]]} + }, + "timeseries.parquet": { + "roles": ["data"], + "type": "application/parquet; profile=geo", + "href": "s3://OpenEO-data/batch_jobs/j-2406047c20fc4966ab637d387502728f/timeseries.parquet", + "bands": [Band(name="S2-L2A-EVI_t0"), Band(name="S2-L2A-EVI_t1"), Band(name="S2-L2A-EVI_t2")], + "geometry": {"type": "Polygon", + "coordinates": [[[2.70964374625748, 51.00377983772219], + [2.70964374625748, 51.10589339112414], + [2.777548414187305, 51.10589339112414], + [2.777548414187305, 51.00377983772219], + [2.70964374625748, 51.00377983772219]]]} + }, + } + return { "output.tiff": { "output_dir": f"{self._output_root()}/{job_id}", diff --git a/openeo_driver/views.py b/openeo_driver/views.py index 71cc3287..f31c5ea0 100644 --- a/openeo_driver/views.py +++ b/openeo_driver/views.py @@ -1120,7 +1120,8 @@ def job_result_item_url(item_id) -> str: for filename, metadata in result_assets.items(): if ("data" in metadata.get("roles", []) and - any(media_type in metadata.get("type", "") for media_type in ["geotiff", "netcdf"])): + any(media_type in metadata.get("type", "") for media_type in + ["geotiff", "netcdf", "text/csv", "application/parquet"])): links.append( {"rel": "item", "href": job_result_item_url(item_id=filename), "type": stac_item_media_type} ) @@ -1306,11 +1307,12 @@ def _get_job_result_item(job_id, item_id, user_id): asset_filename, metadata = next(iter(assets_for_item_id.items())) - geometry = metadata.get("geometry") - bbox = metadata.get("bbox") + job_info = backend_implementation.batch_jobs.get_job_info(job_id, user_id) + + geometry = metadata.get("geometry", job_info.geometry) + bbox = metadata.get("bbox", job_info.bbox) properties = {"datetime": metadata.get("datetime")} - job_info = backend_implementation.batch_jobs.get_job_info(job_id, user_id) if properties["datetime"] is None: to_datetime = Rfc3339(propagate_none=True).datetime @@ -1419,7 +1421,6 @@ def _asset_object(job_id, user_id, filename: str, asset_metadata: dict, job_info # Machine learning models. return result_dict bands = asset_metadata.get("bands") - nodata = asset_metadata.get("nodata") result_dict.update( dict_no_none( diff --git a/tests/test_views.py b/tests/test_views.py index 844ca9fb..71603627 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -1180,7 +1180,15 @@ def _fresh_job_registry(next_job_id="job-1234", output_root: Optional[Path] = No costs=1.23, budget=4.56, proj_shape=[300, 600], - ) + ), + (TEST_USER, 'j-2406047c20fc4966ab637d387502728f'): BatchJobMetadata( + id='j-2406047c20fc4966ab637d387502728f', + status='finished', + created=datetime(2024, 6, 4, 14, 20, 23), + bbox=[2.70964374625748, 51.00377983772219, 2.777548414187305, 51.10589339112414], + start_datetime=datetime(2020, 1, 1, 0, 0, 0), + end_datetime=datetime(2020, 3, 15, 0, 0, 0), + ), } dummy_backend.DummyBatchJobs._job_result_registry = {} @@ -1359,7 +1367,13 @@ def test_list_user_jobs_100(self, api100): 'plan': 'some_plan', 'costs': 1.23, 'budget': 4.56 - } + }, + { + 'id': 'j-2406047c20fc4966ab637d387502728f', + 'status': 'finished', + 'progress': 100, + 'created': "2024-06-04T14:20:23Z", + }, ], "links": [] } @@ -1770,6 +1784,71 @@ def test_get_job_results_110(self, api110): "openeo:status": "finished", } + resp = api110.get("/jobs/j-2406047c20fc4966ab637d387502728f/results", headers=self.AUTH_HEADER) + + assert resp.assert_status_code(200).json == DictSubSet({ + "stac_version": "1.0.0", + "type": "Collection", + "id": "j-2406047c20fc4966ab637d387502728f", + "description": "Results for batch job j-2406047c20fc4966ab637d387502728f", + "license": "proprietary", + "extent": { + "spatial": {"bbox": [[2.70964374625748, 51.00377983772219, 2.777548414187305, 51.10589339112414]]}, + "temporal": {"interval": [["2020-01-01T00:00:00Z", "2020-03-15T00:00:00Z"]]} + }, + "assets": { + 'timeseries.csv': { + 'href': 'http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results/assets/timeseries.csv', + 'roles': ['data'], + 'title': 'timeseries.csv', + 'type': 'text/csv', + 'eo:bands': [ + {"name": "S2-L2A-EVI_t0"}, + {"name": "S2-L2A-EVI_t1"}, + {"name": "S2-L2A-EVI_t2"} + ], + }, + 'timeseries.parquet': { + 'href': 'http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results/assets/timeseries.parquet', + 'roles': ['data'], + 'title': 'timeseries.parquet', + 'type': 'application/parquet; profile=geo', + 'eo:bands': [ + {"name": "S2-L2A-EVI_t0"}, + {"name": "S2-L2A-EVI_t1"}, + {"name": "S2-L2A-EVI_t2"} + ], + }, + }, + "links": [ + { + "href": "http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results", + "rel": "self", + "type": "application/json" + }, + { + "href": "http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results", + "rel": "canonical", + "type": "application/json" + }, + { + "href": "http://ceos.org/ard/files/PFS/SR/v5.0/CARD4L_Product_Family_Specification_Surface_Reflectance-v5.0.pdf", + "rel": "card4l-document", + "type": "application/pdf" + }, + { + "href": "http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results/items/timeseries.csv", + "rel": "item", + "type": "application/geo+json", + }, + { + "href": "http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results/items/timeseries.parquet", + "rel": "item", + "type": "application/geo+json", + }, + ] + }) + def test_get_job_results_public_href_asset_100(self, api, backend_implementation): import numpy as np @@ -2592,6 +2671,73 @@ def test_get_job_result_item(self, flask_app, api110, backend_config_overrides): extensions=resp_data.get("stac_extensions", []), ) + @pytest.mark.parametrize(["vector_item_id", "vector_asset_media_type"], [ + ("timeseries.csv", "text/csv"), + ("timeseries.parquet", "application/parquet; profile=geo"), + ]) + def test_get_vector_cube_job_result_item(self, flask_app, api110, backend_config_overrides, vector_item_id, + vector_asset_media_type): + vector_asset_filename = vector_item_id + + with self._fresh_job_registry(): + resp = api110.get(f"/jobs/j-2406047c20fc4966ab637d387502728f/results/items/{vector_item_id}", + headers=self.AUTH_HEADER) + + resp_data = resp.assert_status_code(200).json + + assert resp_data == DictSubSet({ + 'type': 'Feature', + 'stac_version': "1.0.0", + 'id': vector_item_id, + 'geometry': {"type": "Polygon", + "coordinates": [[[2.70964374625748, 51.00377983772219], + [2.70964374625748, 51.10589339112414], + [2.777548414187305, 51.10589339112414], + [2.777548414187305, 51.00377983772219], + [2.70964374625748, 51.00377983772219]]]}, + 'bbox': [2.70964374625748, 51.00377983772219, 2.777548414187305, 51.10589339112414], + 'properties': { + 'datetime': None, + 'start_datetime': "2020-01-01T00:00:00Z", + 'end_datetime': "2020-03-15T00:00:00Z", + }, + 'collection': 'j-2406047c20fc4966ab637d387502728f', + 'links': [ + { + 'href': f"http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results/items/{vector_item_id}", + 'rel': 'self', + 'type': 'application/geo+json', + }, + { + 'href': "http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results", + 'rel': 'collection', + 'type': 'application/json', + } + ], + 'assets': { + vector_asset_filename: { + 'href': f"http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results/assets/{vector_asset_filename}", + 'type': vector_asset_media_type, + 'roles': ["data"], + 'title': vector_asset_filename, + 'eo:bands': [ + {"name": "S2-L2A-EVI_t0"}, + {"name": "S2-L2A-EVI_t1"}, + {"name": "S2-L2A-EVI_t2"} + ], + } + } + }) + + assert resp.headers["Content-Type"] == "application/geo+json" + + pystac.validation.stac_validator.JsonSchemaSTACValidator().validate( + stac_dict=resp_data, + stac_object_type=pystac.STACObjectType.ITEM, + stac_version=resp_data.get("stac_version", "0.9.0"), + extensions=resp_data.get("stac_extensions", []), + ) + @mock.patch("time.time", mock.MagicMock(return_value=1234)) @pytest.mark.parametrize("backend_config_overrides", [{"url_signer": UrlSigner(secret="123&@#", expiration=1000)}]) def test_download_ml_model_metadata_signed(self, flask_app, api110, backend_config_overrides):