Skip to content

Commit

Permalink
expose CSV/GeoParquet output assets as STAC items
Browse files Browse the repository at this point in the history
  • Loading branch information
bossie committed Jun 11, 2024
1 parent 9fced81 commit a02520e
Show file tree
Hide file tree
Showing 5 changed files with 188 additions and 8 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ and start a new "In Progress" section above it.

## In progress

## 0.104.0

- Expose CSV/GeoParquet output assets as STAC items ([Open-EO/openeo-geopyspark-driver#787](https://github.com/Open-EO/openeo-geopyspark-driver/issues/787))

## 0.103.2

- Start warning about deprecated `evaluate_process_from_url` usage (eu-cdse/openeo-cdse-infra#167)
Expand Down
2 changes: 1 addition & 1 deletion openeo_driver/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.103.2a1"
__version__ = "0.104.0a1"
29 changes: 29 additions & 0 deletions openeo_driver/dummy/dummy_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,35 @@ def get_result_assets(self, job_id: str, user_id: str) -> Dict[str, dict]:
!= JOB_STATUS.FINISHED
):
raise JobNotFinishedException

if job_id == 'j-2406047c20fc4966ab637d387502728f':
return {
"timeseries.csv": {
"roles": ["data"],
"type": "text/csv",
"href": "s3://OpenEO-data/batch_jobs/j-2406047c20fc4966ab637d387502728f/timeseries.csv",
"bands": [Band(name="S2-L2A-EVI_t0"), Band(name="S2-L2A-EVI_t1"), Band(name="S2-L2A-EVI_t2")],
"geometry": {"type": "Polygon",
"coordinates": [[[2.70964374625748, 51.00377983772219],
[2.70964374625748, 51.10589339112414],
[2.777548414187305, 51.10589339112414],
[2.777548414187305, 51.00377983772219],
[2.70964374625748, 51.00377983772219]]]}
},
"timeseries.parquet": {
"roles": ["data"],
"type": "application/parquet; profile=geo",
"href": "s3://OpenEO-data/batch_jobs/j-2406047c20fc4966ab637d387502728f/timeseries.parquet",
"bands": [Band(name="S2-L2A-EVI_t0"), Band(name="S2-L2A-EVI_t1"), Band(name="S2-L2A-EVI_t2")],
"geometry": {"type": "Polygon",
"coordinates": [[[2.70964374625748, 51.00377983772219],
[2.70964374625748, 51.10589339112414],
[2.777548414187305, 51.10589339112414],
[2.777548414187305, 51.00377983772219],
[2.70964374625748, 51.00377983772219]]]}
},
}

return {
"output.tiff": {
"output_dir": f"{self._output_root()}/{job_id}",
Expand Down
11 changes: 6 additions & 5 deletions openeo_driver/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1120,7 +1120,8 @@ def job_result_item_url(item_id) -> str:

for filename, metadata in result_assets.items():
if ("data" in metadata.get("roles", []) and
any(media_type in metadata.get("type", "") for media_type in ["geotiff", "netcdf"])):
any(media_type in metadata.get("type", "") for media_type in
["geotiff", "netcdf", "text/csv", "application/parquet"])):
links.append(
{"rel": "item", "href": job_result_item_url(item_id=filename), "type": stac_item_media_type}
)
Expand Down Expand Up @@ -1306,11 +1307,12 @@ def _get_job_result_item(job_id, item_id, user_id):

asset_filename, metadata = next(iter(assets_for_item_id.items()))

geometry = metadata.get("geometry")
bbox = metadata.get("bbox")
job_info = backend_implementation.batch_jobs.get_job_info(job_id, user_id)

geometry = metadata.get("geometry", job_info.geometry)
bbox = metadata.get("bbox", job_info.bbox)

properties = {"datetime": metadata.get("datetime")}
job_info = backend_implementation.batch_jobs.get_job_info(job_id, user_id)
if properties["datetime"] is None:
to_datetime = Rfc3339(propagate_none=True).datetime

Expand Down Expand Up @@ -1419,7 +1421,6 @@ def _asset_object(job_id, user_id, filename: str, asset_metadata: dict, job_info
# Machine learning models.
return result_dict
bands = asset_metadata.get("bands")
nodata = asset_metadata.get("nodata")

result_dict.update(
dict_no_none(
Expand Down
150 changes: 148 additions & 2 deletions tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1180,7 +1180,15 @@ def _fresh_job_registry(next_job_id="job-1234", output_root: Optional[Path] = No
costs=1.23,
budget=4.56,
proj_shape=[300, 600],
)
),
(TEST_USER, 'j-2406047c20fc4966ab637d387502728f'): BatchJobMetadata(
id='j-2406047c20fc4966ab637d387502728f',
status='finished',
created=datetime(2024, 6, 4, 14, 20, 23),
bbox=[2.70964374625748, 51.00377983772219, 2.777548414187305, 51.10589339112414],
start_datetime=datetime(2020, 1, 1, 0, 0, 0),
end_datetime=datetime(2020, 3, 15, 0, 0, 0),
),
}
dummy_backend.DummyBatchJobs._job_result_registry = {}

Expand Down Expand Up @@ -1359,7 +1367,13 @@ def test_list_user_jobs_100(self, api100):
'plan': 'some_plan',
'costs': 1.23,
'budget': 4.56
}
},
{
'id': 'j-2406047c20fc4966ab637d387502728f',
'status': 'finished',
'progress': 100,
'created': "2024-06-04T14:20:23Z",
},
],
"links": []
}
Expand Down Expand Up @@ -1770,6 +1784,71 @@ def test_get_job_results_110(self, api110):
"openeo:status": "finished",
}

resp = api110.get("/jobs/j-2406047c20fc4966ab637d387502728f/results", headers=self.AUTH_HEADER)

assert resp.assert_status_code(200).json == DictSubSet({
"stac_version": "1.0.0",
"type": "Collection",
"id": "j-2406047c20fc4966ab637d387502728f",
"description": "Results for batch job j-2406047c20fc4966ab637d387502728f",
"license": "proprietary",
"extent": {
"spatial": {"bbox": [[2.70964374625748, 51.00377983772219, 2.777548414187305, 51.10589339112414]]},
"temporal": {"interval": [["2020-01-01T00:00:00Z", "2020-03-15T00:00:00Z"]]}
},
"assets": {
'timeseries.csv': {
'href': 'http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results/assets/timeseries.csv',
'roles': ['data'],
'title': 'timeseries.csv',
'type': 'text/csv',
'eo:bands': [
{"name": "S2-L2A-EVI_t0"},
{"name": "S2-L2A-EVI_t1"},
{"name": "S2-L2A-EVI_t2"}
],
},
'timeseries.parquet': {
'href': 'http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results/assets/timeseries.parquet',
'roles': ['data'],
'title': 'timeseries.parquet',
'type': 'application/parquet; profile=geo',
'eo:bands': [
{"name": "S2-L2A-EVI_t0"},
{"name": "S2-L2A-EVI_t1"},
{"name": "S2-L2A-EVI_t2"}
],
},
},
"links": [
{
"href": "http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results",
"rel": "self",
"type": "application/json"
},
{
"href": "http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results",
"rel": "canonical",
"type": "application/json"
},
{
"href": "http://ceos.org/ard/files/PFS/SR/v5.0/CARD4L_Product_Family_Specification_Surface_Reflectance-v5.0.pdf",
"rel": "card4l-document",
"type": "application/pdf"
},
{
"href": "http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results/items/timeseries.csv",
"rel": "item",
"type": "application/geo+json",
},
{
"href": "http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results/items/timeseries.parquet",
"rel": "item",
"type": "application/geo+json",
},
]
})

def test_get_job_results_public_href_asset_100(self, api, backend_implementation):
import numpy as np

Expand Down Expand Up @@ -2592,6 +2671,73 @@ def test_get_job_result_item(self, flask_app, api110, backend_config_overrides):
extensions=resp_data.get("stac_extensions", []),
)

@pytest.mark.parametrize(["vector_item_id", "vector_asset_media_type"], [
("timeseries.csv", "text/csv"),
("timeseries.parquet", "application/parquet; profile=geo"),
])
def test_get_vector_cube_job_result_item(self, flask_app, api110, backend_config_overrides, vector_item_id,
vector_asset_media_type):
vector_asset_filename = vector_item_id

with self._fresh_job_registry():
resp = api110.get(f"/jobs/j-2406047c20fc4966ab637d387502728f/results/items/{vector_item_id}",
headers=self.AUTH_HEADER)

resp_data = resp.assert_status_code(200).json

assert resp_data == DictSubSet({
'type': 'Feature',
'stac_version': "1.0.0",
'id': vector_item_id,
'geometry': {"type": "Polygon",
"coordinates": [[[2.70964374625748, 51.00377983772219],
[2.70964374625748, 51.10589339112414],
[2.777548414187305, 51.10589339112414],
[2.777548414187305, 51.00377983772219],
[2.70964374625748, 51.00377983772219]]]},
'bbox': [2.70964374625748, 51.00377983772219, 2.777548414187305, 51.10589339112414],
'properties': {
'datetime': None,
'start_datetime': "2020-01-01T00:00:00Z",
'end_datetime': "2020-03-15T00:00:00Z",
},
'collection': 'j-2406047c20fc4966ab637d387502728f',
'links': [
{
'href': f"http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results/items/{vector_item_id}",
'rel': 'self',
'type': 'application/geo+json',
},
{
'href': "http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results",
'rel': 'collection',
'type': 'application/json',
}
],
'assets': {
vector_asset_filename: {
'href': f"http://oeo.net/openeo/1.1.0/jobs/j-2406047c20fc4966ab637d387502728f/results/assets/{vector_asset_filename}",
'type': vector_asset_media_type,
'roles': ["data"],
'title': vector_asset_filename,
'eo:bands': [
{"name": "S2-L2A-EVI_t0"},
{"name": "S2-L2A-EVI_t1"},
{"name": "S2-L2A-EVI_t2"}
],
}
}
})

assert resp.headers["Content-Type"] == "application/geo+json"

pystac.validation.stac_validator.JsonSchemaSTACValidator().validate(
stac_dict=resp_data,
stac_object_type=pystac.STACObjectType.ITEM,
stac_version=resp_data.get("stac_version", "0.9.0"),
extensions=resp_data.get("stac_extensions", []),
)

@mock.patch("time.time", mock.MagicMock(return_value=1234))
@pytest.mark.parametrize("backend_config_overrides", [{"url_signer": UrlSigner(secret="123&@#", expiration=1000)}])
def test_download_ml_model_metadata_signed(self, flask_app, api110, backend_config_overrides):
Expand Down

0 comments on commit a02520e

Please sign in to comment.