diff --git a/docs/api.md b/docs/api.md index a8e4b32..2ee00f2 100644 --- a/docs/api.md +++ b/docs/api.md @@ -51,6 +51,8 @@ .. automodule:: zen3geo.datapipes.pystac_client .. autoclass:: zen3geo.datapipes.PySTACAPISearcher .. autoclass:: zen3geo.datapipes.pystac_client.PySTACAPISearcherIterDataPipe +.. autoclass:: zen3geo.datapipes.PySTACAPIItemLister +.. autoclass:: zen3geo.datapipes.pystac_client.PySTACAPIItemListerIterDataPipe :show-inheritance: ``` diff --git a/docs/object-detection-boxes.md b/docs/object-detection-boxes.md index cef4779..8bdcdba 100644 --- a/docs/object-detection-boxes.md +++ b/docs/object-detection-boxes.md @@ -137,12 +137,12 @@ catalog = pystac_client.Client.open( url="https://planetarycomputer.microsoft.com/api/stac/v1", modifier=planetary_computer.sign_inplace, ) -items = catalog.search( +search = catalog.search( collections=["ms-buildings"], query={"msbuildings:region": {"eq": "Brunei"}}, intersects=shapely.geometry.box(minx=114.94, miny=4.88, maxx=114.95, maxy=4.89), ) -item = next(items.get_items()) +item = next(search.items()) item ``` diff --git a/zen3geo/datapipes/__init__.py b/zen3geo/datapipes/__init__.py index 46e36e0..60ffb81 100644 --- a/zen3geo/datapipes/__init__.py +++ b/zen3geo/datapipes/__init__.py @@ -12,6 +12,7 @@ from zen3geo.datapipes.pyogrio import PyogrioReaderIterDataPipe as PyogrioReader from zen3geo.datapipes.pystac import PySTACItemReaderIterDataPipe as PySTACItemReader from zen3geo.datapipes.pystac_client import ( + PySTACAPIItemListerIterDataPipe as PySTACAPIItemLister, PySTACAPISearcherIterDataPipe as PySTACAPISearcher, ) from zen3geo.datapipes.rioxarray import RioXarrayReaderIterDataPipe as RioXarrayReader diff --git a/zen3geo/datapipes/pystac.py b/zen3geo/datapipes/pystac.py index 8f0396e..c01b39f 100644 --- a/zen3geo/datapipes/pystac.py +++ b/zen3geo/datapipes/pystac.py @@ -29,8 +29,9 @@ class PySTACItemReaderIterDataPipe(IterDataPipe): Yields ------ stac_item : pystac.Item - An :py:class:`pystac.Item` object 
@functional_datapipe("list_pystac_items_by_search")
class PySTACAPIItemListerIterDataPipe(IterDataPipe):
    """
    Lists the :py:class:`pystac.Item` objects that match the provided STAC API
    search parameters (functional name: ``list_pystac_items_by_search``).

    Parameters
    ----------
    source_datapipe : IterDataPipe[pystac_client.ItemSearch]
        A DataPipe that contains :py:class:`pystac_client.ItemSearch` object
        instances, each of which represents a deferred query to a STAC search
        endpoint as described in the STAC API - Item Search spec.

    Yields
    ------
    stac_item : pystac.Item
        A :py:class:`pystac.Item` object containing the specific
        :py:class:`pystac.STACObject` implementation class represented in a
        JSON format.

    Raises
    ------
    ModuleNotFoundError
        If ``pystac_client`` is not installed. Install it first (e.g. via
        ``pip install pystac-client``) before using this class.
    TypeError
        When ``len()`` is requested but one of the
        :py:class:`pystac_client.ItemSearch` objects cannot report how many
        items it matched (i.e. its ``matched()`` method returns ``None``).

    Example
    -------
    >>> import pytest
    >>> pystac_client = pytest.importorskip("pystac_client")
    ...
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> from zen3geo.datapipes import PySTACAPIItemLister
    ...
    >>> # List STAC Items from a STAC API query
    >>> catalog = pystac_client.Client.open(
    ...     url="https://explorer.digitalearth.africa/stac/"
    ... )
    >>> search = catalog.search(
    ...     bbox=[57.2, -20.6, 57.9, -19.9],  # xmin, ymin, xmax, ymax
    ...     datetime=["2023-01-01T00:00:00Z", "2023-01-31T00:00:00Z"],
    ...     collections=["s2_l2a"],
    ... )
    >>> dp = IterableWrapper(iterable=[search])
    >>> dp_pystac_item_list = dp.list_pystac_items_by_search()
    ...
    >>> # Loop or iterate over the DataPipe stream
    >>> it = iter(dp_pystac_item_list)
    >>> stac_item = next(it)
    >>> stac_item  # doctest: +ELLIPSIS
    <Item id=...>
    >>> stac_item.properties  # doctest: +NORMALIZE_WHITESPACE
    {'title': 'S2B_MSIL2A_20230103T062449_N0509_R091_T40KED_20230103T075000',
     'gsd': 10,
     'proj:epsg': 32740,
     'platform': 'sentinel-2b',
     'view:off_nadir': 0,
     'instruments': ['msi'],
     'eo:cloud_cover': 0.02,
     'odc:file_format': 'GeoTIFF',
     'odc:region_code': '40KED',
     'constellation': 'sentinel-2',
     'sentinel:sequence': '0',
     'sentinel:utm_zone': 40,
     'sentinel:product_id': 'S2B_MSIL2A_20230103T062449_N0509_R091_T40KED_20230103T075000',
     'sentinel:grid_square': 'ED',
     'sentinel:data_coverage': 28.61,
     'sentinel:latitude_band': 'K',
     'created': '2023-01-03T06:24:53Z',
     'sentinel:valid_cloud_cover': True,
     'sentinel:boa_offset_applied': True,
     'sentinel:processing_baseline': '05.09',
     'proj:shape': [10980, 10980],
     'proj:transform': [10.0, 0.0, 499980.0, 0.0, -10.0, 7900000.0, 0.0, 0.0, 1.0],
     'datetime': '2023-01-03T06:24:53Z',
     'cubedash:region_code': '40KED'}
    """

    def __init__(self, source_datapipe: IterDataPipe) -> None:
        # ``pystac_client`` is an optional dependency; the module-level name
        # is None when the import failed, so fail early with install hints.
        if pystac_client is None:
            raise ModuleNotFoundError(
                "Package `pystac_client` is required to be installed to use this datapipe. "
                "Please use `pip install pystac-client` or "
                "`conda install -c conda-forge pystac-client` "
                "to install the package"
            )
        self.source_datapipe = source_datapipe

    def __iter__(self) -> Iterator:
        # Flatten the stream: every ItemSearch contributes all of its items,
        # one after another, in the order the source datapipe yields them.
        for item_search in self.source_datapipe:
            yield from item_search.items()

    def __len__(self) -> int:
        # The length is the number of items matched across all searches.
        total = 0
        for item_search in self.source_datapipe:
            matched = item_search.matched()
            if matched is None:
                # A missing count cannot be summed; raise TypeError, the
                # usual signal for a DataPipe without a valid length, with a
                # message that explains the cause.
                raise TypeError(
                    f"{type(self).__name__} instance doesn't have valid length "
                    "because the STAC API did not report the number of matched items"
                )
            total += matched
        return total
def test_pystac_client_item_lister():
    """
    Check that PySTACAPIItemLister streams one pystac.Item per match of a
    pystac_client.ItemSearch query, and that the first streamed item carries
    the expected bounding box, timestamp, geometry type, properties and
    visual asset band metadata.
    """
    client = pystac_client.Client.open(
        url="https://earth-search.aws.element84.com/v1/"
    )
    item_search = client.search(
        bbox=[134.2, 6.9, 134.8, 8.5],
        datetime=["2023-01-01T00:00:00Z", "2023-01-31T00:00:00Z"],
        collections=["sentinel-2-l1c"],
    )
    source_dp = IterableWrapper(iterable=[item_search])

    # Build the datapipe both ways; the functional form (recommended) is the
    # one exercised by the assertions below.
    lister_dp = PySTACAPIItemLister(source_datapipe=source_dp)
    lister_dp = source_dp.list_pystac_items_by_search()

    expected_bbox = [
        134.093840347073,
        6.2442879900058115,
        135.08840137750929,
        7.237809826458827,
    ]
    expected_properties = {
        "created": "2023-01-29T06:01:33.679Z",
        "platform": "sentinel-2b",
        "constellation": "sentinel-2",
        "instruments": ["msi"],
        "eo:cloud_cover": 92.7676417582305,
        "proj:epsg": 32653,
        "mgrs:utm_zone": 53,
        "mgrs:latitude_band": "N",
        "mgrs:grid_square": "MH",
        "grid:code": "MGRS-53NMH",
        "view:sun_azimuth": 135.719785438016,
        "view:sun_elevation": 55.1713941690268,
        "s2:degraded_msi_data_percentage": 0.2816,
        "s2:product_type": "S2MSI1C",
        "s2:processing_baseline": "05.09",
        "s2:product_uri": "S2B_MSIL1C_20230129T013449_N0509_R031_T53NMH_20230129T025811.SAFE",
        "s2:generation_time": "2023-01-29T02:58:11.000000Z",
        "s2:datatake_id": "GS2B_20230129T013449_030802_N05.09",
        "s2:datatake_type": "INS-NOBS",
        "s2:datastrip_id": "S2B_OPER_MSI_L1C_DS_2BPS_20230129T025811_S20230129T013450_N05.09",
        "s2:granule_id": "S2B_OPER_MSI_L1C_TL_2BPS_20230129T025811_A030802_T53NMH_N05.09",
        "s2:reflectance_conversion_factor": 1.03193080888673,
        "datetime": "2023-01-29T01:35:24.640000Z",
        "s2:sequence": "0",
        "earthsearch:s3_path": "s3://earthsearch-data/sentinel-2-l1c/53/N/MH/2023/1/S2B_53NMH_20230129_0_L1C",
        "earthsearch:payload_id": "roda-sentinel2/workflow-sentinel2-to-stac/15626e44fb54c2182e5ed5d3aec4a209",
        "processing:software": {"sentinel2-to-stac": "0.1.0"},
        "updated": "2023-01-29T06:01:33.679Z",
    }
    expected_visual_bands = [
        {
            "name": "red",
            "common_name": "red",
            "description": "Red (band 4)",
            "center_wavelength": 0.665,
            "full_width_half_max": 0.038,
        },
        {
            "name": "green",
            "common_name": "green",
            "description": "Green (band 3)",
            "center_wavelength": 0.56,
            "full_width_half_max": 0.045,
        },
        {
            "name": "blue",
            "common_name": "blue",
            "description": "Blue (band 2)",
            "center_wavelength": 0.49,
            "full_width_half_max": 0.098,
        },
    ]

    # Total number of items matched by the single search in the stream
    assert len(lister_dp) == 14

    # Inspect only the first item yielded by the datapipe
    first_item = next(iter(lister_dp))
    assert first_item.bbox == expected_bbox
    assert first_item.datetime.isoformat() == "2023-01-29T01:35:24.640000+00:00"
    assert first_item.geometry["type"] == "Polygon"
    assert first_item.properties == expected_properties
    assert first_item.assets["visual"].extra_fields["eo:bands"] == expected_visual_bands