Skip to content

Commit

Permalink
✨ PySTACAPIItemLister to list STAC Items matching STAC API search (#111)
Browse files Browse the repository at this point in the history
* ✨ PySTACAPIItemLister to list STAC Items matching STAC API search

An iterable-style DataPipe to list STAC Items matching a STAC API search query! Calls pystac_client.ItemSearch.items() to yield pystac.Item instances. Included a doctest and a unit test that produces a list of STAC Items from a STAC API search that can be iterated over. Added a new section in the API docs too.

* 🚑 Fix typo on docs/api.md

Should be referencing `zen3geo.datapipes.pystac_client.PySTACAPIItemListerIterDataPipe`

* 📝 Use non-deprecated .items() in object-detection-boxes tutorial

PySTAC Client has renamed `ItemSearch.get_items()` to `ItemSearch.items()` in stac-utils/pystac-client#206, see also https://github.com/stac-utils/pystac-client/blob/v0.7.1/CHANGELOG.md#deprecated-1.

* 📝 Intersphinx link to pystac.STACObject in PySTACItemReader docs

Properly linking to https://pystac.readthedocs.io/en/1.0/api/pystac.html#pystac.STACObject in the docstring of PySTACItemReaderIterDataPipe.
  • Loading branch information
weiji14 committed Jun 20, 2023
1 parent 0caed05 commit 342e43f
Show file tree
Hide file tree
Showing 6 changed files with 196 additions and 6 deletions.
2 changes: 2 additions & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
.. automodule:: zen3geo.datapipes.pystac_client
.. autoclass:: zen3geo.datapipes.PySTACAPISearcher
.. autoclass:: zen3geo.datapipes.pystac_client.PySTACAPISearcherIterDataPipe
.. autoclass:: zen3geo.datapipes.PySTACAPIItemLister
.. autoclass:: zen3geo.datapipes.pystac_client.PySTACAPIItemListerIterDataPipe
:show-inheritance:
```

Expand Down
4 changes: 2 additions & 2 deletions docs/object-detection-boxes.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,12 @@ catalog = pystac_client.Client.open(
url="https://planetarycomputer.microsoft.com/api/stac/v1",
modifier=planetary_computer.sign_inplace,
)
items = catalog.search(
search = catalog.search(
collections=["ms-buildings"],
query={"msbuildings:region": {"eq": "Brunei"}},
intersects=shapely.geometry.box(minx=114.94, miny=4.88, maxx=114.95, maxy=4.89),
)
item = next(items.get_items())
item = next(search.items())
item
```

Expand Down
1 change: 1 addition & 0 deletions zen3geo/datapipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from zen3geo.datapipes.pyogrio import PyogrioReaderIterDataPipe as PyogrioReader
from zen3geo.datapipes.pystac import PySTACItemReaderIterDataPipe as PySTACItemReader
from zen3geo.datapipes.pystac_client import (
PySTACAPIItemListerIterDataPipe as PySTACAPIItemLister,
PySTACAPISearcherIterDataPipe as PySTACAPISearcher,
)
from zen3geo.datapipes.rioxarray import RioXarrayReaderIterDataPipe as RioXarrayReader
Expand Down
5 changes: 3 additions & 2 deletions zen3geo/datapipes/pystac.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@ class PySTACItemReaderIterDataPipe(IterDataPipe):
Yields
------
stac_item : pystac.Item
An :py:class:`pystac.Item` object containing the specific STACObject
implementation class represented in a JSON format.
A :py:class:`pystac.Item` object containing the specific
:py:class:`pystac.STACObject` implementation class represented in a
JSON format.
Raises
------
Expand Down
100 changes: 99 additions & 1 deletion zen3geo/datapipes/pystac_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class PySTACAPISearcherIterDataPipe(IterDataPipe):
...
>>> # Perform STAC API query using DataPipe
>>> query = dict(
... bbox=[174.5, -41.37, 174.9, -41.19],
... bbox=[174.5, -41.37, 174.9, -41.19], # xmin, ymin, xmax, ymax
... datetime=["2012-02-20T00:00:00Z", "2022-12-22T00:00:00Z"],
... collections=["cop-dem-glo-30"],
... )
Expand Down Expand Up @@ -133,3 +133,101 @@ def __iter__(self) -> Iterator:

def __len__(self) -> int:
    """
    Return the length of the wrapped source DataPipe, as reported by
    ``len(self.source_datapipe)``.
    """
    return len(self.source_datapipe)


@functional_datapipe("list_pystac_items_by_search")
class PySTACAPIItemListerIterDataPipe(IterDataPipe):
    """
    Lists the :py:class:`pystac.Item` objects that match the provided STAC API
    search parameters (functional name: ``list_pystac_items_by_search``).

    Parameters
    ----------
    source_datapipe : IterDataPipe[pystac_client.ItemSearch]
        A DataPipe that contains :py:class:`pystac_client.ItemSearch` object
        instances, each representing a deferred query to a STAC search
        endpoint as described in the
        `STAC API - Item Search spec <https://github.com/radiantearth/stac-api-spec/tree/main/item-search>`_.

    Yields
    ------
    stac_item : pystac.Item
        A :py:class:`pystac.Item` object containing the specific
        :py:class:`pystac.STACObject` implementation class represented in a
        JSON format.

    Raises
    ------
    ModuleNotFoundError
        If ``pystac_client`` is not installed. See
        :doc:`install instructions for pystac-client <pystac_client:index>`,
        (e.g. via ``pip install pystac-client``) before using this class.

    Example
    -------
    >>> import pytest
    >>> pystac_client = pytest.importorskip("pystac_client")
    ...
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> from zen3geo.datapipes import PySTACAPIItemLister
    ...
    >>> # List STAC Items from a STAC API query
    >>> catalog = pystac_client.Client.open(
    ...     url="https://explorer.digitalearth.africa/stac/"
    ... )
    >>> search = catalog.search(
    ...     bbox=[57.2, -20.6, 57.9, -19.9],  # xmin, ymin, xmax, ymax
    ...     datetime=["2023-01-01T00:00:00Z", "2023-01-31T00:00:00Z"],
    ...     collections=["s2_l2a"],
    ... )
    >>> dp = IterableWrapper(iterable=[search])
    >>> dp_pystac_item_list = dp.list_pystac_items_by_search()
    ...
    >>> # Loop or iterate over the DataPipe stream
    >>> it = iter(dp_pystac_item_list)
    >>> stac_item = next(it)
    >>> stac_item
    <Item id=ec16dbf6-9729-5a8f-9d72-5e83a8b9f30d>
    >>> stac_item.properties  # doctest: +NORMALIZE_WHITESPACE
    {'title': 'S2B_MSIL2A_20230103T062449_N0509_R091_T40KED_20230103T075000',
     'gsd': 10,
     'proj:epsg': 32740,
     'platform': 'sentinel-2b',
     'view:off_nadir': 0,
     'instruments': ['msi'],
     'eo:cloud_cover': 0.02,
     'odc:file_format': 'GeoTIFF',
     'odc:region_code': '40KED',
     'constellation': 'sentinel-2',
     'sentinel:sequence': '0',
     'sentinel:utm_zone': 40,
     'sentinel:product_id': 'S2B_MSIL2A_20230103T062449_N0509_R091_T40KED_20230103T075000',
     'sentinel:grid_square': 'ED',
     'sentinel:data_coverage': 28.61,
     'sentinel:latitude_band': 'K',
     'created': '2023-01-03T06:24:53Z',
     'sentinel:valid_cloud_cover': True,
     'sentinel:boa_offset_applied': True,
     'sentinel:processing_baseline': '05.09',
     'proj:shape': [10980, 10980],
     'proj:transform': [10.0, 0.0, 499980.0, 0.0, -10.0, 7900000.0, 0.0, 0.0, 1.0],
     'datetime': '2023-01-03T06:24:53Z',
     'cubedash:region_code': '40KED'}
    """

    def __init__(self, source_datapipe: IterDataPipe) -> None:
        # ``pystac_client`` is compared against None here, so a missing
        # optional dependency surfaces as a clear error at construction time.
        if pystac_client is None:
            raise ModuleNotFoundError(
                "Package `pystac_client` is required to be installed to use this datapipe. "
                "Please use `pip install pystac-client` or "
                "`conda install -c conda-forge pystac-client` "
                "to install the package"
            )
        self.source_datapipe = source_datapipe

    def __iter__(self) -> Iterator:
        """
        Yield every item produced by ``.items()`` of each search object in
        the source DataPipe, one search after another.
        """
        for item_search in self.source_datapipe:
            yield from item_search.items()

    def __len__(self) -> int:
        """
        Return the total of ``.matched()`` over every search object in the
        source DataPipe.

        Raises
        ------
        TypeError
            If any search object reports ``None`` from ``.matched()``, i.e.
            no match count is available, so no valid length can be computed.
        """
        total = 0
        for item_search in self.source_datapipe:
            matched = item_search.matched()
            if matched is None:
                # Still a TypeError (which is what summing None would have
                # raised), but with an actionable message. TypeError is also
                # what length-hint lookups such as ``list(dp)`` tolerate.
                raise TypeError(
                    f"{type(self).__name__} instance doesn't have valid length "
                    "because a STAC API search did not report the number of "
                    "matched items"
                )
            total += matched
        return total
90 changes: 89 additions & 1 deletion zen3geo/tests/test_datapipes_pystac_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
import pytest
from torchdata.datapipes.iter import IterableWrapper

from zen3geo.datapipes import PySTACAPISearcher
from zen3geo.datapipes import PySTACAPIItemLister, PySTACAPISearcher

pystac_client = pytest.importorskip("pystac_client")


# %%
def test_pystac_client_item_search():
"""
Expand Down Expand Up @@ -85,3 +86,90 @@ def test_pystac_client_item_search_open_parameters():
stac_item_search = next(it)
assert stac_item_search.client.title == "Radiant MLHub API"
assert stac_item_search.client.description == "stac-fastapi"


def test_pystac_client_item_lister():
    """
    Check that PySTACAPIItemLister turns a pystac_client.ItemSearch query into
    a stream of pystac.Item instances, one per item matching the search
    parameters.
    """
    # Expected values for the first STAC Item returned by the search
    expected_bbox = [
        134.093840347073,
        6.2442879900058115,
        135.08840137750929,
        7.237809826458827,
    ]
    expected_properties = {
        "created": "2023-01-29T06:01:33.679Z",
        "platform": "sentinel-2b",
        "constellation": "sentinel-2",
        "instruments": ["msi"],
        "eo:cloud_cover": 92.7676417582305,
        "proj:epsg": 32653,
        "mgrs:utm_zone": 53,
        "mgrs:latitude_band": "N",
        "mgrs:grid_square": "MH",
        "grid:code": "MGRS-53NMH",
        "view:sun_azimuth": 135.719785438016,
        "view:sun_elevation": 55.1713941690268,
        "s2:degraded_msi_data_percentage": 0.2816,
        "s2:product_type": "S2MSI1C",
        "s2:processing_baseline": "05.09",
        "s2:product_uri": "S2B_MSIL1C_20230129T013449_N0509_R031_T53NMH_20230129T025811.SAFE",
        "s2:generation_time": "2023-01-29T02:58:11.000000Z",
        "s2:datatake_id": "GS2B_20230129T013449_030802_N05.09",
        "s2:datatake_type": "INS-NOBS",
        "s2:datastrip_id": "S2B_OPER_MSI_L1C_DS_2BPS_20230129T025811_S20230129T013450_N05.09",
        "s2:granule_id": "S2B_OPER_MSI_L1C_TL_2BPS_20230129T025811_A030802_T53NMH_N05.09",
        "s2:reflectance_conversion_factor": 1.03193080888673,
        "datetime": "2023-01-29T01:35:24.640000Z",
        "s2:sequence": "0",
        "earthsearch:s3_path": "s3://earthsearch-data/sentinel-2-l1c/53/N/MH/2023/1/S2B_53NMH_20230129_0_L1C",
        "earthsearch:payload_id": "roda-sentinel2/workflow-sentinel2-to-stac/15626e44fb54c2182e5ed5d3aec4a209",
        "processing:software": {"sentinel2-to-stac": "0.1.0"},
        "updated": "2023-01-29T06:01:33.679Z",
    }
    expected_visual_bands = [
        {
            "name": "red",
            "common_name": "red",
            "description": "Red (band 4)",
            "center_wavelength": 0.665,
            "full_width_half_max": 0.038,
        },
        {
            "name": "green",
            "common_name": "green",
            "description": "Green (band 3)",
            "center_wavelength": 0.56,
            "full_width_half_max": 0.045,
        },
        {
            "name": "blue",
            "common_name": "blue",
            "description": "Blue (band 2)",
            "center_wavelength": 0.49,
            "full_width_half_max": 0.098,
        },
    ]

    # Build the deferred STAC API search and wrap it in a DataPipe
    stac_api = pystac_client.Client.open(
        url="https://earth-search.aws.element84.com/v1/"
    )
    item_search = stac_api.search(
        bbox=[134.2, 6.9, 134.8, 8.5],
        datetime=["2023-01-01T00:00:00Z", "2023-01-31T00:00:00Z"],
        collections=["sentinel-2-l1c"],
    )
    source_dp = IterableWrapper(iterable=[item_search])

    # Class constructor form
    lister_dp = PySTACAPIItemLister(source_datapipe=source_dp)
    # Functional form (recommended)
    lister_dp = source_dp.list_pystac_items_by_search()

    assert len(lister_dp) == 14
    first_item = next(iter(lister_dp))
    assert first_item.bbox == expected_bbox
    assert first_item.datetime.isoformat() == "2023-01-29T01:35:24.640000+00:00"
    assert first_item.geometry["type"] == "Polygon"
    assert first_item.properties == expected_properties
    assert first_item.assets["visual"].extra_fields["eo:bands"] == expected_visual_bands

0 comments on commit 342e43f

Please sign in to comment.