diff --git a/backend/populate_catalog/task.py b/backend/populate_catalog/task.py index 75a22a069..af9c519b7 100644 --- a/backend/populate_catalog/task.py +++ b/backend/populate_catalog/task.py @@ -1,7 +1,5 @@ from json import dumps -from os.path import join from typing import TYPE_CHECKING -from urllib.parse import urlparse import boto3 from pystac import STAC_IO, Catalog, CatalogType, Collection, Item # type: ignore[import] @@ -72,29 +70,20 @@ class UnhandledSQSMessageException(Exception): class GeostoreSTACLayoutStrategy(HrefLayoutStrategy): def get_catalog_href(self, cat: Catalog, parent_dir: str, is_root: bool) -> str: - original_path = urlparse(cat.get_self_href()).path.rsplit("/", maxsplit=2) - if is_root: - cat_root = parent_dir - else: - cat_root = join(parent_dir, original_path[-2]) - - return join(cat_root, original_path[-1]) + return str(cat.get_self_href()) def get_collection_href(self, col: Collection, parent_dir: str, is_root: bool) -> str: - original_path = urlparse(col.get_self_href()).path.rsplit("/", maxsplit=2) assert not is_root - return join(parent_dir, *original_path[-2:]) + return str(col.get_self_href()) def get_item_href(self, item: Item, parent_dir: str) -> str: - original_path = item.get_self_href().split("/") - return join(parent_dir, original_path[-1]) + return str(item.get_self_href()) def handle_dataset(version_metadata_key: str) -> None: """Handle writing a new dataset version to the dataset catalog""" storage_bucket_path = f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}" dataset_prefix = version_metadata_key.split("/", maxsplit=1)[0] - dataset_catalog = Catalog.from_file(f"{storage_bucket_path}/{dataset_prefix}/{CATALOG_KEY}") dataset_version_metadata = STAC_IO.read_stac_object( diff --git a/tests/test_populate_catalog.py b/tests/test_populate_catalog.py index 0a0ef661f..35ee859e6 100644 --- a/tests/test_populate_catalog.py +++ b/tests/test_populate_catalog.py @@ -225,10 +225,59 @@ def should_update_existing_root_catalog(subtests: SubTests) -> None: @mark.infrastructure def should_update_dataset_catalog_with_new_version_catalog(subtests: SubTests) -> None: - + collection_filename = f"{any_safe_filename()}.json" + item_filename = f"{any_safe_filename()}.json" dataset_version = any_dataset_version_id() - filename = f"{any_safe_filename()}.json" + catalog_filename = f"{any_safe_filename()}.json" with Dataset() as dataset, S3Object( + file_object=json_dict_to_file_object( + { + **deepcopy(MINIMAL_VALID_STAC_ITEM_OBJECT), + STAC_ID_KEY: any_dataset_version_id(), + STAC_LINKS_KEY: [ + { + STAC_REL_KEY: STAC_REL_ROOT, + STAC_HREF_KEY: f"./{catalog_filename}", + STAC_TYPE_KEY: STAC_MEDIA_TYPE_JSON, + }, + { + STAC_REL_KEY: STAC_REL_PARENT, + STAC_HREF_KEY: f"./{collection_filename}", + STAC_TYPE_KEY: STAC_MEDIA_TYPE_JSON, + }, + ], + } + ), + bucket_name=ResourceName.STORAGE_BUCKET_NAME.value, + key=f"{dataset.dataset_prefix}/{dataset_version}/{item_filename}", + ), S3Object( + file_object=json_dict_to_file_object( + { + **deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT), + STAC_ID_KEY: dataset_version, + STAC_TITLE_KEY: dataset.title, + STAC_LINKS_KEY: [ + { + STAC_REL_KEY: STAC_REL_ROOT, + STAC_HREF_KEY: f"./{catalog_filename}", + STAC_TYPE_KEY: STAC_MEDIA_TYPE_JSON, + }, + { + STAC_REL_KEY: STAC_REL_ITEM, + STAC_HREF_KEY: f"./{item_filename}", + STAC_TYPE_KEY: STAC_MEDIA_TYPE_GEOJSON, + }, + { + STAC_REL_KEY: STAC_REL_PARENT, + STAC_HREF_KEY: f"./{catalog_filename}", + STAC_TYPE_KEY: STAC_MEDIA_TYPE_JSON, + }, + ], + } + ), + bucket_name=ResourceName.STORAGE_BUCKET_NAME.value, + key=f"{dataset.dataset_prefix}/{dataset_version}/{collection_filename}", + ), S3Object( file_object=json_dict_to_file_object( { **deepcopy(MINIMAL_VALID_STAC_CATALOG_OBJECT), @@ -237,14 +286,19 @@ def should_update_dataset_catalog_with_new_version_catalog(subtests: SubTests) - STAC_LINKS_KEY: [ { STAC_REL_KEY: STAC_REL_ROOT, - STAC_HREF_KEY: f"./{filename}", + STAC_HREF_KEY: f"./{catalog_filename}", + STAC_TYPE_KEY: STAC_MEDIA_TYPE_JSON, + }, + { + STAC_REL_KEY: STAC_REL_CHILD, + STAC_HREF_KEY: f"./{collection_filename}", STAC_TYPE_KEY: STAC_MEDIA_TYPE_JSON, }, ], } ), bucket_name=ResourceName.STORAGE_BUCKET_NAME.value, - key=f"{dataset.dataset_prefix}/{dataset_version}/{filename}", + key=f"{dataset.dataset_prefix}/{dataset_version}/{catalog_filename}", ) as dataset_version_metadata, S3Object( file_object=json_dict_to_file_object( { @@ -305,7 +359,7 @@ def should_update_dataset_catalog_with_new_version_catalog(subtests: SubTests) - }, { STAC_REL_KEY: STAC_REL_CHILD, - STAC_HREF_KEY: f"./{dataset_version}/{filename}", + STAC_HREF_KEY: f"./{dataset_version}/{catalog_filename}", STAC_TYPE_KEY: STAC_MEDIA_TYPE_JSON, }, ] @@ -315,6 +369,11 @@ def should_update_dataset_catalog_with_new_version_catalog(subtests: SubTests) - STAC_HREF_KEY: f"../../{CATALOG_KEY}", STAC_TYPE_KEY: STAC_MEDIA_TYPE_JSON, }, + { + STAC_REL_KEY: STAC_REL_CHILD, + STAC_HREF_KEY: f"./{collection_filename}", + STAC_TYPE_KEY: STAC_MEDIA_TYPE_JSON, + }, { STAC_REL_KEY: STAC_REL_PARENT, STAC_HREF_KEY: f"../{CATALOG_KEY}",