fix: support null metadata and double lookup for write datasets (#3968)
* fix: support null metadata and double lookup for write datasets

* PR changes

* split test into 2
ebezzi authored Jan 13, 2023
1 parent ceb0379 commit e14aa76
Showing 4 changed files with 87 additions and 43 deletions.
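In short: the write endpoints (DELETE/PUT on a dataset) previously accepted only version ids and assumed dataset metadata was always present. After this change, the collection id is resolved by trying the canonical id first and falling back to the version id, the dataset is matched against either of its ids, and the curation API tolerates datasets whose metadata is still null because processing has not finished. Below is a minimal, self-contained sketch of that lookup logic; the stub types and the toy stores are hypothetical stand-ins for the real business layer, and only the resolution order mirrors the diffs that follow.

```python
from dataclasses import dataclass, field
from typing import Dict, List, Tuple


@dataclass
class DatasetStub:
    dataset_id: str  # canonical id, stable across versions
    version_id: str  # id of one concrete version


@dataclass
class CollectionVersionStub:
    collection_id: str  # canonical id
    version_id: str
    datasets: List[DatasetStub] = field(default_factory=list)


# Toy stores keyed both ways, standing in for the two business-logic lookups.
BY_CANONICAL: Dict[str, CollectionVersionStub] = {}
BY_VERSION: Dict[str, CollectionVersionStub] = {}


def get_collection_and_dataset(
    collection_id: str, dataset_id: str
) -> Tuple[CollectionVersionStub, DatasetStub]:
    """Resolve ids that may be either canonical ids or version ids."""
    # Try the canonical id first, then fall back to treating it as a version id.
    collection = BY_CANONICAL.get(collection_id) or BY_VERSION.get(collection_id)
    if collection is None:
        raise LookupError("collection not found")  # the endpoint raises a 403 here
    # Match the dataset against either of its two ids.
    dataset = next(
        (d for d in collection.datasets if d.version_id == dataset_id or d.dataset_id == dataset_id),
        None,
    )
    if dataset is None:
        raise LookupError("dataset not found")  # likewise a 403
    return collection, dataset


# Both id styles resolve to the same pair.
ds = DatasetStub(dataset_id="ds-canonical", version_id="ds-v1")
cv = CollectionVersionStub("col-canonical", "col-v1", [ds])
BY_CANONICAL[cv.collection_id] = cv
BY_VERSION[cv.version_id] = cv
assert get_collection_and_dataset("col-canonical", "ds-v1") == (cv, ds)
assert get_collection_and_dataset("col-v1", "ds-canonical") == (cv, ds)
```

Centralizing the resolution in one helper keeps delete and put behaving identically for both id styles.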
7 changes: 5 additions & 2 deletions backend/layers/business/business_interface.py

@@ -10,6 +10,7 @@
     CollectionMetadata,
     CollectionVersion,
     CollectionVersionId,
+    CollectionVersionWithDatasets,
     DatasetArtifact,
     DatasetArtifactId,
     DatasetId,
@@ -29,13 +30,15 @@ def get_collections(self, filter: CollectionQueryFilter) -> Iterable[CollectionV
     def get_published_collection_version(self, collection_id: CollectionId) -> CollectionVersion:
         pass

-    def get_collection_version(self, version_id: CollectionVersionId) -> CollectionVersion:
+    def get_collection_version(self, version_id: CollectionVersionId) -> CollectionVersionWithDatasets:
         pass

     def get_collection_versions_from_canonical(self, collection_id: CollectionId) -> Iterable[CollectionVersion]:
         pass

-    def get_collection_version_from_canonical(self, collection_id: CollectionId) -> Optional[CollectionVersion]:
+    def get_collection_version_from_canonical(
+        self, collection_id: CollectionId
+    ) -> Optional[CollectionVersionWithDatasets]:
         pass

     def create_collection(
[second changed file — path not captured]

@@ -26,7 +26,6 @@
     CollectionVersionId,
     CollectionVersionWithDatasets,
     DatasetVersion,
-    DatasetVersionId,
 )
 from backend.portal.api.curation.v1.curation.collections.common import (
     get_infered_dataset_version,
@@ -63,26 +62,34 @@ def get(collection_id: str, dataset_id: str = None):


 def _get_collection_and_dataset(
-    business_logic: BusinessLogicInterface, collection_id: CollectionVersionId, dataset_id: DatasetVersionId
+    business_logic: BusinessLogicInterface, collection_id: str, dataset_id: str
 ) -> Tuple[CollectionVersionWithDatasets, DatasetVersion]:
-    dataset_version = business_logic.get_dataset_version(dataset_id)
-    if dataset_version is None:
-        raise ForbiddenHTTPException()
+    """
+    Get the collection and dataset for the given ids, looking up both the canonical and the version id for each.
+    """

-    collection_version = business_logic.get_collection_version(collection_id)
+    collection_version = business_logic.get_collection_version_from_canonical(CollectionId(collection_id))
+    if collection_version is None:
+        collection_version = business_logic.get_collection_version(CollectionVersionId(collection_id))
     if collection_version is None:
         raise ForbiddenHTTPException()

+    # Extract the dataset from the collection version's dataset list.
+    try:
+        dataset_version = next(
+            d for d in collection_version.datasets if d.version_id.id == dataset_id or d.dataset_id.id == dataset_id
+        )
+    except StopIteration:
+        raise ForbiddenHTTPException()
+
     return collection_version, dataset_version


-def delete(token_info: dict, collection_id: str, dataset_id: str = None):
+def delete(token_info: dict, collection_id: str, dataset_id: str):
     business_logic = get_business_logic()
     user_info = UserInfo(token_info)

-    collection_version, dataset_version = _get_collection_and_dataset(
-        business_logic, CollectionVersionId(collection_id), DatasetVersionId(dataset_id)
-    )
+    collection_version, dataset_version = _get_collection_and_dataset(business_logic, collection_id, dataset_id)

     if not user_info.is_user_owner_or_allowed(collection_version.owner):
         raise ForbiddenHTTPException("Unauthorized")
@@ -105,9 +112,7 @@ def put(collection_id: str, dataset_id: str, body: dict, token_info: dict):
     url = body.get("url", body.get("link"))
     business_logic = get_business_logic()

-    collection_version, _ = _get_collection_and_dataset(
-        business_logic, CollectionVersionId(collection_id), DatasetVersionId(dataset_id)
-    )
+    collection_version, dataset_version = _get_collection_and_dataset(business_logic, collection_id, dataset_id)

     if not UserInfo(token_info).is_user_owner_or_allowed(collection_version.owner):
         raise ForbiddenHTTPException()
@@ -117,7 +122,7 @@ def put(collection_id: str, dataset_id: str, body: dict, token_info: dict):
             collection_version.version_id,
             url,
             None,
-            None if dataset_id is None else DatasetVersionId(dataset_id),
+            None if dataset_id is None else dataset_version.version_id,
         )
         return Response(status=202)
     except CollectionNotFoundException:
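From a client's perspective, the write endpoints now accept either id form in each path segment. A hypothetical client-side sketch follows; the path shape comes from the tests further down, while the host, token, ids, and link are made-up placeholders.

```python
# Hypothetical client calls; the base URL, token, ids, and link are placeholders.
import requests

BASE = "https://<api-host>/curation/v1"
HEADERS = {"Authorization": "Bearer <access-token>"}

# Either id style in either path segment now resolves to the same dataset.
for collection_id, dataset_id in [
    ("<collection-version-id>", "<dataset-version-id>"),
    ("<collection-canonical-id>", "<dataset-canonical-id>"),
]:
    resp = requests.put(
        f"{BASE}/collections/{collection_id}/datasets/{dataset_id}",
        json={"link": "https://example.com/my-dataset.h5ad"},  # placeholder link
        headers=HEADERS,
    )
    print(resp.status_code)  # 202 on success, 403 if neither lookup matches
```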
18 changes: 10 additions & 8 deletions backend/portal/api/curation/v1/curation/collections/common.py

@@ -162,14 +162,16 @@ def reshape_dataset_for_curation_api(
     else:
         columns = EntityColumns.dataset_metadata_cols

-    # Get dataset metadata fields
-    for column in columns:
-        col = getattr(dataset_version.metadata, column)
-        if isinstance(col, OntologyTermId):
-            col = [asdict(col)]
-        elif isinstance(col, list) and len(col) != 0 and isinstance(col[0], OntologyTermId):
-            col = [asdict(i) for i in col]
-        ds[column] = col
+    # Get dataset metadata fields.
+    # Metadata can be None if the dataset is not yet fully processed, so account for that.
+    if dataset_version.metadata is not None:
+        for column in columns:
+            col = getattr(dataset_version.metadata, column)
+            if isinstance(col, OntologyTermId):
+                col = [asdict(col)]
+            elif isinstance(col, list) and len(col) != 0 and isinstance(col[0], OntologyTermId):
+                col = [asdict(i) for i in col]
+            ds[column] = col

     # Get non-preview-specific dataset fields
     if not preview:
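The effect of the new guard, in toy form: metadata columns are simply omitted from the response while a dataset is still processing. The stub type and field names below are invented for illustration; only the `is not None` check mirrors the commit.

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class MetadataStub:  # invented stand-in for the real dataset metadata type
    organism: str
    tissue: str


def reshape(metadata: Optional[MetadataStub]) -> dict:
    ds = {"id": "example-dataset"}
    # Mirrors the committed guard: add metadata columns only when metadata exists.
    if metadata is not None:
        for column in ("organism", "tissue"):
            ds[column] = getattr(metadata, column)
    return ds


print(reshape(MetadataStub("human", "lung")))  # {'id': 'example-dataset', 'organism': 'human', 'tissue': 'lung'}
print(reshape(None))  # {'id': 'example-dataset'} — a dataset still being processed
```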
72 changes: 53 additions & 19 deletions tests/unit/backend/layers/api/test_curation_api.py

@@ -952,26 +952,50 @@ def test__update_public_collection_owner__405(self):


 class TestDeleteDataset(BaseAPIPortalTest):
-    def test__delete_dataset(self):
-        auth_credentials = [
+    def setUp(self):
+        super().setUp()
+        self.auth_credentials = [
             (self.make_super_curator_header, "super", 202),
             (self.make_owner_header, "owner", 202),
             (None, "none", 401),
             (self.make_not_owner_header, "not_owner", 403),
         ]
-        for auth, auth_description, expected_status_code in auth_credentials:
-            with self.subTest(f"{auth_description} {expected_status_code}"):
-                dataset = self.generate_dataset(
-                    statuses=[DatasetStatusUpdate(DatasetStatusKey.UPLOAD, DatasetUploadStatus.UPLOADING)],
-                    publish=False,
-                )

-                test_url = (
-                    f"/curation/v1/collections/{dataset.collection_version_id}/datasets/"
-                    f"{dataset.dataset_version_id}"
-                )
-                headers = auth() if callable(auth) else auth
-                response = self.app.delete(test_url, headers=headers)
-                self.assertEqual(expected_status_code, response.status_code)
+    def _delete(self, auth, collection_id, dataset_id):
+        """
+        Helper method to call the delete endpoint.
+        """
+        test_url = f"/curation/v1/collections/{collection_id}/datasets/{dataset_id}"
+        headers = auth() if callable(auth) else auth
+        return self.app.delete(test_url, headers=headers)
+
+    def test__delete_dataset_by_version_id(self):
+        """
+        DELETE /collections/:collection_id/datasets/:dataset_id should respond according to the
+        auth token passed when versioned ids are used.
+        """
+        for auth, auth_description, expected_status_code in self.auth_credentials:
+            with self.subTest(f"{auth_description} {expected_status_code}"):
+                dataset = self.generate_dataset(
+                    statuses=[DatasetStatusUpdate(DatasetStatusKey.UPLOAD, DatasetUploadStatus.UPLOADING)],
+                    publish=False,
+                )
+                response = self._delete(auth, dataset.collection_version_id, dataset.dataset_version_id)
+                self.assertEqual(expected_status_code, response.status_code)
+
+    def test__delete_dataset_by_canonical_id(self):
+        """
+        DELETE /collections/:collection_id/datasets/:dataset_id should respond according to the
+        auth token passed when canonical ids are used. In this case, the unpublished collection
+        version is looked up and used for deletion.
+        """
+        for auth, auth_description, expected_status_code in self.auth_credentials:
+            with self.subTest(f"{auth_description} {expected_status_code}"):
+                dataset = self.generate_dataset(
+                    statuses=[DatasetStatusUpdate(DatasetStatusKey.UPLOAD, DatasetUploadStatus.UPLOADING)],
+                    publish=False,
+                )
+                response = self._delete(auth, dataset.collection_id, dataset.dataset_id)
+                self.assertEqual(expected_status_code, response.status_code)


@@ -1191,17 +1215,27 @@ def test__new_from_link__OK(self, *mocks):
         """
         Calling PUT /datasets/:dataset_id should succeed if a valid link is uploaded by the owner of the collection
         """

-        dataset = self.generate_dataset(
-            statuses=[DatasetStatusUpdate(DatasetStatusKey.PROCESSING, DatasetProcessingStatus.INITIALIZED)],
-        )
-        body = {"link": self.good_link}
-        headers = self.make_owner_header()
-        response = self.app.put(
-            f"/curation/v1/collections/{dataset.collection_version_id}/datasets/{dataset.dataset_version_id}",
-            json=body,
-            headers=headers,
-        )
-        self.assertEqual(202, response.status_code)
+        def _test_create(collection_id, dataset_id):
+            body = {"link": self.good_link}
+            headers = self.make_owner_header()
+            response = self.app.put(
+                f"/curation/v1/collections/{collection_id}/datasets/{dataset_id}",
+                json=body,
+                headers=headers,
+            )
+            self.assertEqual(202, response.status_code)
+
+        with self.subTest("with version_ids"):
+            dataset = self.generate_dataset(
+                statuses=[DatasetStatusUpdate(DatasetStatusKey.PROCESSING, DatasetProcessingStatus.INITIALIZED)],
+            )
+            _test_create(dataset.collection_version_id, dataset.dataset_version_id)
+
+        with self.subTest("with collection_ids"):
+            dataset = self.generate_dataset(
+                statuses=[DatasetStatusUpdate(DatasetStatusKey.PROCESSING, DatasetProcessingStatus.INITIALIZED)],
+            )
+            _test_create(dataset.collection_id, dataset.dataset_id)

     def test__new_from_link__Super_Curator(self, *mocks):
         """
