Skip to content

Commit

Permalink
feat: Add support for DCAT creator field in dataset metadata
Browse files Browse the repository at this point in the history
- Added fields to store creator details (name, email, URL, and identifier) in the DCAT profile.
- Implemented functionality to serialize and deserialize creator information similar to the publisher.
- Updated RDF generation logic to include creator fields in the output graph.
- Enhanced unit tests to verify proper handling and serialization of creator metadata.
  • Loading branch information
Hans-Chrstian committed Oct 3, 2024
1 parent d5b8e92 commit d8461e2
Show file tree
Hide file tree
Showing 8 changed files with 92 additions and 118 deletions.
16 changes: 8 additions & 8 deletions ckanext/dcat/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ def dcat_to_ckan(dcat_dict):
elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'):
package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher.get('name')})

if dcat_publisher.get('mbox'):
package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')})
if dcat_publisher.get('email'):
package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('email')})

if dcat_publisher.get('identifier'):
package_dict['extras'].append({
Expand All @@ -45,8 +45,8 @@ def dcat_to_ckan(dcat_dict):
if dcat_creator.get('name'):
package_dict['extras'].append({'key': 'dcat_creator_name', 'value': dcat_creator.get('name')})

if dcat_creator.get('mbox'):
package_dict['extras'].append({'key': 'dcat_creator_email', 'value': dcat_creator.get('mbox')})
if dcat_creator.get('email'):
package_dict['extras'].append({'key': 'dcat_creator_email', 'value': dcat_creator.get('email')})

if dcat_creator.get('identifier'):
package_dict['extras'].append({
Expand Down Expand Up @@ -106,7 +106,7 @@ def ckan_to_dcat(package_dict):
dcat_dict['publisher']['name'] = extra['value']

elif extra['key'] == 'dcat_publisher_email':
dcat_dict['publisher']['mbox'] = extra['value']
dcat_dict['publisher']['email'] = extra['value']

elif extra['key'] == 'dcat_publisher_id':
dcat_dict['publisher']['identifier'] = extra['value']
Expand All @@ -116,7 +116,7 @@ def ckan_to_dcat(package_dict):
dcat_dict['creator']['name'] = extra['value']

elif extra['key'] == 'dcat_creator_email':
dcat_dict['creator']['mbox'] = extra['value']
dcat_dict['creator']['email'] = extra['value']

elif extra['key'] == 'dcat_creator_id':
dcat_dict['creator']['identifier'] = extra['value']
Expand All @@ -129,13 +129,13 @@ def ckan_to_dcat(package_dict):
if not dcat_dict['publisher'].get('name') and package_dict.get('maintainer'):
dcat_dict['publisher']['name'] = package_dict.get('maintainer')
if package_dict.get('maintainer_email'):
dcat_dict['publisher']['mbox'] = package_dict.get('maintainer_email')
dcat_dict['publisher']['email'] = package_dict.get('maintainer_email')

# Fallback for creator (if no name in extras, optionally use author)
if not dcat_dict['creator'].get('name') and package_dict.get('author'):
dcat_dict['creator']['name'] = package_dict.get('author')
if package_dict.get('author_email'):
dcat_dict['creator']['mbox'] = package_dict.get('author_email')
dcat_dict['creator']['email'] = package_dict.get('author_email')

dcat_dict['distribution'] = []
for resource in package_dict.get('resources', []):
Expand Down
116 changes: 58 additions & 58 deletions ckanext/dcat/profiles/euro_dcat_ap_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref):

# Basic fields
for key, predicate in (
("title", DCT.title),
("notes", DCT.description),
("url", DCAT.landingPage),
("version", OWL.versionInfo),
("title", DCT.title),
("notes", DCT.description),
("url", DCAT.landingPage),
("version", OWL.versionInfo),
):
value = self._object_value(dataset_ref, predicate)
if value:
Expand All @@ -78,30 +78,30 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref):

# Simple values
for key, predicate in (
("issued", DCT.issued),
("modified", DCT.modified),
("identifier", DCT.identifier),
("version_notes", ADMS.versionNotes),
("frequency", DCT.accrualPeriodicity),
("provenance", DCT.provenance),
("dcat_type", DCT.type),
("issued", DCT.issued),
("modified", DCT.modified),
("identifier", DCT.identifier),
("version_notes", ADMS.versionNotes),
("frequency", DCT.accrualPeriodicity),
("provenance", DCT.provenance),
("dcat_type", DCT.type),
):
value = self._object_value(dataset_ref, predicate)
if value:
dataset_dict["extras"].append({"key": key, "value": value})

# Lists
for key, predicate, in (
("language", DCT.language),
("theme", DCAT.theme),
("alternate_identifier", ADMS.identifier),
("conforms_to", DCT.conformsTo),
("documentation", FOAF.page),
("related_resource", DCT.relation),
("has_version", DCT.hasVersion),
("is_version_of", DCT.isVersionOf),
("source", DCT.source),
("sample", ADMS.sample),
("language", DCT.language),
("theme", DCAT.theme),
("alternate_identifier", ADMS.identifier),
("conforms_to", DCT.conformsTo),
("documentation", FOAF.page),
("related_resource", DCT.relation),
("has_version", DCT.hasVersion),
("is_version_of", DCT.isVersionOf),
("source", DCT.source),
("sample", ADMS.sample),
):
values = self._object_value_list(dataset_ref, predicate)
if values:
Expand Down Expand Up @@ -177,14 +177,14 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref):

# Simple values
for key, predicate in (
("name", DCT.title),
("description", DCT.description),
("access_url", DCAT.accessURL),
("download_url", DCAT.downloadURL),
("issued", DCT.issued),
("modified", DCT.modified),
("status", ADMS.status),
("license", DCT.license),
("name", DCT.title),
("description", DCT.description),
("access_url", DCAT.accessURL),
("download_url", DCAT.downloadURL),
("issued", DCT.issued),
("modified", DCT.modified),
("status", ADMS.status),
("license", DCT.license),
):
value = self._object_value(distribution, predicate)
if value:
Expand All @@ -195,9 +195,9 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref):
) or self._object_value(distribution, DCAT.accessURL)
# Lists
for key, predicate in (
("language", DCT.language),
("documentation", FOAF.page),
("conforms_to", DCT.conformsTo),
("language", DCT.language),
("documentation", FOAF.page),
("conforms_to", DCT.conformsTo),
):
values = self._object_value_list(distribution, predicate)
if values:
Expand Down Expand Up @@ -252,10 +252,10 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref):
# versions of the ckanext-dcat parsers
for extra in dataset_dict["extras"]:
if extra["key"] in (
"issued",
"modified",
"publisher_name",
"publisher_email",
"issued",
"modified",
"publisher_name",
"publisher_email",
):
extra["key"] = "dcat_" + extra["key"]

Expand Down Expand Up @@ -315,15 +315,15 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):

# Contact details
if any(
[
self._get_dataset_value(dataset_dict, "contact_uri"),
self._get_dataset_value(dataset_dict, "contact_name"),
self._get_dataset_value(dataset_dict, "contact_email"),
self._get_dataset_value(dataset_dict, "maintainer"),
self._get_dataset_value(dataset_dict, "maintainer_email"),
self._get_dataset_value(dataset_dict, "author"),
self._get_dataset_value(dataset_dict, "author_email"),
]
[
self._get_dataset_value(dataset_dict, "contact_uri"),
self._get_dataset_value(dataset_dict, "contact_name"),
self._get_dataset_value(dataset_dict, "contact_email"),
self._get_dataset_value(dataset_dict, "maintainer"),
self._get_dataset_value(dataset_dict, "maintainer_email"),
self._get_dataset_value(dataset_dict, "author"),
self._get_dataset_value(dataset_dict, "author_email"),
]
):

contact_uri = self._get_dataset_value(dataset_dict, "contact_uri")
Expand Down Expand Up @@ -360,10 +360,10 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
# Scheming publisher field: will be handled in a separate profile
pass
elif any(
[
self._get_dataset_value(dataset_dict, "publisher_uri"),
self._get_dataset_value(dataset_dict, "publisher_name"),
]
[
self._get_dataset_value(dataset_dict, "publisher_uri"),
self._get_dataset_value(dataset_dict, "publisher_name"),
]
):
# Legacy publisher_* extras
publisher_uri = self._get_dataset_value(dataset_dict, "publisher_uri")
Expand Down Expand Up @@ -425,10 +425,10 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
# Scheming creator field: will be handled in a separate profile
pass
elif any(
[
self._get_dataset_value(dataset_dict, "creator_uri"),
self._get_dataset_value(dataset_dict, "creator_name"),
]
[
self._get_dataset_value(dataset_dict, "creator_uri"),
self._get_dataset_value(dataset_dict, "creator_name"),
]
):
# Legacy creator_* extras
creator_uri = self._get_dataset_value(dataset_dict, "creator_uri")
Expand Down Expand Up @@ -492,11 +492,11 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
resource_license_fallback = None
if toolkit.asbool(config.get(DISTRIBUTION_LICENSE_FALLBACK_CONFIG, False)):
if "license_id" in dataset_dict and isinstance(
URIRefOrLiteral(dataset_dict["license_id"]), URIRef
URIRefOrLiteral(dataset_dict["license_id"]), URIRef
):
resource_license_fallback = dataset_dict["license_id"]
elif "license_url" in dataset_dict and isinstance(
URIRefOrLiteral(dataset_dict["license_url"]), URIRef
URIRefOrLiteral(dataset_dict["license_url"]), URIRef
):
resource_license_fallback = dataset_dict["license_url"]

Expand Down Expand Up @@ -559,9 +559,9 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
# check which type is appropriate.
if fmt and (not mimetype or mimetype == fmt):
if (
"iana.org/assignments/media-types" in fmt
or not fmt.startswith("http")
and "/" in fmt
"iana.org/assignments/media-types" in fmt
or not fmt.startswith("http")
and "/" in fmt
):
# output format value as dcat:mediaType instead of dct:format
mimetype = fmt
Expand Down
16 changes: 8 additions & 8 deletions ckanext/dcat/profiles/schemaorg.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,11 @@ def _agent_graph(self, dataset_ref, dataset_dict, agent_type, schema_property_pr
identifier_key = f"{schema_property_prefix}_identifier"

if any(
[
self._get_dataset_value(dataset_dict, uri_key),
self._get_dataset_value(dataset_dict, name_key),
dataset_dict.get("organization"),
]
[
self._get_dataset_value(dataset_dict, uri_key),
self._get_dataset_value(dataset_dict, name_key),
dataset_dict.get("organization"),
]
):
agent_uri = self._get_dataset_value(dataset_dict, uri_key)
agent_uri_fallback = publisher_uri_organization_fallback(dataset_dict)
Expand All @@ -188,9 +188,9 @@ def _agent_graph(self, dataset_ref, dataset_dict, agent_type, schema_property_pr
self.g.add((dataset_ref, agent_type, agent_details))

if (
not agent_name
and not agent_uri
and dataset_dict.get("organization")
not agent_name
and not agent_uri
and dataset_dict.get("organization")
):
agent_name = dataset_dict["organization"]["title"]
self.g.add((agent_details, SCHEMA.name, Literal(agent_name)))
Expand Down
34 changes: 0 additions & 34 deletions ckanext/dcat/schemas/dcat_ap_recommended.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,40 +72,6 @@ dataset_fields:
help_text: Unique identifier for the publisher, such as a ROR ID.
help_text: Entity responsible for making the dataset available.

- field_name: creator
label: Creator
repeating_label: Creator
repeating_once: true
repeating_subfields:

- field_name: uri
label: URI
help_text: URI of the creator, if available.

- field_name: name
label: Name
help_text: Name of the entity or person who created the dataset.

- field_name: email
label: Email
display_snippet: email.html
help_text: Contact email of the creator.

- field_name: url
label: URL
display_snippet: link.html
help_text: URL for more information about the creator.

- field_name: type
label: Type
help_text: Type of creator (e.g., Organization, Person).

- field_name: identifier
label: Identifier
help_text: Unique identifier for the creator, such as an ORCID or ROR ID.

help_text: Entity responsible for creating the dataset.

- field_name: license_id
label: License
form_snippet: license.html
Expand Down
4 changes: 2 additions & 2 deletions ckanext/dcat/tests/profiles/base/test_base_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,7 @@ def test_publisher_foaf(self):

p = RDFProfile(g)

publisher = p._publisher(URIRef('http://example.org'), DCT.publisher)
publisher = p._agent_details(URIRef('http://example.org'), DCT.publisher)

assert publisher['uri'] == 'http://orgs.vocab.org/some-org'
assert publisher['name'] == 'Publishing Organization for dataset 1'
Expand Down Expand Up @@ -688,7 +688,7 @@ def test_publisher_ref(self):

p = RDFProfile(g)

publisher = p._publisher(URIRef('http://example.org'), DCT.publisher)
publisher = p._agent_details(URIRef('http://example.org'), DCT.publisher)

assert publisher['uri'] == 'http://orgs.vocab.org/some-org'

Expand Down
2 changes: 2 additions & 0 deletions examples/ckan/ckan_dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
{"key": "guid", "value": "9df8df51-63db-37a8-e044-0003ba9b0d98"},
{"key": "dcat_publisher_name", "value": "Geological Society"},
{"key": "dcat_publisher_email", "value": "info@gs.org"},
{"key": "dcat_creator_name", "value": "John Doe"},
{"key": "dcat_creator_email", "value": "johndoe@example.com"},
{"key": "language", "value": "en,es,ca"}
],
"resources": [{"id": "b1e0b666-b7f4-44c1-9b16-56c78e86b66a",
Expand Down
18 changes: 12 additions & 6 deletions examples/ckan/dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,17 @@
"keyword" : ["exploration", "geochemical-exploration", "geochemical-maps", "geochemistry", "geology", "nercddc", "regional-geology"],
"publisher": {
"name": "Geological Society",
"mbox": "info@gs.org"
"email": "info@gs.org"
},
"distribution": [{"accessURL": "http://www.bgs.ac.uk/gbase/geochemcd/home.html",
"byteSize": null,
"description": "Resource locator",
"format": "text/html",
"title": ""}]
"creator": {
"name": "John Doe",
"email": "johndoe@example.com"
},
"distribution": [{
"accessURL": "http://www.bgs.ac.uk/gbase/geochemcd/home.html",
"byteSize": null,
"description": "Resource locator",
"format": "text/html",
"title": ""
}]
}
4 changes: 2 additions & 2 deletions examples/ckan/full_ckan_dataset.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"author": null,
"author_email": null,
"author": "John Doe",
"author_email": "johndoe@example.com",
"extras": [
{
"__extras": {
Expand Down

0 comments on commit d8461e2

Please sign in to comment.