Skip to content

Commit

Permalink
Enable retrieving synonyms on get_prefixes and implement `get_uri_p…
Browse files Browse the repository at this point in the history
…refixes` (#100)
  • Loading branch information
cthoyt authored Jan 17, 2024
1 parent edd8047 commit 0a22679
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 4 deletions.
46 changes: 46 additions & 0 deletions docs/source/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,52 @@ This works with both :class:`pathlib.Path` and vanilla strings.
urlretrieve(url, path)
converter = curies.load_shacl(path)
Introspecting on a Context
--------------------------
After loading a context, it's possible to get certain information out of the converter. For example, if you want to
get all of the CURIE prefixes from the converter, you can use :meth:`Converter.get_prefixes`:

.. code-block:: python
import curies
converter = curies.get_bioregistry_converter()
prefixes = converter.get_prefixes()
assert 'chebi' in prefixes
assert 'CHEBIID' not in prefixes, "No synonyms are included by default"
prefixes = converter.get_prefixes(include_synonyms=True)
assert 'chebi' in prefixes
assert 'CHEBIID' in prefixes
Similarly, the URI prefixes can be extracted with :meth:`Converter.get_uri_prefixes` like in:

.. code-block:: python
import curies
converter = curies.get_bioregistry_converter()
uri_prefixes = converter.get_uri_prefixes()
assert 'http://purl.obolibrary.org/obo/CHEBI_' in uri_prefixes
assert 'https://bioregistry.io/chebi:' not in uri_prefixes, "No synonyms are included by default"
uri_prefixes = converter.get_uri_prefixes(include_synonyms=True)
assert 'http://purl.obolibrary.org/obo/CHEBI_' in uri_prefixes
assert 'https://bioregistry.io/chebi:' in uri_prefixes
It's also possible to get a bijective prefix map, i.e., a dictionary from primary CURIE prefixes
to primary URI prefixes. This is useful for compatibility with legacy systems which assume simple prefix maps.
This can be done with the ``bimap`` property like in the following:

.. code-block:: python
import curies
converter = curies.get_bioregistry_converter()
prefix_map = converter.bimap
>>> prefix_map['chebi']
'http://purl.obolibrary.org/obo/CHEBI_'
Modifying a Context
-------------------
Incremental Converters
Expand Down
38 changes: 35 additions & 3 deletions src/curies/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -965,9 +965,41 @@ def from_shacl(
]
return cls(records, **kwargs)

def get_prefixes(self) -> Set[str]:
    """Collect the primary prefix of every record in this converter into a set."""
    prefixes: Set[str] = set()
    for record in self.records:
        prefixes.add(record.prefix)
    return prefixes
def get_prefixes(self, *, include_synonyms: bool = False) -> Set[str]:
    """Get the set of prefixes covered by this converter.

    :param include_synonyms: If true, include secondary prefixes.
    :return:
        A set of primary prefixes covered by the converter. If ``include_synonyms`` is
        set to ``True``, secondary prefixes (i.e., ones in :data:`Record.prefix_synonyms`)
        are also included.
    """
    rv = {record.prefix for record in self.records}
    if include_synonyms:
        # Fold every record's secondary prefixes into the same set.
        rv.update(
            prefix_synonym
            for record in self.records
            for prefix_synonym in record.prefix_synonyms
        )
    return rv

def get_uri_prefixes(self, *, include_synonyms: bool = False) -> Set[str]:
    """Get the set of URI prefixes covered by this converter.

    :param include_synonyms: If true, include secondary URI prefixes.
    :return:
        A set of primary URI prefixes covered by the converter. If ``include_synonyms`` is
        set to ``True``, secondary URI prefixes (i.e., ones in
        :data:`Record.uri_prefix_synonyms`) are also included.
    """
    rv = {record.uri_prefix for record in self.records}
    if include_synonyms:
        # Fold every record's secondary URI prefixes into the same set.
        rv.update(
            uri_prefix_synonym
            for record in self.records
            for uri_prefix_synonym in record.uri_prefix_synonyms
        )
    return rv

def format_curie(self, prefix: str, identifier: str) -> str:
"""Format a prefix and identifier into a CURIE string."""
Expand Down
34 changes: 33 additions & 1 deletion tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,24 @@ def test_duplicate_failure(self):
with self.assertRaises(ValueError):
self.converter.add_record(Record(prefix="GO", uri_prefix=CHEBI_URI_PREFIX))

def test_get_prefix_synonyms(self):
    """Check that ``get_prefixes`` returns synonyms only when ``include_synonyms`` is true."""
    get_prefixes = self.converter.get_prefixes
    # Default call and explicit ``include_synonyms=False`` both yield only the primary prefix.
    self.assertEqual({self.prefix}, get_prefixes())
    self.assertEqual({self.prefix}, get_prefixes(include_synonyms=False))
    # Opting in adds the prefix synonym alongside the primary prefix.
    with_synonyms = {self.prefix, self.prefix_synonym}
    self.assertEqual(with_synonyms, get_prefixes(include_synonyms=True))

def test_get_uri_prefix_synonyms(self):
    """Check that ``get_uri_prefixes`` returns synonyms only when ``include_synonyms`` is true."""
    get_uri_prefixes = self.converter.get_uri_prefixes
    # Default call and explicit ``include_synonyms=False`` both yield only the primary URI prefix.
    self.assertEqual({self.uri_prefix}, get_uri_prefixes())
    self.assertEqual({self.uri_prefix}, get_uri_prefixes(include_synonyms=False))
    # Opting in adds the URI prefix synonym alongside the primary URI prefix.
    with_synonyms = {self.uri_prefix, self.uri_prefix_synonym}
    self.assertEqual(with_synonyms, get_uri_prefixes(include_synonyms=True))

def test_extend_on_prefix_match(self):
"""Test adding a new prefix in merge mode."""
s1, s2, s3 = "s1", "s2", "s3"
Expand Down Expand Up @@ -259,6 +277,9 @@ def test_subset(self):
new_converter = self.converter.get_subconverter(["CHEBI"])
self.assertEqual(1, len(new_converter.records))
self.assertEqual({"CHEBI"}, new_converter.get_prefixes())
self.assertEqual(
{"http://purl.obolibrary.org/obo/CHEBI_"}, new_converter.get_uri_prefixes()
)
self.assertEqual({"CHEBI"}, set(new_converter.bimap))
self.assertEqual({"CHEBI"}, set(new_converter.prefix_map))
self.assertEqual(
Expand All @@ -282,6 +303,15 @@ def test_predicates(self):
def test_convert(self):
    """Check the converter's prefixes and URI prefixes, then delegate to ``_assert_convert``."""
    expected_prefixes = {"CHEBI", "MONDO", "GO", "OBO"}
    self.assertEqual(expected_prefixes, self.converter.get_prefixes())

    expected_uri_prefixes = {
        "http://purl.obolibrary.org/obo/CHEBI_",
        "http://purl.obolibrary.org/obo/MONDO_",
        "http://purl.obolibrary.org/obo/GO_",
        "http://purl.obolibrary.org/obo/",
    }
    self.assertEqual(expected_uri_prefixes, self.converter.get_uri_prefixes())

    self._assert_convert(self.converter)

def _assert_convert(self, converter: Converter):
Expand Down Expand Up @@ -556,7 +586,7 @@ def test_combine_with_synonyms(self):
self.assertIn("GO", c3.bimap)

def test_combine_ci(self):
"""Test combining case insensitive."""
"""Test combining case-insensitive."""
c1 = Converter.from_priority_prefix_map(
{
"CHEBI": [
Expand All @@ -573,6 +603,8 @@ def test_combine_ci(self):
)
converter = chain([c1, c2], case_sensitive=False)
self.assertEqual({"CHEBI"}, converter.get_prefixes())
self.assertEqual({"CHEBI"}, converter.get_prefixes(include_synonyms=False))
self.assertEqual({"CHEBI", "chebi"}, converter.get_prefixes(include_synonyms=True))
for url in [
"http://purl.obolibrary.org/obo/CHEBI_138488",
"http://identifiers.org/chebi/138488",
Expand Down

0 comments on commit 0a22679

Please sign in to comment.