Skip to content

Commit

Permalink
Add JSON schema for extended prefix map (#109)
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt authored Apr 22, 2024
1 parent af3bd03 commit 470f71a
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 17 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ global-exclude *.py[cod] __pycache__ *.so *.dylib .DS_Store *.gpickle

include README.md LICENSE
exclude tox.ini .flake8 .bumpversion.cfg .readthedocs.yml codecov.yml
exclude docs/make_schema.py docs/schema.json
54 changes: 54 additions & 0 deletions docs/make_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Generate a JSON schema for extended prefix maps."""

import json
from pathlib import Path

from curies import Records
from curies._pydantic_compat import PYDANTIC_V1

# Directory containing this script; the generated schema is written next to it.
HERE = Path(__file__).parent.resolve()
PATH = HERE.joinpath("schema.json")
TITLE = "Extended Prefix Map"
# Collapse the wrapped literal below into a single line. ``str.split()``
# without arguments splits on any run of whitespace and drops leading and
# trailing whitespace, so newlines and repeated spaces alike end up as
# exactly one space after joining.
DESCRIPTION = " ".join(
    """\
An extended prefix map is a generalization of a prefix map that
includes synonyms for URI prefixes and CURIE prefixes.
""".split()
)
# Used as the ``$id`` of the generated schema.
URL = "https://w3id.org/biopragmatics/schema/epm.json"


def main() -> None:
    """Build the extended prefix map JSON schema and write it to ``PATH``.

    The schema body is produced by Pydantic from the ``Records`` model, using
    whichever generation API matches the installed major version. It is then
    merged after the ``$schema`` and ``$id`` keys and serialized with a
    two-space indent and a trailing newline.
    """
    if PYDANTIC_V1:
        import pydantic.schema

        # see https://docs.pydantic.dev/latest/usage/json_schema/#general-notes-on-json-schema-generation
        generated = pydantic.schema.schema(
            [Records],
            title=TITLE,
            description=DESCRIPTION,
        )
    else:
        from pydantic.json_schema import models_json_schema

        # The first element of the returned pair is not needed here.
        _, generated = models_json_schema(
            [(Records, "validation")],
            title=TITLE,
            description=DESCRIPTION,
        )

    # Header keys come first; keys from the generated schema follow and take
    # precedence if they collide.
    document = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "$id": URL,
        **generated,
    }
    serialized = json.dumps(document, indent=2)
    PATH.write_text(serialized + "\n")


if __name__ == "__main__":
main()
64 changes: 64 additions & 0 deletions docs/schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://w3id.org/biopragmatics/schema/epm.json",
"$defs": {
"Record": {
"description": "A record of some prefixes and their associated URI prefixes.\n\n.. seealso:: https://github.com/cthoyt/curies/issues/70",
"properties": {
"prefix": {
"description": "The canonical CURIE prefix, used in the reverse prefix map",
"title": "CURIE prefix",
"type": "string"
},
"uri_prefix": {
"description": "The canonical URI prefix, used in the forward prefix map",
"title": "URI prefix",
"type": "string"
},
"prefix_synonyms": {
"items": {
"type": "string"
},
"title": "CURIE prefix synonyms",
"type": "array"
},
"uri_prefix_synonyms": {
"items": {
"type": "string"
},
"title": "URI prefix synonyms",
"type": "array"
},
"pattern": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "The regular expression pattern for entries in this semantic space. Warning: this is an experimental feature.",
"title": "Pattern"
}
},
"required": [
"prefix",
"uri_prefix"
],
"title": "Record",
"type": "object"
},
"Records": {
"description": "A list of records.",
"items": {
"$ref": "#/$defs/Record"
},
"title": "Records",
"type": "array"
}
},
"title": "Extended Prefix Map",
"description": "An extended prefix map is a generalization of a prefix map that includes synonyms for URI prefixes and CURIE prefixes."
}
6 changes: 5 additions & 1 deletion docs/source/struct.rst
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ containing an entry for ChEBI) looks like:
}
]
An EPM is simply a list of records (see :class:`curies.Record`). EPMs have the benefit that they are still
An EPM is simply a list of records (see :class:`curies.Record` and :class:`curies.Records`).
EPMs have the benefit that they are still
encoded in JSON and can easily be encoded in YAML, TOML, RDF, and other schemata. Further, prefix maps can be
automatically upgraded into EPMs (with some caveats) using :func:`curies.upgrade_prefix_map`.

Expand All @@ -118,3 +119,6 @@ automatically upgraded into EPMs (with some caveats) using :func:`curies.upgrade
can be loaded using :meth:`curies.Converter.from_extended_prefix_map`.
We provide a Pydantic model representing it. Later, we hope to have an external, stable definition
of this data schema.

A JSON schema for EPMs is available at https://w3id.org/biopragmatics/schema/epm.json.
It is generated by the script at https://github.com/biopragmatics/curies/tree/main/docs/make_schema.py;
re-run that script to update the schema.
2 changes: 2 additions & 0 deletions src/curies/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
DuplicateURIPrefixes,
DuplicateValueError,
Record,
Records,
Reference,
ReferenceTuple,
chain,
Expand Down Expand Up @@ -35,6 +36,7 @@
__all__ = [
"Converter",
"Record",
"Records",
"ReferenceTuple",
"Reference",
"DuplicateValueError",
Expand Down
63 changes: 47 additions & 16 deletions src/curies/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"Reference",
"ReferenceTuple",
"Record",
"Records",
"DuplicateValueError",
"DuplicatePrefixes",
"DuplicateURIPrefixes",
Expand Down Expand Up @@ -252,26 +253,21 @@ def from_curie(cls, curie: str, sep: str = ":") -> "Reference":
class Record(BaseModel): # type:ignore
"""A record of some prefixes and their associated URI prefixes.
A list of records can be annotated in a FastAPI setting with the following:
.. code-block:: python
from typing import List
from curies import Record
from pydantic import BaseModel
class Records(BaseModel):
__root__ = List[Record]
.. seealso:: https://github.com/cthoyt/curies/issues/70
"""

prefix: str = Field(..., description="The canonical prefix, used in the reverse prefix map")
prefix: str = Field(
...,
title="CURIE prefix",
description="The canonical CURIE prefix, used in the reverse prefix map",
)
uri_prefix: str = Field(
..., description="The canonical URI prefix, used in the forward prefix map"
...,
title="URI prefix",
description="The canonical URI prefix, used in the forward prefix map",
)
prefix_synonyms: List[str] = Field(default_factory=list)
uri_prefix_synonyms: List[str] = Field(default_factory=list)
prefix_synonyms: List[str] = Field(default_factory=list, title="CURIE prefix synonyms")
uri_prefix_synonyms: List[str] = Field(default_factory=list, title="URI prefix synonyms")
pattern: Optional[str] = Field(
default=None,
description="The regular expression pattern for entries in this semantic space. "
Expand Down Expand Up @@ -315,6 +311,40 @@ def _key(self) -> RecordKey:
)


if PYDANTIC_V1:
    # An explanation of RootModels in Pydantic V1 can be found on
    # https://docs.pydantic.dev/1.10/usage/models/#custom-root-types

    from typing import Iterator

    from pydantic import BaseModel

    # NOTE: the class docstring is kept short on purpose; Pydantic copies it
    # into the generated JSON schema as the ``description`` of ``Records``.
    class Records(BaseModel):  # type:ignore
        """A list of records."""

        class Config:
            """Configuration for the records."""

            arbitrary_types_allowed = True

        # Custom root type: the model's content is the list itself.
        __root__: List[Record]

        def __iter__(self) -> Iterator[Record]:
            """Iterate over records.

            :returns: A new iterator over the records stored in ``__root__``.
            """
            # ``__iter__`` returns an iterator (it has ``__next__``), so the
            # annotation is ``Iterator`` rather than the weaker ``Iterable``.
            return cast(Iterator[Record], iter(self.__root__))

else:
    # An explanation of RootModels in Pydantic V2 can be found on
    # https://docs.pydantic.dev/latest/concepts/models/#rootmodel-and-custom-root-types

    from typing import Iterator

    from pydantic import RootModel

    # NOTE: the class docstring is kept short on purpose; Pydantic copies it
    # into the generated JSON schema as the ``description`` of ``Records``.
    class Records(RootModel[List[Record]]):  # type:ignore
        """A list of records."""

        def __iter__(self) -> Iterator[Record]:
            """Iterate over records.

            :returns: A new iterator over the records stored in ``root``.
            """
            # ``__iter__`` returns an iterator (it has ``__next__``), so the
            # annotation is ``Iterator`` rather than the weaker ``Iterable``.
            return cast(Iterator[Record], iter(self.root))


class DuplicateSummary(NamedTuple):
"""A triple representing two records that are duplicated, either based on a CURIE or URI prefix."""

Expand Down Expand Up @@ -548,7 +578,8 @@ def add_record(self, record: Record, case_sensitive: bool = True, merge: bool =
"""Append a record to the converter."""
matched = self._match_record(record, case_sensitive=case_sensitive)
if len(matched) > 1:
raise ValueError(f"new record has duplicates: {matched}")
msg = "".join(f"\n {m} -> {v}" for m, v in matched.items())
raise ValueError(f"new record has duplicates:{msg}")
if len(matched) == 1:
if not merge:
raise ValueError(f"new record already exists and merge=False: {matched}")
Expand Down
11 changes: 11 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
ExpansionError,
PrefixStandardizationError,
Record,
Records,
Reference,
ReferenceTuple,
URIStandardizationError,
Expand All @@ -41,6 +42,16 @@
GO_URI_PREFIX = "http://purl.obolibrary.org/obo/GO_"


class TestStruct(unittest.TestCase):
    """Tests for the record container data structures."""

    def test_records(self):
        """Check that a parsed ``Records`` model can be used to build a converter."""
        payload = [{"prefix": "chebi", "uri_prefix": CHEBI_URI_PREFIX}]
        parsed = Records.parse_obj(payload)
        prefixes = Converter(records=parsed).get_prefixes()
        self.assertEqual({"chebi"}, prefixes)


class TestAddRecord(unittest.TestCase):
"""Test adding records."""

Expand Down

0 comments on commit 470f71a

Please sign in to comment.