Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/ted 849 #316

Merged
merged 2 commits into from
Oct 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
prefix org: <http://www.w3.org/ns/org#>
prefix epo: <http://data.europa.eu/a4g/ontology#>

# Builds the RDF fragment rooted at <$uri>: every triple whose subject is the
# root, plus every triple whose subject is one of the root's direct objects.
# NOTE(review): $uri looks like a string.Template placeholder filled in by the
# Python caller before the query is executed - confirm against the caller.
construct {
    ?s ?p ?o .
    ?o ?op ?oo .
}
where {
    values ?s { <$uri> }
    ?s ?p ?o .
    # Optional, so that root triples whose object has no outgoing triples
    # are still part of the result.
    optional {
        ?o ?op ?oo .
    }
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
prefix epo: <http://data.europa.eu/a4g/ontology#>
SELECT DISTINCT ?s
{
?s a <$uri> .
Expand Down
88 changes: 74 additions & 14 deletions ted_sws/master_data_registry/services/rdf_fragment_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,43 +10,79 @@
"""
import pathlib
from string import Template
from typing import List
from typing import List, Tuple

import rdflib

from ted_sws.core.model.notice import Notice
from ted_sws.data_manager.adapters.sparql_endpoint import SPARQLStringEndpoint
from ted_sws.master_data_registry.resources import RDF_FRAGMENT_BY_URI_SPARQL_QUERY_TEMPLATE_PATH, \
TRIPLES_BY_CET_URI_SPARQL_QUERY_TEMPLATE_PATH

# A single RDF statement expressed as a (subject, predicate, object) tuple of rdflib nodes.
RDFTriple = Tuple[rdflib.term.Node, rdflib.term.Node, rdflib.term.Node]

# RDF serialisation format used when a caller does not pass one explicitly.
DEFAULT_RDF_FILE_FORMAT = "n3"
# Type URI attached through rdf:type to the root subject of an extracted fragment.
RDF_FRAGMENT_ROOT_NODE_TYPE = "http://www.meaningfy.ws/mdr#RootNode"
# Predicate URI linking the root subject of a fragment to the ted_id of the notice it came from.
RDF_FRAGMENT_FROM_NOTICE_PROPERTY = "http://www.meaningfy.ws/mdr#fromNotice"


def get_subjects_by_cet_uri(sparql_endpoint: SPARQLStringEndpoint, cet_uri: str) -> List[str]:
    """
        This function returns the list of subjects which are of the given CET URI type.
    :param sparql_endpoint: endpoint over the RDF content that is queried
    :param cet_uri: type URI substituted for the ``uri`` placeholder of the query template
    :return: the values of the "s" column of the tabular query result
    """
    query_template = TRIPLES_BY_CET_URI_SPARQL_QUERY_TEMPLATE_PATH.read_text(encoding="utf-8")
    sparql_query = Template(query_template).substitute(uri=cet_uri)
    query_table_result = sparql_endpoint.with_query(sparql_query=sparql_query).fetch_tabular()
    return query_table_result["s"].to_list()


def get_rdf_fragment_by_root_uri(sparql_endpoint: SPARQLStringEndpoint, root_uri: str,
                                 inject_triples: List[RDFTriple] = None) -> rdflib.Graph:
    """
        This function returns an RDF fragment for the given root URI.
    :param sparql_endpoint: endpoint over the RDF content that is queried
    :param root_uri: URI substituted for the ``uri`` placeholder of the query template
    :param inject_triples: optional triples added to the fragment after it has been fetched
    :return: the graph fetched by the query, including the injected triples
    """
    query_template = RDF_FRAGMENT_BY_URI_SPARQL_QUERY_TEMPLATE_PATH.read_text(encoding="utf-8")
    # The Template is built exactly once, from the raw query text: wrapping an
    # existing Template in another Template makes substitute() fail.
    sparql_query = Template(query_template).substitute(uri=root_uri)
    rdf_fragment = sparql_endpoint.with_query(sparql_query=sparql_query).fetch_rdf()
    # "or ()" covers both the None default and an empty list.
    for inject_triple in inject_triples or ():
        rdf_fragment.add(inject_triple)

    return rdf_fragment


def get_rdf_fragment_by_cet_uri_from_string(rdf_content: str, cet_uri: str,
                                            rdf_content_format: str = DEFAULT_RDF_FILE_FORMAT
                                            ) -> List[rdflib.Graph]:
    """
        This function extracts from an RDF content a list of RDFFragments dependent on a CET URI.
    :param rdf_content: RDF content, as text, that is loaded into the SPARQL endpoint
    :param cet_uri: type URI used to select the root subjects of the fragments
    :param rdf_content_format: serialisation format of rdf_content
    :return: one graph per subject returned by get_subjects_by_cet_uri
    """
    sparql_endpoint = SPARQLStringEndpoint(rdf_content=rdf_content, rdf_content_format=rdf_content_format)
    root_uris = get_subjects_by_cet_uri(sparql_endpoint=sparql_endpoint, cet_uri=cet_uri)
    rdf_fragments = []
    for root_uri in root_uris:
        # Tag the root subject so it can be told apart from the other nodes of the fragment.
        root_node_triple = (rdflib.URIRef(root_uri), rdflib.RDF.type,
                            rdflib.URIRef(RDF_FRAGMENT_ROOT_NODE_TYPE))
        rdf_fragment = get_rdf_fragment_by_root_uri(sparql_endpoint=sparql_endpoint, root_uri=root_uri,
                                                    inject_triples=[root_node_triple])
        rdf_fragments.append(rdf_fragment)
    return rdf_fragments


def get_rdf_fragments_by_cet_uri_from_file(rdf_file_path: pathlib.Path, cet_uri: str,
                                           rdf_file_content_format: str = DEFAULT_RDF_FILE_FORMAT
                                           ) -> List[rdflib.Graph]:
    """
        This function extracts from an RDF file a list of RDFFragments dependent on a CET URI.
    :param rdf_file_path: path of the RDF file; its content is read as UTF-8 text
    :param cet_uri: type URI passed through to get_rdf_fragment_by_cet_uri_from_string
    :param rdf_file_content_format: serialisation format of the file content
    :return: the list of graphs returned by get_rdf_fragment_by_cet_uri_from_string
    """
    return get_rdf_fragment_by_cet_uri_from_string(rdf_content=rdf_file_path.read_text(encoding="utf-8"),
                                                   cet_uri=cet_uri,
                                                   rdf_content_format=rdf_file_content_format)


def get_rdf_fragment_by_cet_uri_from_notice(notice: Notice, cet_uri: str) -> List[rdflib.Graph]:
    """
        This function builds, from the distilled RDF content of a Notice, one RDF fragment
        per subject of the given CET URI type.
    :param notice: notice whose distilled RDF manifestation is queried
    :param cet_uri: type URI used to select the root subjects of the fragments
    :return: one graph per root subject; each root is typed as a root node and linked to the notice ted_id
    """
    distilled_rdf_content = notice.distilled_rdf_manifestation.object_data
    endpoint = SPARQLStringEndpoint(rdf_content=distilled_rdf_content,
                                    rdf_content_format=DEFAULT_RDF_FILE_FORMAT)
    fragments = []
    for subject_uri in get_subjects_by_cet_uri(sparql_endpoint=endpoint, cet_uri=cet_uri):
        # Two triples are injected per fragment: the root-node marker and the link back to the notice.
        root_marker = (rdflib.URIRef(subject_uri), rdflib.RDF.type,
                       rdflib.URIRef(RDF_FRAGMENT_ROOT_NODE_TYPE))
        notice_link = (rdflib.URIRef(subject_uri),
                       rdflib.URIRef(RDF_FRAGMENT_FROM_NOTICE_PROPERTY),
                       rdflib.Literal(notice.ted_id))
        fragment = get_rdf_fragment_by_root_uri(sparql_endpoint=endpoint, root_uri=subject_uri,
                                                inject_triples=[root_marker, notice_link])
        fragments.append(fragment)
    return fragments
14 changes: 14 additions & 0 deletions tests/unit/master_data_registry/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import pytest

from ted_sws.core.model.manifestation import XMLManifestation, RDFManifestation
from ted_sws.core.model.notice import Notice
from tests import TEST_DATA_PATH


@pytest.fixture
def notice_with_distilled_rdf_manifestation():
    """Provide a Notice whose distilled RDF manifestation holds the content of the example.ttl test file."""
    placeholder_xml = XMLManifestation(object_data="No XML data")
    notice = Notice(ted_id="002705-2021", original_metadata={}, xml_manifestation=placeholder_xml)
    distilled_rdf = (TEST_DATA_PATH / "example.ttl").read_text(encoding="utf-8")
    # The private attribute is assigned directly, exactly as the fixture has always done.
    notice._distilled_rdf_manifestation = RDFManifestation(object_data=distilled_rdf)
    return notice
13 changes: 10 additions & 3 deletions tests/unit/master_data_registry/test_rdf_fragment_processor.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import rdflib

from ted_sws.master_data_registry.services.rdf_fragment_processor import get_rdf_fragment_by_cet_uri_from_string, \
get_rdf_fragments_by_cet_uri_from_file
get_rdf_fragments_by_cet_uri_from_file, get_rdf_fragment_by_cet_uri_from_notice


def test_get_rdf_fragment_by_cet_uri_from_string(rdf_content, organisation_cet_uri):
Expand All @@ -11,10 +11,17 @@ def test_get_rdf_fragment_by_cet_uri_from_string(rdf_content, organisation_cet_u
assert type(rdf_fragment) == rdflib.Graph




def test_get_rdf_fragments_by_cet_uri_from_file(rdf_file_path, organisation_cet_uri):
    """Fragments extracted from the RDF test file: three are expected, each an rdflib graph."""
    rdf_fragments = get_rdf_fragments_by_cet_uri_from_file(rdf_file_path=rdf_file_path, cet_uri=organisation_cet_uri)
    assert len(rdf_fragments) == 3
    for rdf_fragment in rdf_fragments:
        # isinstance is the recommended type check and also accepts Graph subclasses.
        assert isinstance(rdf_fragment, rdflib.Graph)


def test_get_rdf_fragments_by_cet_uri_from_notice(notice_with_distilled_rdf_manifestation, organisation_cet_uri):
    """Fragments extracted from the notice fixture: three are expected, each an rdflib graph."""
    rdf_fragments = get_rdf_fragment_by_cet_uri_from_notice(notice=notice_with_distilled_rdf_manifestation,
                                                            cet_uri=organisation_cet_uri)
    assert len(rdf_fragments) == 3
    for rdf_fragment in rdf_fragments:
        # isinstance is the recommended type check and also accepts Graph subclasses.
        assert isinstance(rdf_fragment, rdflib.Graph)