Skip to content

Commit

Permalink
Merge pull request #312 from OP-TED/feature/TED-537
Browse files Browse the repository at this point in the history
Feature/ted 537
  • Loading branch information
CaptainOfHacks committed Oct 17, 2022
2 parents cc59d9f + 423415b commit 8c4ea16
Show file tree
Hide file tree
Showing 13 changed files with 1,958 additions and 28 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/unit-tests-srv.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ jobs:
- name: Get Saxon
run: make init-saxon

- name: Run command
run: echo "Echo"
- name: Get Limes
run: make init-limes

- name: Get RML mapper
run: make init-rml-mapper
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,5 @@ tests/reports/allure/test_results/
package-lock.json
package.json
/infra/digest_api/digest_service/project_requirements.txt
.limes/*
.limes/*
*.ser
29 changes: 24 additions & 5 deletions ted_sws/alignment_oracle/adapters/limes_alignment_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@

from ted_sws.alignment_oracle.model.limes_config import LimesConfigParams
from ted_sws.alignment_oracle.services.limes_configurator import generate_xml_config_from_limes_config
from ted_sws.event_manager.services.log import log_info


class LimesAlignmentEngine:
"""
This is a adapter for limes executable.
This is an adapter for limes executable.
"""
def __init__(self, limes_executable_path: pathlib.Path):

def __init__(self, limes_executable_path: pathlib.Path, use_caching: bool = None):
self.limes_executable_path = limes_executable_path
self.use_caching = use_caching if use_caching else True

def execute(self, limes_config_params: LimesConfigParams):
"""
Expand All @@ -22,7 +25,23 @@ def execute(self, limes_config_params: LimesConfigParams):
limes_xml_config = generate_xml_config_from_limes_config(limes_config_params=limes_config_params)
temp_file = tempfile.NamedTemporaryFile()
temp_file.write(limes_xml_config.encode(encoding="utf-8"))
bash_script = f"java -jar {self.limes_executable_path} {temp_file.name}"
script_result = subprocess.run(bash_script, shell=True, capture_output=True)
self.execute_from_file_config(config_file_path=pathlib.Path(temp_file.name))
temp_file.close()
script_result.stderr.decode('utf-8')

def execute_from_file_config(self, config_file_path: pathlib.Path):
    """
    This method generates alignment links based on a limes config file.
    The limes jar is started with "java -jar" and everything it writes to stderr is logged.
    :param config_file_path: path of the limes config file handed to the executable
    :return:
    """

    def run_limes(execution_dir_path: str):
        # An argument list plus cwd replaces the former "cd ... && java -jar ..." shell
        # string, so paths containing spaces or shell metacharacters are passed intact.
        command = ["java", "-jar", str(self.limes_executable_path), str(config_file_path)]
        try:
            execution_result = subprocess.run(command, cwd=execution_dir_path, capture_output=True)
        except OSError as error:
            # With the shell, a missing directory or java binary only produced stderr
            # output; keep reporting it through the log instead of raising.
            log_info(message=str(error))
            return
        log_info(message=execution_result.stderr.decode(encoding="utf-8"))

    if self.use_caching:
        # Run next to the limes executable so the working directory is the same on every
        # call (presumably this is where limes keeps its cache files - TODO confirm).
        run_limes(str(self.limes_executable_path.parent))
    else:
        # A throw-away working directory that is deleted after the run.
        with tempfile.TemporaryDirectory() as tmp_execution_dir_path:
            run_limes(tmp_execution_dir_path)
16 changes: 15 additions & 1 deletion ted_sws/alignment_oracle/model/limes_config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import List
import pathlib
from typing import List, Protocol

from pydantic import BaseModel

Expand All @@ -12,6 +13,7 @@ class LimesDataSource(BaseModel):
sparql_variable: str
sparql_restrictions: List[str]
sparql_properties: List[str]
data_type: str


class LimesDataResult(BaseModel):
Expand All @@ -34,3 +36,15 @@ class LimesConfigParams(BaseModel):
acceptance: LimesDataResult
review: LimesDataResult
result_file_format: str


class LimesConfigGenerator(Protocol):
    """
    Structural type describing the functions that build a LimesConfigParams.
    """

    def __call__(
            self,
            source_sparql_endpoint: str,
            target_sparql_endpoint: str,
            result_dir_path: pathlib.Path,
    ) -> LimesConfigParams:
        """
        Signature that a LimesConfigGenerator function has to provide.
        :param source_sparql_endpoint:
        :param target_sparql_endpoint:
        :param result_dir_path:
        :return:
        """
        ...
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
{% for property in source.sparql_properties -%}
<PROPERTY>{{ property }}</PROPERTY>
{%- endfor %}
<TYPE>{{ source.data_type }}</TYPE>
</SOURCE>
<TARGET>
<ID>{{ target.id }}</ID>
Expand All @@ -30,6 +31,7 @@
{% for property in target.sparql_properties -%}
<PROPERTY>{{ property }}</PROPERTY>
{%- endfor %}
<TYPE>{{ target.data_type }}</TYPE>
</TARGET>
<METRIC>{{ alignment_metric }}</METRIC>
<ACCEPTANCE>
Expand Down
48 changes: 45 additions & 3 deletions ted_sws/alignment_oracle/services/generate_alignment_links.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,32 @@
import pathlib
import tempfile

from ted_sws import config
from ted_sws.alignment_oracle.adapters.limes_alignment_engine import LimesAlignmentEngine
from ted_sws.alignment_oracle.model.limes_config import LimesConfigParams
from ted_sws.alignment_oracle.model.limes_config import LimesConfigParams, LimesConfigGenerator
from ted_sws.core.model.notice import Notice

DEFAULT_MAX_ACCEPTANCE_THRESHOLD = 1.0
DEFAULT_MAX_REVIEW_THRESHOLD = 0.95
DEFAULT_DELTA_THRESHOLD = 0.05
TURTLE_SOURCE_DATA_TYPE = "TURTLE"


def generate_alignment_links(limes_config_params: LimesConfigParams, threshold: float,
delta: float = DEFAULT_DELTA_THRESHOLD) -> str:
delta: float = DEFAULT_DELTA_THRESHOLD,
use_caching: bool = None) -> str:
"""
This function generate alignment links using limes engine.
:param limes_config_params:
:param threshold:
:param delta:
:param use_caching:
:return:
"""
limes_config_params.review.threshold = min(threshold, DEFAULT_MAX_REVIEW_THRESHOLD)
limes_config_params.acceptance.threshold = min(threshold + delta, DEFAULT_MAX_ACCEPTANCE_THRESHOLD)
limes_alignment_engine = LimesAlignmentEngine(limes_executable_path=config.LIMES_ALIGNMENT_PATH)
limes_alignment_engine = LimesAlignmentEngine(limes_executable_path=pathlib.Path(config.LIMES_ALIGNMENT_PATH),
use_caching=use_caching)
limes_alignment_engine.execute(limes_config_params=limes_config_params)
review_result_path = pathlib.Path(limes_config_params.review.result_file_path)
review_result_content = review_result_path.read_text(encoding="utf-8")
Expand All @@ -28,3 +35,38 @@ def generate_alignment_links(limes_config_params: LimesConfigParams, threshold:
acceptance_result_content = acceptance_result_path.read_text(encoding="utf-8")
review_result_content += acceptance_result_content
return review_result_content


def generate_alignment_links_for_notice(notice: Notice, sparql_endpoint: str,
                                        limes_config_generator: LimesConfigGenerator,
                                        threshold: float,
                                        delta: float = DEFAULT_DELTA_THRESHOLD,
                                        use_caching: bool = None
                                        ) -> str:
    """
    This function generates alignment links for a Notice RDF Manifestation.
    The distilled RDF manifestation is written to a temporary ".ttl" file, which is used
    as the limes source (data type TURTLE), while sparql_endpoint is used as the target.
    :param notice: notice whose distilled_rdf_manifestation.object_data is aligned
    :param sparql_endpoint: target SPARQL endpoint
    :param limes_config_generator: function that builds the LimesConfigParams
    :param threshold: forwarded to generate_alignment_links
    :param delta: forwarded to generate_alignment_links
    :param use_caching: forwarded to generate_alignment_links
    :return: the content returned by generate_alignment_links
    """
    notice_rdf_manifestation = notice.distilled_rdf_manifestation.object_data
    # Context managers guarantee that the temporary file and the result directory are
    # removed even when config generation or limes execution raises.
    with tempfile.NamedTemporaryFile(suffix=".ttl") as notice_rdf_file, \
            tempfile.TemporaryDirectory() as tmp_result_dir_path:
        notice_rdf_file.write(notice_rdf_manifestation.encode(encoding="utf-8"))
        # The file is read back through its path by another process, so the buffered
        # content has to be on disk first; without the flush the file may be empty or cut off.
        notice_rdf_file.flush()
        limes_config_params = limes_config_generator(source_sparql_endpoint=notice_rdf_file.name,
                                                     target_sparql_endpoint=sparql_endpoint,
                                                     result_dir_path=pathlib.Path(tmp_result_dir_path)
                                                     )
        limes_config_params.source.data_type = TURTLE_SOURCE_DATA_TYPE
        return generate_alignment_links(limes_config_params=limes_config_params,
                                        threshold=threshold,
                                        delta=delta,
                                        use_caching=use_caching
                                        )
62 changes: 54 additions & 8 deletions ted_sws/alignment_oracle/services/limes_configurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,45 +8,66 @@

TEMPLATES = Environment(loader=PackageLoader("ted_sws.alignment_oracle.resources", "templates"))
LIMES_CONFIG_TEMPLATE = "limes_config.jinja2"
DEFAULT_SOURCE_SPARQL_VAR = "x"
DEFAULT_TARGET_SPARQL_VAR = "y"
DEFAULT_SOURCE_SPARQL_VAR = "?x"
DEFAULT_TARGET_SPARQL_VAR = "?y"
DEFAULT_ACCEPTANCE_THRESHOLD = 0.95
DEFAULT_REVIEW_THRESHOLD = 0.7
DEFAULT_ACCEPTANCE_FILE_NAME = "acceptance.ttl"
DEFAULT_REVIEW_FILE_NAME = "review.ttl"
DEFAULT_PREFIXES = config.SPARQL_PREFIXES
DEFAULT_RESULT_FILE_FORMAT = "TTL"
DEFAULT_RESULT_FILE_FORMAT = "NT"
DEFAULT_RELATION = "owl:sameAs"
DEFAULT_SOURCE_ID = "default_source_id"
DEFAULT_TARGET_ID = "default_target_id"
DEFAULT_SOURCE_DATA_TYPE = "SPARQL"


def generate_xml_config_from_limes_config(limes_config_params: LimesConfigParams) -> str:
    """
    Render the limes config template with the values of a LimesConfigParams instance.
    :param limes_config_params: instance whose dict() representation is the template context
    :return: the rendered config as text
    """
    limes_template = TEMPLATES.get_template(LIMES_CONFIG_TEMPLATE)
    template_context = limes_config_params.dict()
    return limes_template.render(template_context)


def generate_default_limes_config_params(sparql_endpoint: str,
def generate_default_limes_config_params(source_sparql_endpoint: str,
target_sparql_endpoint: str,
result_dir_path: pathlib.Path,
alignment_metric: str,
source_sparql_restrictions: List[str],
target_sparql_restrictions: List[str],
source_sparql_properties: List[str],
target_sparql_properties: List[str],
) -> LimesConfigParams:
"""
This function generate default LimesConfigParams.
:param source_sparql_endpoint:
:param target_sparql_endpoint:
:param result_dir_path:
:param alignment_metric:
:param source_sparql_restrictions:
:param target_sparql_restrictions:
:param source_sparql_properties:
:param target_sparql_properties:
:return:
"""
acceptance_file_path = str(result_dir_path / DEFAULT_ACCEPTANCE_FILE_NAME)
review_file_path = str(result_dir_path / DEFAULT_REVIEW_FILE_NAME)
return LimesConfigParams(prefixes=DEFAULT_PREFIXES,
source=LimesDataSource(id=DEFAULT_SOURCE_ID,
sparql_endpoint=sparql_endpoint,
sparql_endpoint=source_sparql_endpoint,
sparql_variable=DEFAULT_SOURCE_SPARQL_VAR,
sparql_restrictions=source_sparql_restrictions,
sparql_properties=source_sparql_properties
sparql_properties=source_sparql_properties,
data_type=DEFAULT_SOURCE_DATA_TYPE
),
target=LimesDataSource(id=DEFAULT_TARGET_ID,
sparql_endpoint=sparql_endpoint,
sparql_endpoint=target_sparql_endpoint,
sparql_variable=DEFAULT_TARGET_SPARQL_VAR,
sparql_restrictions=target_sparql_restrictions,
sparql_properties=target_sparql_properties
sparql_properties=target_sparql_properties,
data_type=DEFAULT_SOURCE_DATA_TYPE
),
alignment_metric=alignment_metric,
acceptance=LimesDataResult(threshold=DEFAULT_ACCEPTANCE_THRESHOLD,
Expand All @@ -59,3 +80,28 @@ def generate_default_limes_config_params(sparql_endpoint: str,
),
result_file_format=DEFAULT_RESULT_FILE_FORMAT
)


def generate_organisation_cet_limes_config_params(source_sparql_endpoint: str,
                                                  target_sparql_endpoint: str,
                                                  result_dir_path: pathlib.Path) -> LimesConfigParams:
    """
    Build the LimesConfigParams used to align Organisation CET instances.
    Source and target are restricted to org:Organization and compared on the legal name
    and on the street of the registered address.
    :param source_sparql_endpoint:
    :param target_sparql_endpoint:
    :param result_dir_path:
    :return:
    """
    organisation_metric = "ADD(Jaccard(x.epo:hasLegalName, y.epo:hasLegalName), Jaccard(x.street, y.street))"
    organisation_properties = ("epo:hasLegalName",
                               "legal:registeredAddress/locn:thoroughfare RENAME street")
    return generate_default_limes_config_params(
        source_sparql_endpoint=source_sparql_endpoint,
        target_sparql_endpoint=target_sparql_endpoint,
        result_dir_path=result_dir_path,
        alignment_metric=organisation_metric,
        source_sparql_restrictions=["?x a org:Organization"],
        # each side gets its own list object, as separate literals did before
        source_sparql_properties=list(organisation_properties),
        target_sparql_restrictions=["?y a org:Organization"],
        target_sparql_properties=list(organisation_properties),
    )
2 changes: 1 addition & 1 deletion ted_sws/notice_publisher/adapters/sftp_notice_publisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class SFTPPublisher(SFTPPublisherABC):
"""

def __init__(self, hostname: str = None, username: str = None, password: str = None, port: str = None):
def __init__(self, hostname: str = None, username: str = None, password: str = None, port: int = None):
"""Constructor Method"""
self.hostname = hostname if hostname else config.SFTP_HOST
self.username = username if username else config.SFTP_USER
Expand Down
15 changes: 15 additions & 0 deletions tests/e2e/alignment_oracle/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,21 @@
import pathlib

import pytest

from ted_sws.core.model.manifestation import XMLManifestation, RDFManifestation
from ted_sws.core.model.notice import Notice
from tests import TEST_DATA_PATH


@pytest.fixture
def limes_sparql_endpoint() -> str:
    """SPARQL query endpoint used by the limes e2e tests."""
    endpoint_url = "https://fuseki.ted-data.eu/test_limes/query"
    return endpoint_url


@pytest.fixture
def notice_with_distilled_rdf_manifestation():
    """Notice 002705-2021 with its distilled RDF manifestation loaded from the test data."""
    placeholder_xml = XMLManifestation(object_data="No XML data")
    notice = Notice(ted_id="002705-2021", original_metadata={}, xml_manifestation=placeholder_xml)
    rdf_content_path = TEST_DATA_PATH / "rdf_manifestations" / "002705-2021.ttl"
    rdf_content = rdf_content_path.read_text(encoding="utf-8")
    # the manifestation is set directly on the private attribute
    notice._distilled_rdf_manifestation = RDFManifestation(object_data=rdf_content)
    return notice
33 changes: 28 additions & 5 deletions tests/e2e/alignment_oracle/test_alignment_links_generation.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import pathlib
import tempfile

from ted_sws.alignment_oracle.services.generate_alignment_links import generate_alignment_links
from ted_sws.alignment_oracle.services.limes_configurator import generate_default_limes_config_params
from ted_sws.alignment_oracle.services.generate_alignment_links import generate_alignment_links, \
generate_alignment_links_for_notice
from ted_sws.alignment_oracle.services.limes_configurator import generate_default_limes_config_params, \
generate_organisation_cet_limes_config_params


def test_alignment_links_generation(limes_sparql_endpoint):
with tempfile.TemporaryDirectory() as tmp_dir_path:
limes_config_params = generate_default_limes_config_params(sparql_endpoint=limes_sparql_endpoint,
limes_config_params = generate_default_limes_config_params(source_sparql_endpoint=limes_sparql_endpoint,
target_sparql_endpoint=limes_sparql_endpoint,
result_dir_path=pathlib.Path(tmp_dir_path),
alignment_metric="ADD(Jaccard(x.epo:hasLegalName, y.epo:hasLegalName), Jaccard(x.street, y.street))",
source_sparql_restrictions=["?x a org:Organization"],
Expand All @@ -19,5 +22,25 @@ def test_alignment_links_generation(limes_sparql_endpoint):
"legal:registeredAddress/locn:thoroughfare RENAME street"
]
)
result_links = generate_alignment_links(limes_config_params=limes_config_params, threshold=0.95)
assert result_links
result_links = generate_alignment_links(limes_config_params=limes_config_params, threshold=0.95,
use_caching=False)
assert result_links


def test_generate_alignment_links_for_notice(limes_sparql_endpoint, notice_with_distilled_rdf_manifestation):
    """Alignment links must be produced for a notice both without and with caching."""
    limes_config_generator = generate_organisation_cet_limes_config_params

    # same order as before: first the run without caching, then the run with caching
    for caching_enabled in (False, True):
        result_links = generate_alignment_links_for_notice(
            notice=notice_with_distilled_rdf_manifestation,
            sparql_endpoint=limes_sparql_endpoint,
            limes_config_generator=limes_config_generator,
            threshold=0.95,
            use_caching=caching_enabled,
        )

        assert result_links
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


def test_sftp_notice_publisher():
sftp_publisher = SFTPPublisher(port=0)
sftp_publisher = SFTPPublisher(port=123)

with pytest.raises(Exception):
sftp_publisher.connect()
Expand Down
Loading

0 comments on commit 8c4ea16

Please sign in to comment.