Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/ted 537 #312

Merged
merged 4 commits into from
Oct 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/unit-tests-srv.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ jobs:
- name: Get Saxon
run: make init-saxon

- name: Run command
run: echo "Echo"
- name: Get Limes
run: make init-limes

- name: Get RML mapper
run: make init-rml-mapper
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,5 @@ tests/reports/allure/test_results/
package-lock.json
package.json
/infra/digest_api/digest_service/project_requirements.txt
.limes/*
.limes/*
*.ser
29 changes: 24 additions & 5 deletions ted_sws/alignment_oracle/adapters/limes_alignment_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@

from ted_sws.alignment_oracle.model.limes_config import LimesConfigParams
from ted_sws.alignment_oracle.services.limes_configurator import generate_xml_config_from_limes_config
from ted_sws.event_manager.services.log import log_info


class LimesAlignmentEngine:
"""
This is a adapter for limes executable.
This is an adapter for limes executable.
"""
def __init__(self, limes_executable_path: pathlib.Path):

def __init__(self, limes_executable_path: pathlib.Path, use_caching: bool = None):
    """
    Store the location of the limes executable and the caching preference.
    :param limes_executable_path: path of the limes executable
    :param use_caching: None (the default) enables caching; an explicit
        True or False is kept as given
    """
    self.limes_executable_path = limes_executable_path
    # Compare against None explicitly: a plain truthiness test would turn an
    # explicit False back into True, making it impossible to disable caching.
    self.use_caching = True if use_caching is None else use_caching

def execute(self, limes_config_params: LimesConfigParams):
"""
Expand All @@ -22,7 +25,23 @@ def execute(self, limes_config_params: LimesConfigParams):
limes_xml_config = generate_xml_config_from_limes_config(limes_config_params=limes_config_params)
temp_file = tempfile.NamedTemporaryFile()
temp_file.write(limes_xml_config.encode(encoding="utf-8"))
bash_script = f"java -jar {self.limes_executable_path} {temp_file.name}"
script_result = subprocess.run(bash_script, shell=True, capture_output=True)
self.execute_from_file_config(config_file_path=pathlib.Path(temp_file.name))
temp_file.close()
script_result.stderr.decode('utf-8')

def execute_from_file_config(self, config_file_path: pathlib.Path):
    """
    Run the limes executable against an existing config file.

    The command is executed through the shell as
    ``cd <execution dir> && java -jar <limes executable> <config file>``
    and whatever the process writes to stderr is passed to ``log_info``.
    :param config_file_path: path of the limes config file
    :return: None
    """
    import shlex  # local import: only needed to quote the shell arguments

    def execute_bash_script(execution_dir_path: str):
        # Quote every path so that spaces or shell metacharacters in a
        # directory or file name cannot break (or inject into) the command.
        bash_script = (
            f"cd {shlex.quote(execution_dir_path)} && "
            f"java -jar {shlex.quote(str(self.limes_executable_path))} "
            f"{shlex.quote(str(config_file_path))}"
        )
        execution_result = subprocess.run(bash_script, shell=True, capture_output=True)
        log_info(message=execution_result.stderr.decode(encoding="utf-8"))

    if self.use_caching:
        # Run from the directory that contains the limes executable.
        execute_bash_script(str(self.limes_executable_path.parent))
    else:
        # Run from a temporary directory that is removed when the block exits.
        with tempfile.TemporaryDirectory() as tmp_execution_dir_path:
            execute_bash_script(execution_dir_path=tmp_execution_dir_path)
16 changes: 15 additions & 1 deletion ted_sws/alignment_oracle/model/limes_config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import List
import pathlib
from typing import List, Protocol

from pydantic import BaseModel

Expand All @@ -12,6 +13,7 @@ class LimesDataSource(BaseModel):
sparql_variable: str
sparql_restrictions: List[str]
sparql_properties: List[str]
data_type: str


class LimesDataResult(BaseModel):
Expand All @@ -34,3 +36,15 @@ class LimesConfigParams(BaseModel):
acceptance: LimesDataResult
review: LimesDataResult
result_file_format: str


class LimesConfigGenerator(Protocol):
    """
    Structural type for functions that generate a LimesConfigParams.
    """

    def __call__(
            self,
            source_sparql_endpoint: str,
            target_sparql_endpoint: str,
            result_dir_path: pathlib.Path,
    ) -> LimesConfigParams:
        """
        Call signature that a LimesConfigGenerator function has to match.
        """
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
{% for property in source.sparql_properties -%}
<PROPERTY>{{ property }}</PROPERTY>
{%- endfor %}
<TYPE>{{ source.data_type }}</TYPE>
</SOURCE>
<TARGET>
<ID>{{ target.id }}</ID>
Expand All @@ -30,6 +31,7 @@
{% for property in target.sparql_properties -%}
<PROPERTY>{{ property }}</PROPERTY>
{%- endfor %}
<TYPE>{{ target.data_type }}</TYPE>
</TARGET>
<METRIC>{{ alignment_metric }}</METRIC>
<ACCEPTANCE>
Expand Down
48 changes: 45 additions & 3 deletions ted_sws/alignment_oracle/services/generate_alignment_links.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,32 @@
import pathlib
import tempfile

from ted_sws import config
from ted_sws.alignment_oracle.adapters.limes_alignment_engine import LimesAlignmentEngine
from ted_sws.alignment_oracle.model.limes_config import LimesConfigParams
from ted_sws.alignment_oracle.model.limes_config import LimesConfigParams, LimesConfigGenerator
from ted_sws.core.model.notice import Notice

DEFAULT_MAX_ACCEPTANCE_THRESHOLD = 1.0
DEFAULT_MAX_REVIEW_THRESHOLD = 0.95
DEFAULT_DELTA_THRESHOLD = 0.05
TURTLE_SOURCE_DATA_TYPE = "TURTLE"


def generate_alignment_links(limes_config_params: LimesConfigParams, threshold: float,
delta: float = DEFAULT_DELTA_THRESHOLD) -> str:
delta: float = DEFAULT_DELTA_THRESHOLD,
use_caching: bool = None) -> str:
"""
This function generate alignment links using limes engine.
:param limes_config_params:
:param threshold:
:param delta:
:param use_caching:
:return:
"""
limes_config_params.review.threshold = min(threshold, DEFAULT_MAX_REVIEW_THRESHOLD)
limes_config_params.acceptance.threshold = min(threshold + delta, DEFAULT_MAX_ACCEPTANCE_THRESHOLD)
limes_alignment_engine = LimesAlignmentEngine(limes_executable_path=config.LIMES_ALIGNMENT_PATH)
limes_alignment_engine = LimesAlignmentEngine(limes_executable_path=pathlib.Path(config.LIMES_ALIGNMENT_PATH),
use_caching=use_caching)
limes_alignment_engine.execute(limes_config_params=limes_config_params)
review_result_path = pathlib.Path(limes_config_params.review.result_file_path)
review_result_content = review_result_path.read_text(encoding="utf-8")
Expand All @@ -28,3 +35,38 @@ def generate_alignment_links(limes_config_params: LimesConfigParams, threshold:
acceptance_result_content = acceptance_result_path.read_text(encoding="utf-8")
review_result_content += acceptance_result_content
return review_result_content


def generate_alignment_links_for_notice(notice: Notice, sparql_endpoint: str,
                                        limes_config_generator: LimesConfigGenerator,
                                        threshold: float,
                                        delta: float = DEFAULT_DELTA_THRESHOLD,
                                        use_caching: bool = None
                                        ) -> str:
    """
    Generate alignment links for the distilled RDF manifestation of a notice.

    The RDF content is written to a temporary ``.ttl`` file whose path is given
    to the config generator as the source endpoint; the source data type of the
    generated config is then set to TURTLE.
    :param notice: notice whose distilled_rdf_manifestation.object_data is aligned
    :param sparql_endpoint: passed to the config generator as the target endpoint
    :param limes_config_generator: callable that builds the LimesConfigParams
    :param threshold: forwarded to generate_alignment_links
    :param delta: forwarded to generate_alignment_links
    :param use_caching: forwarded to generate_alignment_links
    :return: the alignment links returned by generate_alignment_links
    """
    notice_rdf_manifestation = notice.distilled_rdf_manifestation.object_data
    # Context managers guarantee that the temporary file and directory are
    # released even when config generation or the limes run raises.
    with tempfile.NamedTemporaryFile(suffix=".ttl") as notice_rdf_file, \
            tempfile.TemporaryDirectory() as tmp_result_dir_path:
        notice_rdf_file.write(notice_rdf_manifestation.encode(encoding="utf-8"))
        # The file is read by path from outside this handle, so the buffered
        # content must reach the disk before the path is handed over.
        notice_rdf_file.flush()
        limes_config_params = limes_config_generator(source_sparql_endpoint=notice_rdf_file.name,
                                                     target_sparql_endpoint=sparql_endpoint,
                                                     result_dir_path=pathlib.Path(tmp_result_dir_path)
                                                     )
        limes_config_params.source.data_type = TURTLE_SOURCE_DATA_TYPE
        return generate_alignment_links(limes_config_params=limes_config_params,
                                        threshold=threshold,
                                        delta=delta,
                                        use_caching=use_caching
                                        )
62 changes: 54 additions & 8 deletions ted_sws/alignment_oracle/services/limes_configurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,45 +8,66 @@

TEMPLATES = Environment(loader=PackageLoader("ted_sws.alignment_oracle.resources", "templates"))
LIMES_CONFIG_TEMPLATE = "limes_config.jinja2"
DEFAULT_SOURCE_SPARQL_VAR = "x"
DEFAULT_TARGET_SPARQL_VAR = "y"
DEFAULT_SOURCE_SPARQL_VAR = "?x"
DEFAULT_TARGET_SPARQL_VAR = "?y"
DEFAULT_ACCEPTANCE_THRESHOLD = 0.95
DEFAULT_REVIEW_THRESHOLD = 0.7
DEFAULT_ACCEPTANCE_FILE_NAME = "acceptance.ttl"
DEFAULT_REVIEW_FILE_NAME = "review.ttl"
DEFAULT_PREFIXES = config.SPARQL_PREFIXES
DEFAULT_RESULT_FILE_FORMAT = "TTL"
DEFAULT_RESULT_FILE_FORMAT = "NT"
DEFAULT_RELATION = "owl:sameAs"
DEFAULT_SOURCE_ID = "default_source_id"
DEFAULT_TARGET_ID = "default_target_id"
DEFAULT_SOURCE_DATA_TYPE = "SPARQL"


def generate_xml_config_from_limes_config(limes_config_params: LimesConfigParams) -> str:
    """
    Render the limes config template with the fields of a LimesConfigParams.
    :param limes_config_params: parameters handed to the template as a dict
    :return: the rendered template text
    """
    template = TEMPLATES.get_template(LIMES_CONFIG_TEMPLATE)
    template_context = limes_config_params.dict()
    return template.render(template_context)


def generate_default_limes_config_params(sparql_endpoint: str,
def generate_default_limes_config_params(source_sparql_endpoint: str,
target_sparql_endpoint: str,
result_dir_path: pathlib.Path,
alignment_metric: str,
source_sparql_restrictions: List[str],
target_sparql_restrictions: List[str],
source_sparql_properties: List[str],
target_sparql_properties: List[str],
) -> LimesConfigParams:
"""
This function generate default LimesConfigParams.
:param source_sparql_endpoint:
:param target_sparql_endpoint:
:param result_dir_path:
:param alignment_metric:
:param source_sparql_restrictions:
:param target_sparql_restrictions:
:param source_sparql_properties:
:param target_sparql_properties:
:return:
"""
acceptance_file_path = str(result_dir_path / DEFAULT_ACCEPTANCE_FILE_NAME)
review_file_path = str(result_dir_path / DEFAULT_REVIEW_FILE_NAME)
return LimesConfigParams(prefixes=DEFAULT_PREFIXES,
source=LimesDataSource(id=DEFAULT_SOURCE_ID,
sparql_endpoint=sparql_endpoint,
sparql_endpoint=source_sparql_endpoint,
sparql_variable=DEFAULT_SOURCE_SPARQL_VAR,
sparql_restrictions=source_sparql_restrictions,
sparql_properties=source_sparql_properties
sparql_properties=source_sparql_properties,
data_type=DEFAULT_SOURCE_DATA_TYPE
),
target=LimesDataSource(id=DEFAULT_TARGET_ID,
sparql_endpoint=sparql_endpoint,
sparql_endpoint=target_sparql_endpoint,
sparql_variable=DEFAULT_TARGET_SPARQL_VAR,
sparql_restrictions=target_sparql_restrictions,
sparql_properties=target_sparql_properties
sparql_properties=target_sparql_properties,
data_type=DEFAULT_SOURCE_DATA_TYPE
),
alignment_metric=alignment_metric,
acceptance=LimesDataResult(threshold=DEFAULT_ACCEPTANCE_THRESHOLD,
Expand All @@ -59,3 +80,28 @@ def generate_default_limes_config_params(sparql_endpoint: str,
),
result_file_format=DEFAULT_RESULT_FILE_FORMAT
)


def generate_organisation_cet_limes_config_params(source_sparql_endpoint: str,
                                                  target_sparql_endpoint: str,
                                                  result_dir_path: pathlib.Path) -> LimesConfigParams:
    """
    Build the LimesConfigParams used to align Organisation CET instances.

    Source and target are both restricted to org:Organization and compared on
    legal name and street with the same set of properties.
    :param source_sparql_endpoint: forwarded as the source endpoint
    :param target_sparql_endpoint: forwarded as the target endpoint
    :param result_dir_path: forwarded as the result directory
    :return: the LimesConfigParams built by generate_default_limes_config_params
    """
    legal_name_property = "epo:hasLegalName"
    street_property = "legal:registeredAddress/locn:thoroughfare RENAME street"
    organisation_metric = "ADD(Jaccard(x.epo:hasLegalName, y.epo:hasLegalName), Jaccard(x.street, y.street))"
    return generate_default_limes_config_params(
        source_sparql_endpoint=source_sparql_endpoint,
        target_sparql_endpoint=target_sparql_endpoint,
        result_dir_path=result_dir_path,
        alignment_metric=organisation_metric,
        source_sparql_restrictions=["?x a org:Organization"],
        source_sparql_properties=[legal_name_property, street_property],
        target_sparql_restrictions=["?y a org:Organization"],
        target_sparql_properties=[legal_name_property, street_property],
    )
2 changes: 1 addition & 1 deletion ted_sws/notice_publisher/adapters/sftp_notice_publisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class SFTPPublisher(SFTPPublisherABC):

"""

def __init__(self, hostname: str = None, username: str = None, password: str = None, port: str = None):
def __init__(self, hostname: str = None, username: str = None, password: str = None, port: int = None):
"""Constructor Method"""
self.hostname = hostname if hostname else config.SFTP_HOST
self.username = username if username else config.SFTP_USER
Expand Down
15 changes: 15 additions & 0 deletions tests/e2e/alignment_oracle/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,21 @@
import pathlib

import pytest

from ted_sws.core.model.manifestation import XMLManifestation, RDFManifestation
from ted_sws.core.model.notice import Notice
from tests import TEST_DATA_PATH


@pytest.fixture
def limes_sparql_endpoint() -> str:
    """Provide the SPARQL query endpoint URL used by the tests."""
    endpoint_url = "https://fuseki.ted-data.eu/test_limes/query"
    return endpoint_url


@pytest.fixture
def notice_with_distilled_rdf_manifestation():
    """
    Provide notice 002705-2021 with its distilled RDF manifestation loaded
    from the test data directory; the XML manifestation is a placeholder.
    """
    placeholder_xml = XMLManifestation(object_data="No XML data")
    notice = Notice(ted_id="002705-2021", original_metadata={}, xml_manifestation=placeholder_xml)
    rdf_text = (TEST_DATA_PATH / "rdf_manifestations" / "002705-2021.ttl").read_text(encoding="utf-8")
    # The distilled manifestation is set directly on the private attribute.
    notice._distilled_rdf_manifestation = RDFManifestation(object_data=rdf_text)
    return notice
33 changes: 28 additions & 5 deletions tests/e2e/alignment_oracle/test_alignment_links_generation.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import pathlib
import tempfile

from ted_sws.alignment_oracle.services.generate_alignment_links import generate_alignment_links
from ted_sws.alignment_oracle.services.limes_configurator import generate_default_limes_config_params
from ted_sws.alignment_oracle.services.generate_alignment_links import generate_alignment_links, \
generate_alignment_links_for_notice
from ted_sws.alignment_oracle.services.limes_configurator import generate_default_limes_config_params, \
generate_organisation_cet_limes_config_params


def test_alignment_links_generation(limes_sparql_endpoint):
with tempfile.TemporaryDirectory() as tmp_dir_path:
limes_config_params = generate_default_limes_config_params(sparql_endpoint=limes_sparql_endpoint,
limes_config_params = generate_default_limes_config_params(source_sparql_endpoint=limes_sparql_endpoint,
target_sparql_endpoint=limes_sparql_endpoint,
result_dir_path=pathlib.Path(tmp_dir_path),
alignment_metric="ADD(Jaccard(x.epo:hasLegalName, y.epo:hasLegalName), Jaccard(x.street, y.street))",
source_sparql_restrictions=["?x a org:Organization"],
Expand All @@ -19,5 +22,25 @@ def test_alignment_links_generation(limes_sparql_endpoint):
"legal:registeredAddress/locn:thoroughfare RENAME street"
]
)
result_links = generate_alignment_links(limes_config_params=limes_config_params, threshold=0.95)
assert result_links
result_links = generate_alignment_links(limes_config_params=limes_config_params, threshold=0.95,
use_caching=False)
assert result_links


def test_generate_alignment_links_for_notice(limes_sparql_endpoint, notice_with_distilled_rdf_manifestation):
    """
    Alignment links for a notice must be non-empty, first with caching
    disabled and then with caching enabled.
    """
    for use_caching in (False, True):
        result_links = generate_alignment_links_for_notice(
            notice=notice_with_distilled_rdf_manifestation,
            sparql_endpoint=limes_sparql_endpoint,
            limes_config_generator=generate_organisation_cet_limes_config_params,
            threshold=0.95, use_caching=use_caching
        )

        assert result_links
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


def test_sftp_notice_publisher():
sftp_publisher = SFTPPublisher(port=0)
sftp_publisher = SFTPPublisher(port=123)

with pytest.raises(Exception):
sftp_publisher.connect()
Expand Down
Loading