OP-TED · costezki · Mar 28, 2022 · Mar 18, 2022 · Mar 18, 2022 · Mar 18, 2022
diff --git a/.gitignore b/.gitignore
@@ -101,3 +101,4 @@ infra/traefik/letsencrypt/acme.json
 *.log
 infra/airflow/logs/scheduler/latest
 /.airflow/
+.rmlmapper/rmlmapper.jar
diff --git a/Makefile b/Makefile
@@ -12,6 +12,7 @@ ENV_FILE := .env
 
 PROJECT_PATH = $(shell pwd)
 AIRFLOW_INFRA_FOLDER ?= ${PROJECT_PATH}/.airflow
+RML_MAPPER_PATH = ${PROJECT_PATH}/.rmlmapper/rmlmapper.jar
 
 #-----------------------------------------------------------------------------
 # Dev commands
@@ -142,8 +143,13 @@ stop-mongo:
 	@ echo -e "$(BUILD_PRINT)Stopping the Mongo services $(END_BUILD_PRINT)"
 	@ docker-compose -p ${ENVIRONMENT} --file ./infra/mongo/docker-compose.yml --env-file ${ENV_FILE} down
 
+# Fetch the RMLMapper jar only when it is not installed yet; the download is resumable and renamed once complete.
+init-rml-mapper:
+	@ echo -e "RMLMapper folder initialisation!"
+	@ test -f .rmlmapper/rmlmapper.jar || (wget -c https://github.com/RMLio/rmlmapper-java/releases/download/v5.0.0/rmlmapper-5.0.0-r362-all.jar -P .rmlmapper/ && mv .rmlmapper/rmlmapper-5.0.0-r362-all.jar .rmlmapper/rmlmapper.jar)
 
-start-project-services: | start-airflow start-mongo
+
+start-project-services: | start-airflow start-mongo init-rml-mapper
 stop-project-services: | stop-airflow stop-mongo
 
 #-----------------------------------------------------------------------------
@@ -170,6 +176,7 @@ staging-dotenv-file: guard-VAULT_ADDR guard-VAULT_TOKEN vault-installed
 	@ echo DOMAIN=ted-data.eu >> .env
 	@ echo ENVIRONMENT=staging >> .env
 	@ echo SUBDOMAIN=staging. >> .env
+	@ echo RML_MAPPER_PATH=${RML_MAPPER_PATH} >> .env
 	@ echo AIRFLOW_INFRA_FOLDER=~/airflow-infra/staging >> .env
 	@ vault kv get -format="json" ted-staging/airflow | jq -r ".data.data | keys[] as \$$k | \"\(\$$k)=\(.[\$$k])\"" >> .env
 	@ vault kv get -format="json" ted-staging/mongo-db | jq -r ".data.data | keys[] as \$$k | \"\(\$$k)=\(.[\$$k])\"" >> .env
@@ -181,6 +188,7 @@ dev-dotenv-file: guard-VAULT_ADDR guard-VAULT_TOKEN vault-installed
 	@ echo DOMAIN=localhost >> .env
 	@ echo ENVIRONMENT=dev >> .env
 	@ echo SUBDOMAIN= >> .env
+	@ echo RML_MAPPER_PATH=${RML_MAPPER_PATH} >> .env
 	@ echo AIRFLOW_INFRA_FOLDER=${AIRFLOW_INFRA_FOLDER} >> .env
 	@ vault kv get -format="json" ted-dev/airflow | jq -r ".data.data | keys[] as \$$k | \"\(\$$k)=\(.[\$$k])\"" >> .env
 	@ vault kv get -format="json" ted-dev/mongo-db | jq -r ".data.data | keys[] as \$$k | \"\(\$$k)=\(.[\$$k])\"" >> .env
@@ -193,6 +201,7 @@ prod-dotenv-file: guard-VAULT_ADDR guard-VAULT_TOKEN vault-installed
 	@ echo DOMAIN=ted-data.eu >> .env
 	@ echo ENVIRONMENT=prod >> .env
 	@ echo SUBDOMAIN= >> .env
+	@ echo RML_MAPPER_PATH=${RML_MAPPER_PATH} >> .env
 	@ echo AIRFLOW_INFRA_FOLDER=~/airflow-infra/prod >> .env
 	@ vault kv get -format="json" ted-prod/airflow | jq -r ".data.data | keys[] as \$$k | \"\(\$$k)=\(.[\$$k])\"" >> .env
 	@ vault kv get -format="json" ted-prod/mongo-db | jq -r ".data.data | keys[] as \$$k | \"\(\$$k)=\(.[\$$k])\"" >> .env

diff --git a/ted_sws/__init__.py b/ted_sws/__init__.py
@@ -53,10 +53,24 @@ def MONGO_DB_PORT(self) -> int:
         return int(VaultAndEnvConfigResolver().config_resolve())
 
 
-class TedConfigResolver(MongoDBConfig):
+    @property
+    def MONGO_DB_AGGREGATES_DATABASE_NAME(self) -> str:
+        """Name of the MongoDB database used by the notice and mapping suite repositories."""
+        return VaultAndEnvConfigResolver().config_resolve()
+
+
+class RMLMapperConfig:
+    """Settings of the RML mapper; RML_MAPPER_PATH presumably comes from the .env entry the Makefile writes."""
+    @property
+    def RML_MAPPER_PATH(self) -> str:
+        return VaultAndEnvConfigResolver().config_resolve()
+
+
+class TedConfigResolver(MongoDBConfig, RMLMapperConfig):
     """
         This class resolve the secrets of the ted-sws project.
     """
 
 
 config = TedConfigResolver()
+
diff --git a/ted_sws/data_manager/adapters/mapping_suite_repository.py b/ted_sws/data_manager/adapters/mapping_suite_repository.py
@@ -0,0 +1,291 @@
+import json
+import pathlib
+import shutil
+from typing import Iterator, List, Optional
+
+from pymongo import MongoClient
+
+from ted_sws import config
+from ted_sws.domain.adapters.repository_abc import MappingSuiteRepositoryABC
+from ted_sws.domain.model.transform import MappingSuite, FileResource, TransformationRuleSet, SHACLTestSuite, \
+    SPARQLTestSuite, MetadataConstraints
+
+METADATA_FILE_NAME = "metadata.json"  # metadata file stored at the root of each package
+TRANSFORM_PACKAGE_NAME = "transform"  # sub-folder holding the transformation rule set
+MAPPINGS_PACKAGE_NAME = "mappings"  # transform/mappings: RML mapping rule files
+RESOURCES_PACKAGE_NAME = "resources"  # transform/resources: resource files of the rule set
+VALIDATE_PACKAGE_NAME = "validate"  # sub-folder holding the validation test suites
+SHACL_PACKAGE_NAME = "shacl"  # validate/shacl: one sub-folder per SHACL test suite
+SPARQL_PACKAGE_NAME = "sparql"  # validate/sparql: one sub-folder per SPARQL test suite
+
+
+class MappingSuiteRepositoryMongoDB(MappingSuiteRepositoryABC):
+    """
+       This repository is intended for storing MappingSuite objects in MongoDB.
+    """
+
+    _collection_name = "mapping_suite_collection"
+    _database_name = config.MONGO_DB_AGGREGATES_DATABASE_NAME
+
+    def __init__(self, mongodb_client: MongoClient):
+        """
+            Bind the repository to its collection in the aggregates database.
+            The database name comes from config.MONGO_DB_AGGREGATES_DATABASE_NAME and
+            the collection name is fixed by the class.
+        :param mongodb_client: client used to reach the database that stores the mapping suites
+        """
+        aggregates_db = mongodb_client[self._database_name]
+        self.collection = aggregates_db[self._collection_name]
+
+    def add(self, mapping_suite: MappingSuite):
+        """
+            This method allows you to add MappingSuite objects to the repository.
+        :param mapping_suite: suite to insert; its identifier is also stored as the document _id
+        :return:
+        """
+        mapping_suite_dict = mapping_suite.dict()
+        mapping_suite_dict["_id"] = mapping_suite_dict["identifier"]
+        self.collection.insert_one(mapping_suite_dict)
+
+    def update(self, mapping_suite: MappingSuite):
+        """
+            This method allows you to update MappingSuite objects in the repository.
+        :param mapping_suite: suite whose stored document (matched on _id = identifier) gets its fields set
+        :return: nothing; no upsert is requested, so only an already stored suite is changed
+        """
+        mapping_suite_dict = mapping_suite.dict()
+        mapping_suite_dict["_id"] = mapping_suite_dict["identifier"]
+        self.collection.update_one({'_id': mapping_suite_dict["_id"]}, {"$set": mapping_suite_dict})
+
+    def get(self, reference) -> Optional[MappingSuite]:
+        """
+            This method allows a MappingSuite to be obtained based on an identification reference.
+        :param reference: identifier of the wanted mapping suite
+        :return: the MappingSuite, or None when no stored document has that identifier
+        """
+        result_dict = self.collection.find_one({"identifier": reference})
+        return MappingSuite(**result_dict) if result_dict else None
+
+    def list(self) -> Iterator[MappingSuite]:
+        """
+            This method allows all records to be retrieved from the repository.
+        :return: iterator over the stored MappingSuites
+        """
+        for result_dict in self.collection.find():
+            yield MappingSuite(**result_dict)
+
+
+class MappingSuiteRepositoryInFileSystem(MappingSuiteRepositoryABC):
+    """
+           This repository is intended for storing MappingSuite objects in FileSystem.
+    """
+
+    def __init__(self, repository_path: pathlib.Path):
+        """
+            Remember the repository root folder and create it if it does not exist yet.
+        :param repository_path: folder under which each mapping suite package gets its own sub-folder
+        """
+        self.repository_path = repository_path
+        self.repository_path.mkdir(parents=True, exist_ok=True)
+
+    def _read_package_metadata(self, package_path: pathlib.Path) -> dict:
+        """
+            Load the metadata.json file of a package as a dictionary.
+        :param package_path: folder of the mapping suite package
+        :return: the parsed metadata, with 'metadata_constraints' wrapped in a MetadataConstraints object
+        """
+        raw_metadata = (package_path / METADATA_FILE_NAME).read_text(encoding="utf-8")
+        metadata = json.loads(raw_metadata)
+        constraints = metadata['metadata_constraints']
+        metadata['metadata_constraints'] = MetadataConstraints(**constraints)
+        return metadata
+
+    def _read_transformation_rule_set(self, package_path: pathlib.Path) -> TransformationRuleSet:
+        """
+            Build the transformation rule set from the files under <package_path>/transform.
+        :param package_path: folder of the mapping suite package
+        :return: TransformationRuleSet holding the files of transform/resources and transform/mappings
+        """
+        transform_path = package_path / TRANSFORM_PACKAGE_NAME
+        resources = self._read_file_resources(path=transform_path / RESOURCES_PACKAGE_NAME)
+        rml_mapping_rules = self._read_file_resources(path=transform_path / MAPPINGS_PACKAGE_NAME)
+        rule_set = TransformationRuleSet(resources=resources, rml_mapping_rules=rml_mapping_rules)
+        return rule_set
+
+    def _read_shacl_test_suites(self, package_path: pathlib.Path) -> List[SHACLTestSuite]:
+        """
+            Read every SHACL test suite folder of a package, in sorted (deterministic) folder order.
+        :param package_path: folder of the mapping suite package
+        :return: one SHACLTestSuite per sub-folder of <package_path>/validate/shacl
+        """
+        shacl_path = package_path / VALIDATE_PACKAGE_NAME / SHACL_PACKAGE_NAME
+        # iterdir() yields entries in arbitrary order; sort so the suites always load in the same order
+        suite_paths = sorted(x for x in shacl_path.iterdir() if x.is_dir())
+        return [SHACLTestSuite(shacl_tests=self._read_file_resources(path=suite_path))
+                for suite_path in suite_paths]
+
+    def _read_sparql_test_suites(self, package_path: pathlib.Path) -> List[SPARQLTestSuite]:
+        """
+            Read every SPARQL test suite folder of a package, in sorted (deterministic) folder order.
+        :param package_path: folder of the mapping suite package
+        :return: one SPARQLTestSuite per sub-folder of <package_path>/validate/sparql
+        """
+        sparql_path = package_path / VALIDATE_PACKAGE_NAME / SPARQL_PACKAGE_NAME
+        # iterdir() yields entries in arbitrary order; sort so the suites always load in the same order
+        suite_paths = sorted(x for x in sparql_path.iterdir() if x.is_dir())
+        return [SPARQLTestSuite(sparql_tests=self._read_file_resources(path=suite_path))
+                for suite_path in suite_paths]
+
+    def _write_package_metadata(self, mapping_suite: MappingSuite):
+        """
+            Write the suite's own fields to metadata.json, leaving out the parts that are stored as files.
+        :param mapping_suite: suite whose metadata is persisted in its package folder
+        :return:
+        """
+        package_path = self.repository_path / mapping_suite.identifier
+        package_path.mkdir(parents=True, exist_ok=True)
+        package_metadata = mapping_suite.dict()
+        # the rule set and the test suites are not part of metadata.json
+        for key in ("transformation_rule_set", "shacl_test_suites", "sparql_test_suites"):
+            package_metadata.pop(key, None)
+        metadata_file = package_path / METADATA_FILE_NAME
+        metadata_file.write_text(json.dumps(package_metadata), encoding="utf-8")
+
+    def _write_file_resources(self, file_resources: List[FileResource], path: pathlib.Path):
+        """
+            Write each file resource as a UTF-8 text file inside the given folder.
+        :param file_resources: resources to store; file_name gives the file name, file_content the text
+        :param path: existing folder that receives the files
+        :return:
+        """
+        for resource in file_resources:
+            target = path / resource.file_name
+            # write_text opens the file in "w" mode, so an existing file is overwritten
+            target.write_text(resource.file_content, encoding="utf-8")
+
+    def _read_file_resources(self, path: pathlib.Path) -> List[FileResource]:
+        """
+            Read every regular file directly under the given folder, sorted by path for a stable order.
+        :param path: folder to read; its sub-folders are skipped
+        :return: one FileResource (file name plus UTF-8 text content) per file
+        """
+        # iterdir() yields entries in arbitrary order, hence the explicit sort
+        files = sorted(file for file in path.iterdir() if file.is_file())
+        return [FileResource(file_name=file.name, file_content=file.read_text(encoding="utf-8"))
+                for file in files]
+
+    def _write_package_transform_rules(self, mapping_suite: MappingSuite):
+        """
+            Store the transformation rule set of a mapping suite inside its package:
+            RML mapping rules go to <package>/transform/mappings and
+            resource files go to <package>/transform/resources.
+        :param mapping_suite: suite whose transformation_rule_set is written
+        :return:
+        """
+        transform_path = self.repository_path / mapping_suite.identifier / TRANSFORM_PACKAGE_NAME
+        mappings_path = transform_path / MAPPINGS_PACKAGE_NAME
+        resources_path = transform_path / RESOURCES_PACKAGE_NAME
+        # create both target folders before anything is written
+        for folder in (mappings_path, resources_path):
+            folder.mkdir(parents=True, exist_ok=True)
+        rule_set = mapping_suite.transformation_rule_set
+        # each FileResource becomes one file named after its file_name
+        self._write_file_resources(file_resources=rule_set.rml_mapping_rules, path=mappings_path)
+        self._write_file_resources(file_resources=rule_set.resources, path=resources_path)
+
+    def _write_package_validation_rules(self, mapping_suite: MappingSuite):
+        """
+            Store the SHACL and SPARQL test suites of a mapping suite inside its package.
+            Suite number N is written to <package>/validate/shacl/shacl_test_suite_N or
+            <package>/validate/sparql/sparql_test_suite_N, numbered from 0 in list order.
+        :param mapping_suite: suite whose shacl_test_suites and sparql_test_suites are written
+        :return:
+        """
+        validate_path = self.repository_path / mapping_suite.identifier / VALIDATE_PACKAGE_NAME
+        sparql_path = validate_path / SPARQL_PACKAGE_NAME
+        shacl_path = validate_path / SHACL_PACKAGE_NAME
+        # both folders are created up front, even when a suite list is empty
+        for folder in (sparql_path, shacl_path):
+            folder.mkdir(parents=True, exist_ok=True)
+
+        # suite folders are numbered by the position of the suite in its list
+        for idx, shacl_test_suite in enumerate(mapping_suite.shacl_test_suites):
+            suite_path = shacl_path / f"shacl_test_suite_{idx}"
+            suite_path.mkdir(parents=True, exist_ok=True)
+            self._write_file_resources(
+                file_resources=shacl_test_suite.shacl_tests,
+                path=suite_path)
+
+        for idx, sparql_test_suite in enumerate(mapping_suite.sparql_test_suites):
+            suite_path = sparql_path / f"sparql_test_suite_{idx}"
+            suite_path.mkdir(parents=True, exist_ok=True)
+            self._write_file_resources(
+                file_resources=sparql_test_suite.sparql_tests,
+                path=suite_path)
+
+    def _write_mapping_suite_package(self, mapping_suite: MappingSuite):
+        """
+            Write a complete package: metadata first, then transformation rules, then validation rules.
+        :param mapping_suite: suite to store as a package folder
+        """
+        for write_part in (self._write_package_metadata,
+                           self._write_package_transform_rules,
+                           self._write_package_validation_rules):
+            write_part(mapping_suite=mapping_suite)
+
+    def _read_mapping_suite_package(self, mapping_suite_identifier: str) -> Optional[MappingSuite]:
+        """
+            Assemble a MappingSuite from the package folder named after the identifier.
+        :param mapping_suite_identifier: name of the package folder inside the repository
+        :return: the MappingSuite, or None when there is no such folder
+        """
+        package_path = self.repository_path / mapping_suite_identifier
+        if not package_path.is_dir():
+            return None
+        package_metadata = self._read_package_metadata(package_path)
+        package_metadata.update(transformation_rule_set=self._read_transformation_rule_set(package_path),
+                                shacl_test_suites=self._read_shacl_test_suites(package_path),
+                                sparql_test_suites=self._read_sparql_test_suites(package_path))
+        return MappingSuite(**package_metadata)
+
+    def add(self, mapping_suite: MappingSuite):
+        """
+            Store a MappingSuite as a package folder named after its identifier.
+        :param mapping_suite: suite to write; files already present under the same names are overwritten
+        :return:
+        """
+        self._write_mapping_suite_package(mapping_suite)
+
+    def update(self, mapping_suite: MappingSuite):
+        """
+            Rewrite the package of a MappingSuite; nothing is written when its folder does not exist.
+        :param mapping_suite: suite whose existing package is written again
+        """
+        package_exists = (self.repository_path / mapping_suite.identifier).is_dir()
+        if not package_exists:
+            return
+        self._write_mapping_suite_package(mapping_suite=mapping_suite)
+
+    def get(self, reference) -> Optional[MappingSuite]:
+        """
+            Load the mapping suite package stored in the folder named after the reference.
+        :param reference: identifier of the mapping suite, i.e. the name of its package folder
+        :return: the MappingSuite, or None when the repository has no such package folder
+        """
+        return self._read_mapping_suite_package(mapping_suite_identifier=reference)
+
+    def list(self) -> Iterator[MappingSuite]:
+        """
+            Yield every mapping suite of the repository, in sorted (deterministic) package-folder order.
+        :return: iterator over one MappingSuite per sub-folder of the repository
+        """
+        # iterdir() order is arbitrary; sorting also snapshots the folder list before the first yield
+        for package_path in sorted(x for x in self.repository_path.iterdir() if x.is_dir()):
+            yield self.get(reference=package_path.name)
+
+    def clear_repository(self):
+        """
+            Delete the repository folder itself together with everything stored in it.
+            NOTE: the folder is not recreated here, although list() iterates over it.
+        """
+        shutil.rmtree(self.repository_path)
diff --git a/ted_sws/data_manager/adapters/notice_repository.py b/ted_sws/data_manager/adapters/notice_repository.py
@@ -1,6 +1,8 @@
 import logging
 from typing import Iterator
 from pymongo import MongoClient
+
+from ted_sws import config
 from ted_sws.domain.adapters.repository_abc import NoticeRepositoryABC
 from ted_sws.domain.model.notice import Notice, NoticeStatus
 
@@ -13,11 +15,11 @@ class NoticeRepository(NoticeRepositoryABC):
     """
 
     _collection_name = "notice_collection"
-    _database_name = "notice_db"
+    _database_name = config.MONGO_DB_AGGREGATES_DATABASE_NAME
 
-    def __init__(self, mongodb_client: MongoClient, database_name: str = None):
+    def __init__(self, mongodb_client: MongoClient):
         mongodb_client = mongodb_client
-        notice_db = mongodb_client[database_name if database_name else self._database_name]
+        notice_db = mongodb_client[self._database_name]
         self.collection = notice_db[self._collection_name]
 
     def add(self, notice: Notice):