-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/ted 24 #35
Feature/ted 24 #35
Changes from 22 commits
9db2652
a6f24de
68d227c
fe66b46
b378ef6
3773b2d
6239ad9
77a3815
f7e681c
370862c
51482ca
aa8a89a
f8386dc
14a1b64
3439441
60922ea
53b9e81
8572ea4
c833a64
bfdb972
f050bdb
fd64c82
1907747
f694e47
743dfd4
4373b04
b27ae01
51e3db7
b3884fe
d40b64a
574c7ba
f0e1ed0
bdd6644
b404a0b
9ef3999
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,291 @@ | ||
import json | ||
import pathlib | ||
import shutil | ||
from typing import Iterator, List, Optional | ||
|
||
from pymongo import MongoClient | ||
|
||
from ted_sws import config | ||
from ted_sws.domain.adapters.repository_abc import MappingSuiteRepositoryABC | ||
from ted_sws.domain.model.transform import MappingSuite, FileResource, TransformationRuleSet, SHACLTestSuite, \ | ||
SPARQLTestSuite, MetadataConstraints | ||
|
||
# Names of the files and directories that make up a mapping suite package
# on disk. The nesting below mirrors how the paths are joined in this module:
#
#   <package>/metadata.json
#   <package>/transform/mappings/
#   <package>/transform/resources/
#   <package>/validate/shacl/
#   <package>/validate/sparql/
METADATA_FILE_NAME = "metadata.json"  # metadata file at the package root
TRANSFORM_PACKAGE_NAME = "transform"  # directory holding mappings and resources
MAPPINGS_PACKAGE_NAME = "mappings"  # RML mapping rules, under "transform"
RESOURCES_PACKAGE_NAME = "resources"  # transformation resources, under "transform"
VALIDATE_PACKAGE_NAME = "validate"  # directory holding the test suites
SHACL_PACKAGE_NAME = "shacl"  # SHACL test suites, under "validate"
SPARQL_PACKAGE_NAME = "sparql"  # SPARQL test suites, under "validate"
|
||
|
||
class MappingSuiteRepositoryMongoDB(MappingSuiteRepositoryABC):
    """
        This repository is intended for storing MappingSuite objects in MongoDB.

        Documents are stored in the collection named by ``_collection_name`` of
        the database named by ``_database_name``; the MappingSuite identifier is
        used as the document ``_id``.
    """

    _collection_name = "mapping_suite_collection"
    _database_name = config.MONGO_DB_AGGREGATES_DATABASE_NAME

    def __init__(self, mongodb_client: MongoClient):
        """
            Bind the repository to its MongoDB collection.
        :param mongodb_client: client used to reach the database and collection
        """
        # Keep a reference to the client on the instance (the original
        # statement re-assigned the local name to itself and had no effect).
        self.mongodb_client = mongodb_client
        notice_db = mongodb_client[self._database_name]
        self.collection = notice_db[self._collection_name]

    def add(self, mapping_suite: MappingSuite):
        """
            This method allows you to add MappingSuite objects to the repository.
        :param mapping_suite: the mapping suite to insert
        :return:
        """
        mapping_suite_dict = mapping_suite.dict()
        # The identifier doubles as the primary key of the document.
        mapping_suite_dict["_id"] = mapping_suite_dict["identifier"]
        self.collection.insert_one(mapping_suite_dict)

    def update(self, mapping_suite: MappingSuite):
        """
            This method allows you to update MappingSuite objects to the repository.

            As settled in review: when no document with the same ``_id`` exists,
            the call neither fails, nor updates, nor inserts anything.
        :param mapping_suite: the mapping suite holding the new values
        :return:
        """
        mapping_suite_dict = mapping_suite.dict()
        mapping_suite_dict["_id"] = mapping_suite_dict["identifier"]
        self.collection.update_one({'_id': mapping_suite_dict["_id"]}, {"$set": mapping_suite_dict})

    def get(self, reference) -> Optional[MappingSuite]:
        """
            This method allows a MappingSuite to be obtained based on an identification reference.
        :param reference: identifier of the wanted mapping suite
        :return: MappingSuite, or None when no document matches the reference
        """
        result_dict = self.collection.find_one({"identifier": reference})
        if not result_dict:
            return None
        return MappingSuite(**result_dict)

    def list(self) -> Iterator[MappingSuite]:
        """
            This method allows all records to be retrieved from the repository.
        :return: iterator over the stored MappingSuites
        """
        for result_dict in self.collection.find():
            yield MappingSuite(**result_dict)
|
||
|
||
class MappingSuiteRepositoryInFileSystem(MappingSuiteRepositoryABC):
    """
        This repository is intended for storing MappingSuite objects in FileSystem.

        The repository path is unique; it can contain multiple packages, and each
        package (one per MappingSuite, named after its identifier) has its own
        package path inside the repository.
    """

    def __init__(self, repository_path: pathlib.Path):
        """
            Create the repository directory if it does not exist yet.
        :param repository_path: root directory of the repository
        """
        self.repository_path = repository_path
        self.repository_path.mkdir(parents=True, exist_ok=True)

    def _read_package_metadata(self, package_path: pathlib.Path) -> dict:
        """
            This method allows reading the metadata of a package.
        :param package_path: directory of the package
        :return: the parsed metadata, with 'metadata_constraints' converted
                 to a MetadataConstraints object
        """
        package_metadata_path = package_path / METADATA_FILE_NAME
        package_metadata = json.loads(package_metadata_path.read_text(encoding="utf-8"))
        package_metadata['metadata_constraints'] = MetadataConstraints(**package_metadata['metadata_constraints'])
        return package_metadata

    def _read_transformation_rule_set(self, package_path: pathlib.Path) -> TransformationRuleSet:
        """
            This method allows you to read the transformation rules in a package.
        :param package_path: directory of the package
        :return: rule set built from the "mappings" and "resources" directories
        """
        transform_path = package_path / TRANSFORM_PACKAGE_NAME
        resources = self._read_file_resources(path=transform_path / RESOURCES_PACKAGE_NAME)
        rml_mapping_rules = self._read_file_resources(path=transform_path / MAPPINGS_PACKAGE_NAME)
        return TransformationRuleSet(resources=resources, rml_mapping_rules=rml_mapping_rules)

    def _read_shacl_test_suites(self, package_path: pathlib.Path) -> List[SHACLTestSuite]:
        """
            This method allows you to read shacl test suites from a package.
        :param package_path: directory of the package
        :return: one SHACLTestSuite per sub-directory of "validate/shacl"
        """
        # NOTE(review): the reviewer asked for a single SHACL suite instead of a
        # list; the author replied that the architecture needs revising first.
        shacl_path = package_path / VALIDATE_PACKAGE_NAME / SHACL_PACKAGE_NAME
        # iterdir() yields entries in arbitrary order; sort by name so the
        # result is deterministic.
        suite_paths = sorted(entry for entry in shacl_path.iterdir() if entry.is_dir())
        return [SHACLTestSuite(shacl_tests=self._read_file_resources(path=suite_path))
                for suite_path in suite_paths]

    def _read_sparql_test_suites(self, package_path: pathlib.Path) -> List[SPARQLTestSuite]:
        """
            This method allows you to read sparql test suites from a package.
        :param package_path: directory of the package
        :return: one SPARQLTestSuite per sub-directory of "validate/sparql"
        """
        sparql_path = package_path / VALIDATE_PACKAGE_NAME / SPARQL_PACKAGE_NAME
        # Sorted by name for a deterministic result (iterdir() order is arbitrary).
        suite_paths = sorted(entry for entry in sparql_path.iterdir() if entry.is_dir())
        return [SPARQLTestSuite(sparql_tests=self._read_file_resources(path=suite_path))
                for suite_path in suite_paths]

    def _write_package_metadata(self, mapping_suite: MappingSuite):
        """
            This method creates the metadata of a package based on the metadata in the mapping_suite.
        :param mapping_suite: the mapping suite whose metadata is written
        :return:
        """
        package_path = self.repository_path / mapping_suite.identifier
        package_path.mkdir(parents=True, exist_ok=True)
        package_metadata = mapping_suite.dict()
        # These parts are stored as files in their own directories, not in the
        # metadata file.
        for key in ("transformation_rule_set", "shacl_test_suites", "sparql_test_suites"):
            package_metadata.pop(key, None)
        # Serialise before touching the file, so a serialisation error does not
        # leave a truncated metadata file behind.
        serialised_metadata = json.dumps(package_metadata)
        (package_path / METADATA_FILE_NAME).write_text(serialised_metadata, encoding="utf-8")

    def _write_file_resources(self, file_resources: List[FileResource], path: pathlib.Path):
        """
            This method allows you to write a list of file-type resources to a specific location.
        :param file_resources: resources to write, one file per resource
        :param path: existing directory the files are written into
        :return:
        """
        for file_resource in file_resources:
            file_resource_path = path / file_resource.file_name
            file_resource_path.write_text(file_resource.file_content, encoding="utf-8")

    def _read_file_resources(self, path: pathlib.Path) -> List[FileResource]:
        """
            This method reads a list of file-type resources that are in a specific location.
        :param path: directory whose direct child files are read
        :return: one FileResource per file, ordered by file name
        """
        # Sorted by name for a deterministic result (iterdir() order is arbitrary).
        files = sorted(entry for entry in path.iterdir() if entry.is_file())
        return [FileResource(file_name=file.name,
                             file_content=file.read_text(encoding="utf-8"))
                for file in files]

    def _write_package_transform_rules(self, mapping_suite: MappingSuite):
        """
            This method creates the transformation rules within the package.
        :param mapping_suite: the mapping suite whose rule set is written
        :return:
        """
        transform_path = self.repository_path / mapping_suite.identifier / TRANSFORM_PACKAGE_NAME
        mappings_path = transform_path / MAPPINGS_PACKAGE_NAME
        resources_path = transform_path / RESOURCES_PACKAGE_NAME
        mappings_path.mkdir(parents=True, exist_ok=True)
        resources_path.mkdir(parents=True, exist_ok=True)
        rule_set = mapping_suite.transformation_rule_set
        self._write_file_resources(file_resources=rule_set.rml_mapping_rules, path=mappings_path)
        self._write_file_resources(file_resources=rule_set.resources, path=resources_path)

    def _write_test_suite(self, file_resources: List[FileResource], path: pathlib.Path):
        """
            Create the directory of one test suite and write its test files into it.
        :param file_resources: test files of the suite
        :param path: directory of the suite
        :return:
        """
        path.mkdir(parents=True, exist_ok=True)
        self._write_file_resources(file_resources=file_resources, path=path)

    def _write_package_validation_rules(self, mapping_suite: MappingSuite):
        """
            This method creates the validation rules within the package.
        :param mapping_suite: the mapping suite whose test suites are written
        :return:
        """
        validate_path = self.repository_path / mapping_suite.identifier / VALIDATE_PACKAGE_NAME
        sparql_path = validate_path / SPARQL_PACKAGE_NAME
        shacl_path = validate_path / SHACL_PACKAGE_NAME
        sparql_path.mkdir(parents=True, exist_ok=True)
        shacl_path.mkdir(parents=True, exist_ok=True)

        # NOTE(review): the reviewer objected to naming suite folders by position
        # and suggested an identifier per suite as the folder name instead (and
        # possibly a single SHACL suite). Positional names are kept here so the
        # on-disk layout stays unchanged.
        for idx, shacl_test_suite in enumerate(mapping_suite.shacl_test_suites):
            self._write_test_suite(file_resources=shacl_test_suite.shacl_tests,
                                   path=shacl_path / f"shacl_test_suite_{idx}")

        for idx, sparql_test_suite in enumerate(mapping_suite.sparql_test_suites):
            self._write_test_suite(file_resources=sparql_test_suite.sparql_tests,
                                   path=sparql_path / f"sparql_test_suite_{idx}")

    def _write_mapping_suite_package(self, mapping_suite: MappingSuite):
        """
            This method creates a package based on data from mapping_suite.
        :param mapping_suite: the mapping suite to write
        :return:
        """
        self._write_package_metadata(mapping_suite=mapping_suite)
        self._write_package_transform_rules(mapping_suite=mapping_suite)
        self._write_package_validation_rules(mapping_suite=mapping_suite)

    def _read_mapping_suite_package(self, mapping_suite_identifier: str) -> Optional[MappingSuite]:
        """
            This method reads a package and initializes a MappingSuite object.
        :param mapping_suite_identifier: name of the package directory
        :return: the MappingSuite, or None when the package directory does not exist
        """
        package_path = self.repository_path / mapping_suite_identifier
        if not package_path.is_dir():
            return None
        package_metadata = self._read_package_metadata(package_path)
        package_metadata["transformation_rule_set"] = self._read_transformation_rule_set(package_path)
        package_metadata["shacl_test_suites"] = self._read_shacl_test_suites(package_path)
        package_metadata["sparql_test_suites"] = self._read_sparql_test_suites(package_path)
        return MappingSuite(**package_metadata)

    def add(self, mapping_suite: MappingSuite):
        """
            This method allows you to add MappingSuite objects to the repository.
        :param mapping_suite: the mapping suite to store
        :return:
        """
        self._write_mapping_suite_package(mapping_suite=mapping_suite)

    def update(self, mapping_suite: MappingSuite):
        """
            This method allows you to update MappingSuite objects to the repository.

            Nothing is written when the package directory does not exist yet.
        :param mapping_suite: the mapping suite holding the new content
        :return:
        """
        # NOTE(review): the package is overwritten in place; files of a previous
        # version that are no longer part of the suite are not removed.
        package_path = self.repository_path / mapping_suite.identifier
        if package_path.is_dir():
            self._write_mapping_suite_package(mapping_suite=mapping_suite)

    def get(self, reference) -> Optional[MappingSuite]:
        """
            This method allows a MappingSuite to be obtained based on an identification reference.
        :param reference: identifier of the wanted mapping suite
        :return: MappingSuite, or None when there is no package for the reference
        """
        return self._read_mapping_suite_package(mapping_suite_identifier=reference)

    def list(self) -> Iterator[MappingSuite]:
        """
            This method allows all records to be retrieved from the repository.
        :return: iterator over the stored MappingSuites, ordered by package name
        """
        # Sorted by name for a deterministic result (iterdir() order is arbitrary).
        package_paths = sorted(entry for entry in self.repository_path.iterdir() if entry.is_dir())
        for package_path in package_paths:
            yield self.get(reference=package_path.name)

    def clear_repository(self):
        """
            This method allows you to clean the repository.

            The repository directory itself is removed together with its content.
        :return:
        """
        shutil.rmtree(self.repository_path)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
More reliable constant usage.