Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

updated XPATH coverage service to support not-based XPATHs #428

Merged
merged 8 commits on
Feb 7, 2023
3 changes: 1 addition & 2 deletions dags/fetch_notices_by_date.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,7 @@ def validate_fetched_notices():
default_value=(datetime.now() - timedelta(days=1)).strftime(
"%Y%m%d*")), "%Y%m%d*")
mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
validate_and_update_daily_supra_notice(notice_publication_day=publication_date,
mongodb_client=mongodb_client)
validate_and_update_daily_supra_notice(ted_publication_date=publication_date, mongodb_client=mongodb_client)

def _branch_selector():
trigger_complete_workflow = get_dag_param(key=TRIGGER_COMPLETE_WORKFLOW_DAG_KEY,
Expand Down
2 changes: 1 addition & 1 deletion ted_sws/core/model/supra_notice.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,6 @@ class DailySupraNotice(SupraNotice):
"""
This is an aggregate over the notices published in TED in a specific day.
"""
notice_fetched_date: date
ted_publication_date: date
validation_report: Optional[SupraNoticeValidationReport]
validation_summary: Optional[ValidationSummaryReport] = None
Empty file.
21 changes: 12 additions & 9 deletions ted_sws/data_manager/adapters/supra_notice_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from ted_sws.data_manager.adapters import inject_date_string_fields, remove_date_string_fields
from ted_sws.data_manager.adapters.repository_abc import DailySupraNoticeRepositoryABC

DAILY_SUPRA_NOTICE_FETCHED_DATE = "notice_fetched_date"
DAILY_SUPRA_NOTICE_TED_PUBLICATION_DATE = "ted_publication_date"
DAILY_SUPRA_NOTICE_CREATED_AT = "created_at"
DAILY_SUPRA_NOTICE_ID = "_id"

Expand All @@ -26,7 +26,8 @@ def __init__(self, mongodb_client: MongoClient, database_name: str = None):
daily_supra_notice_db = mongodb_client[self._database_name]
self.collection = daily_supra_notice_db[self._collection_name]
self.collection.create_index(
[(DAILY_SUPRA_NOTICE_FETCHED_DATE, ASCENDING)]) # TODO: index creation may bring race condition error.
[(DAILY_SUPRA_NOTICE_TED_PUBLICATION_DATE,
ASCENDING)]) # TODO: index creation may bring race condition error.

def _create_dict_from_daily_supra_notice(self, daily_supra_notice: DailySupraNotice) -> dict:
"""
Expand All @@ -35,10 +36,11 @@ def _create_dict_from_daily_supra_notice(self, daily_supra_notice: DailySupraNot
:return:
"""
daily_supra_notice_dict = daily_supra_notice.dict()
daily_supra_notice_dict[DAILY_SUPRA_NOTICE_FETCHED_DATE] = datetime.combine(
daily_supra_notice_dict[DAILY_SUPRA_NOTICE_FETCHED_DATE], time())
daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID] = daily_supra_notice_dict[DAILY_SUPRA_NOTICE_FETCHED_DATE].isoformat()
inject_date_string_fields(data=daily_supra_notice_dict, date_field_name=DAILY_SUPRA_NOTICE_FETCHED_DATE)
daily_supra_notice_dict[DAILY_SUPRA_NOTICE_TED_PUBLICATION_DATE] = datetime.combine(
daily_supra_notice_dict[DAILY_SUPRA_NOTICE_TED_PUBLICATION_DATE], time())
daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID] = daily_supra_notice_dict[
DAILY_SUPRA_NOTICE_TED_PUBLICATION_DATE].isoformat()
inject_date_string_fields(data=daily_supra_notice_dict, date_field_name=DAILY_SUPRA_NOTICE_TED_PUBLICATION_DATE)
inject_date_string_fields(data=daily_supra_notice_dict, date_field_name=DAILY_SUPRA_NOTICE_CREATED_AT)
return daily_supra_notice_dict

Expand All @@ -50,9 +52,10 @@ def _create_daily_supra_notice_from_dict(self, daily_supra_notice_dict: dict) ->
"""
if daily_supra_notice_dict is not None:
daily_supra_notice_dict.pop(DAILY_SUPRA_NOTICE_ID, None)
daily_supra_notice_dict[DAILY_SUPRA_NOTICE_FETCHED_DATE] = daily_supra_notice_dict[
DAILY_SUPRA_NOTICE_FETCHED_DATE].date()
remove_date_string_fields(data=daily_supra_notice_dict, date_field_name=DAILY_SUPRA_NOTICE_FETCHED_DATE)
daily_supra_notice_dict[DAILY_SUPRA_NOTICE_TED_PUBLICATION_DATE] = daily_supra_notice_dict[
DAILY_SUPRA_NOTICE_TED_PUBLICATION_DATE].date()
remove_date_string_fields(data=daily_supra_notice_dict,
date_field_name=DAILY_SUPRA_NOTICE_TED_PUBLICATION_DATE)
remove_date_string_fields(data=daily_supra_notice_dict, date_field_name=DAILY_SUPRA_NOTICE_CREATED_AT)
daily_supra_notice = DailySupraNotice.parse_obj(daily_supra_notice_dict)
return daily_supra_notice
Expand Down
2 changes: 1 addition & 1 deletion ted_sws/notice_validator/adapters/xpath_coverage_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def based_xpaths(cls, xpaths: List[str], base_xpath: str) -> List[str]:
:param base_xpath:
:return:
"""
base_xpath += "/"
base_xpath += "/" if not base_xpath.endswith("/") else ""
return list(filter(lambda xpath: xpath.startswith(base_xpath), xpaths))

def coverage_notice_xpath(self, notices: List[Notice], mapping_suite_id) -> XPATHCoverageValidationReport:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@


def create_and_store_in_mongo_db_daily_supra_notice(notice_ids: List[str], mongodb_client: MongoClient,
notice_fetched_date: date = date.today()):
ted_publication_date: date = date.today()):
"""
This function creates and stores a DailySupraNotice in MongoDB.
:param notice_ids:
:param mongodb_client:
:param notice_fetched_date:
:param ted_publication_date:
:return:
"""
daily_supra_notice_repository = DailySupraNoticeRepository(mongodb_client=mongodb_client)
daily_supra_notice = DailySupraNotice(notice_fetched_date=notice_fetched_date,
daily_supra_notice = DailySupraNotice(ted_publication_date=ted_publication_date,
notice_ids=notice_ids)
daily_supra_notice_repository.add(daily_supra_notice=daily_supra_notice)
20 changes: 10 additions & 10 deletions ted_sws/supra_notice_manager/services/supra_notice_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,16 @@
SUPRA_NOTICE_NOT_FOUND_ERROR = "SupraNotice not found in Database!"


def validate_and_update_daily_supra_notice(notice_publication_day: day_type, mongodb_client: MongoClient,
def validate_and_update_daily_supra_notice(ted_publication_date: day_type, mongodb_client: MongoClient,
request_api: RequestAPI = None):
if request_api is None:
request_api = TedRequestAPI()

if isinstance(notice_publication_day, date):
notice_publication_day = datetime.combine(notice_publication_day, time())
if isinstance(ted_publication_date, date):
ted_publication_date = datetime.combine(ted_publication_date, time())

repo = DailySupraNoticeRepository(mongodb_client=mongodb_client)
supra_notice: DailySupraNotice = repo.get(reference=notice_publication_day)
supra_notice: DailySupraNotice = repo.get(reference=ted_publication_date)

if not supra_notice:
raise ValueError(SUPRA_NOTICE_NOT_FOUND_ERROR)
Expand All @@ -35,7 +35,7 @@ def validate_and_update_daily_supra_notice(notice_publication_day: day_type, mon
fetched_notice_ids = set(fetched_notice_ids_list)

ted_api_adapter: TedAPIAdapter = TedAPIAdapter(request_api=request_api)
query = {"q": f"PD=[{notice_publication_day.strftime('%Y%m%d*')}]"}
query = {"q": f"PD=[{ted_publication_date.strftime('%Y%m%d*')}]"}
documents = ted_api_adapter.get_by_query(query=query, result_fields={"fields": ["ND"]})
api_notice_ids_list = [document["ND"] for document in documents] if documents and len(documents) else []
api_notice_ids = set(api_notice_ids_list)
Expand All @@ -44,18 +44,18 @@ def validate_and_update_daily_supra_notice(notice_publication_day: day_type, mon
missing_notice_ids = api_notice_ids - fetched_notice_ids
if len(missing_notice_ids):
validation_report.missing_notice_ids = list(missing_notice_ids)
log_technical_error(message=f"Supra notice for date [{notice_publication_day}] don't fetch notices with ids=[{missing_notice_ids}]")
log_technical_error(message=f"Supra notice for date [{ted_publication_date}] don't fetch notices with ids=[{missing_notice_ids}]")

supra_notice.validation_report = validation_report
repo.update(daily_supra_notice=supra_notice)


def summary_validation_for_daily_supra_notice(notice_publication_day: day_type, mongodb_client: MongoClient):
if isinstance(notice_publication_day, date):
notice_publication_day = datetime.combine(notice_publication_day, time())
def summary_validation_for_daily_supra_notice(ted_publication_date: day_type, mongodb_client: MongoClient):
if isinstance(ted_publication_date, date):
ted_publication_date = datetime.combine(ted_publication_date, time())

repo = DailySupraNoticeRepository(mongodb_client=mongodb_client)
supra_notice: DailySupraNotice = repo.get(reference=notice_publication_day)
supra_notice: DailySupraNotice = repo.get(reference=ted_publication_date)

if not supra_notice:
raise ValueError(SUPRA_NOTICE_NOT_FOUND_ERROR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,6 @@ def test_supra_notice_unpacking_in_file_system():
supra_notice_repository = DailySupraNoticeRepository(mongodb_client=mongodb_client)
unpacking_folder = pathlib.Path("./unpacking_supra_notice_result")
for index, supra_notice in enumerate(itertools.islice(supra_notice_repository.list(), 5)):
supra_notice_unpacking_folder = unpacking_folder / f"supra_notice_{supra_notice.notice_fetched_date.strftime('%Y_%m_%d')}"
supra_notice_unpacking_folder = unpacking_folder / f"supra_notice_{supra_notice.ted_publication_date.strftime('%Y_%m_%d')}"
supra_notice_unpacking_folder.mkdir(parents=True, exist_ok=True)
unpack_supra_notice(supra_notice=supra_notice, unpack_path=supra_notice_unpacking_folder)
6 changes: 3 additions & 3 deletions tests/e2e/supra_notice_manager/test_supra_notice_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_supra_notice_validator(fake_mongodb_client, daily_supra_notice_reposito
validate_and_update_daily_supra_notice(day, fake_mongodb_client)

create_and_store_in_mongo_db_daily_supra_notice(notice_ids=notice_ids, mongodb_client=fake_mongodb_client,
notice_fetched_date=day)
ted_publication_date=day)
validate_and_update_daily_supra_notice(day, fake_mongodb_client)
result = daily_supra_notice_repository.get(reference=day)
assert result
Expand All @@ -45,9 +45,9 @@ def test_summary_validation_for_daily_supra_notice(fake_mongodb_client, daily_su
fake_notice_repository.add(notice)

create_and_store_in_mongo_db_daily_supra_notice(notice_ids=notice_ids, mongodb_client=fake_mongodb_client,
notice_fetched_date=day)
ted_publication_date=day)

summary_validation_for_daily_supra_notice(notice_publication_day=day, mongodb_client=fake_mongodb_client)
summary_validation_for_daily_supra_notice(ted_publication_date=day, mongodb_client=fake_mongodb_client)
result = daily_supra_notice_repository.get(reference=day)
assert isinstance(result.validation_summary, ValidationSummaryReport)
assert result.notice_ids == notice_ids
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/data_manager/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def fake_mapping_suite():

@pytest.fixture
def daily_supra_notice():
return DailySupraNotice(notice_ids=["1", "2", "3"], notice_fetched_date=date.today())
return DailySupraNotice(notice_ids=["1", "2", "3"], ted_publication_date=date.today())


@pytest.fixture
Expand Down
8 changes: 4 additions & 4 deletions tests/unit/data_manager/test_supra_notice_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,17 @@
def test_daily_supra_notice_repository(mongodb_client, daily_supra_notice):
daily_supra_notice_repository = DailySupraNoticeRepository(mongodb_client=mongodb_client)
daily_supra_notice_repository.add(daily_supra_notice=daily_supra_notice)
result_supra_notice = daily_supra_notice_repository.get(reference=daily_supra_notice.notice_fetched_date)
result_supra_notice = daily_supra_notice_repository.get(reference=daily_supra_notice.ted_publication_date)
assert result_supra_notice
assert len(result_supra_notice.notice_ids) == 3
assert result_supra_notice.notice_fetched_date == daily_supra_notice.notice_fetched_date
assert result_supra_notice.ted_publication_date == daily_supra_notice.ted_publication_date
assert result_supra_notice.created_at == daily_supra_notice.created_at
assert result_supra_notice.notice_ids == daily_supra_notice.notice_ids
daily_supra_notice_repository.update(daily_supra_notice=daily_supra_notice)
result_supra_notice = daily_supra_notice_repository.get(reference=daily_supra_notice.notice_fetched_date)
result_supra_notice = daily_supra_notice_repository.get(reference=daily_supra_notice.ted_publication_date)
assert result_supra_notice
assert len(result_supra_notice.notice_ids) == 3
assert result_supra_notice.notice_fetched_date == daily_supra_notice.notice_fetched_date
assert result_supra_notice.ted_publication_date == daily_supra_notice.ted_publication_date
assert result_supra_notice.created_at == daily_supra_notice.created_at
assert result_supra_notice.notice_ids == daily_supra_notice.notice_ids
result = list(daily_supra_notice_repository.list())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@

def test_daily_supra_notice_manager(mongodb_client, daily_supra_notice_repository):
notice_ids = ["1", "2", "3"]
notice_fetched_date = date(2020, 1, 1)
ted_publication_date = date(2020, 1, 1)
create_and_store_in_mongo_db_daily_supra_notice(notice_ids=notice_ids, mongodb_client=mongodb_client,
notice_fetched_date=notice_fetched_date)
ted_publication_date=ted_publication_date)
for result in daily_supra_notice_repository.list():
assert result
result = daily_supra_notice_repository.get(reference=notice_fetched_date)
result = daily_supra_notice_repository.get(reference=ted_publication_date)
assert result
assert result.notice_fetched_date == notice_fetched_date
assert result.ted_publication_date == ted_publication_date
notice_ids.append("4")
result.notice_ids = notice_ids
daily_supra_notice_repository.update(daily_supra_notice=result)
result = daily_supra_notice_repository.get(reference=notice_fetched_date)
result = daily_supra_notice_repository.get(reference=ted_publication_date)
assert result.notice_ids == notice_ids
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_supra_notice_validator(mongodb_client, daily_supra_notice_repository, f

notice_ids = ["1", "2", "3"]
create_and_store_in_mongo_db_daily_supra_notice(notice_ids=notice_ids, mongodb_client=mongodb_client,
notice_fetched_date=day)
ted_publication_date=day)
validate_and_update_daily_supra_notice(day, mongodb_client, fake_request_api)
result = daily_supra_notice_repository.get(reference=day)
assert result
Expand All @@ -40,7 +40,7 @@ def test_summary_validation_for_daily_supra_notice(mongodb_client, daily_supra_n

notice_ids = [notice.ted_id]
create_and_store_in_mongo_db_daily_supra_notice(notice_ids=notice_ids, mongodb_client=mongodb_client,
notice_fetched_date=day)
ted_publication_date=day)
summary_validation_for_daily_supra_notice(day, mongodb_client)
result = daily_supra_notice_repository.get(reference=day)
assert result
Expand Down