Skip to content

Commit

Permalink
Merge pull request #242 from OP-TED/feature/TED-589
Browse files Browse the repository at this point in the history
normaliser changes
  • Loading branch information
Dragos0000 committed Sep 6, 2022
2 parents 93eaf1a + a1a979d commit 291cb26
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 23 deletions.
20 changes: 16 additions & 4 deletions ted_sws/notice_metadata_processor/services/metadata_normalizer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import abc
import re
from datetime import datetime
from typing import Dict, Tuple, List

Expand Down Expand Up @@ -137,12 +138,22 @@ def normalise_legal_basis_value(cls, value: str) -> str:
@classmethod
def normalise_form_number(cls, value: str) -> str:
    """
    Normalise a form number to the {letters}{two-digit number} format.

    The input data is deliberately normalised rather than rejected.
    Rules:
    * The form number should start with a letter ("F", "T"); a purely
      numeric value receives the default "F" prefix ("18" -> "F18").
    * The form number isn't always a number (CEI, EEIG); such values are
      returned unchanged.
    * If the number is between 1 - 9 then it must have 0 as prefix
      (F02 not F2).
    * Values that are not "letters followed only by digits"
      (e.g. "FX03FG", "1F03FG") are returned unchanged.

    :param value: the extracted form number; None and "" are tolerated
    :return: the normalised form number, or the input unchanged when it
        cannot be normalised
    """
    if not value:
        # None or empty string: nothing to normalise, hand it back as-is
        # instead of letting re.split raise a TypeError on None.
        return value
    # Split once, right before the first digit:
    # "F2" -> ["F", "2"], "18" -> ["", "18"], "CEI" -> ["CEI"].
    form_number_parts = re.split(r"(?=\d)", value, maxsplit=1)
    if len(form_number_parts) == 2:
        text_part, number_part = form_number_parts
        # A missing letter prefix defaults to the standard-form prefix "F".
        text_part = text_part or "F"
        if text_part.isalpha() and number_part.isdecimal():
            # Pad single-digit numbers to two digits (F2 -> F02).
            return text_part + number_part.zfill(2)
    return value

@classmethod
Expand All @@ -168,7 +179,8 @@ def get_filter_variables_values(cls, form_number: str, extracted_notice_type: st
filter_map.query(f"{FORM_NUMBER_KEY}=='{variables[FORM_NUMBER_KEY]}'").to_dict(orient='records')[0]
except:
raise Exception(
f"This notice doesn't have a form number or the extracted form number is not in the mapping. Form number found is {form_number}")
f"This notice doesn't have a form number or the extracted form number is not in the mapping. "
f"Form number found is {form_number}, document code is {document_type_code} and legal basis is {legal_basis}")

for key, value in filter_variables.items():
if value == 0:
Expand Down
3 changes: 0 additions & 3 deletions ted_sws/resources/mapping_files/eforms_mapping.csv
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,5 @@ result,can-desg,32014L0025,37
cont-modif,can-modif,32014L0024,38
cont-modif,can-modif,32014L0025,39
cont-modif,can-modif,32014L0023,40
change,corr,32014L0024,41
change,corr,32014L0025,41
change,corr,32014L0023,41
planning,pin-tran,32007R1370,T01
result,can-tran,32007R1370,T02
3 changes: 2 additions & 1 deletion ted_sws/resources/mapping_files/sforms_mapping.csv
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ F12,,32014L0024,,23
F12,,32014L0025,,24
F13,,32014L0024,,36
F13,,32014L0025,,37
F14,,,,41
F15,,32009L0081,,27
F15,,32014L0023,,28
F15,,32014L0024,,25
Expand All @@ -34,10 +33,12 @@ F20,,32014L0025,,39
F20,,32018R1046,,38
F21,AWARD_CONTRACT,,,33
F21,CONTRACT,,,20
F21,PRI_ONLY,,,4
F21,PRI_CALL_COMPETITION,,,12
F22,AWARD_CONTRACT,,,34
F22,CONTRACT,,,21
F22,PER_CALL_COMPETITION,,,13
F22,PER_ONLY,,,5
F22,QSU_ONLY,,,15
F23,,,,35
F24,,,,19
Expand Down
38 changes: 23 additions & 15 deletions tests/unit/notice_metadata_processor/test_metadata_normaliser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice, normalise_notice_by_id, \
MetadataNormaliser, ExtractedMetadataNormaliser, FORM_NUMBER_KEY, SF_NOTICE_TYPE_KEY, LEGAL_BASIS_KEY, \
DOCUMENT_CODE_KEY
from ted_sws.notice_metadata_processor.services.xml_manifestation_metadata_extractor import XMLManifestationMetadataExtractor
from ted_sws.notice_metadata_processor.services.xml_manifestation_metadata_extractor import \
XMLManifestationMetadataExtractor


def test_metadata_normaliser_by_notice(indexed_notice):
Expand Down Expand Up @@ -40,29 +41,37 @@ def test_metadata_normaliser(indexed_notice):


def test_normalise_form_number(indexed_notice):
    """
    Check form number normalisation on the fixture notice and on a set of
    hand-picked raw values (missing prefix, missing zero padding,
    non-numeric and malformed form numbers, missing value).
    """
    extracted_metadata = XMLManifestationMetadataExtractor(
        xml_manifestation=indexed_notice.xml_manifestation).to_metadata()
    extracted_metadata_normaliser = ExtractedMetadataNormaliser(extracted_metadata=extracted_metadata)
    normalise = extracted_metadata_normaliser.normalise_form_number

    # The fixture notice carries a bare number that must receive the "F" prefix.
    assert extracted_metadata.extracted_form_number == "18"
    assert normalise(value=extracted_metadata.extracted_form_number) == "F18"
    # A missing form number is passed through untouched.
    assert normalise(value=None) is None

    # raw value -> expected normalised value
    expected_by_raw_value = {
        "T01": "T01",
        "FFFSA": "FFFSA",
        "F18": "F18",
        "F01": "F01",
        "F2": "F02",
        "22": "F22",
        "2": "F02",
        "F": "F",
        "TX01": "TX01",
        "TX1": "TX01",
        "FX03FG": "FX03FG",
        "1F03FG": "1F03FG",
    }
    for raw_value, expected in expected_by_raw_value.items():
        assert normalise(raw_value) == expected, raw_value


def test_normalise_legal_basis(indexed_notice):
    """
    Check that the legal basis directive extracted from the fixture notice
    ("2009/81/EC") is normalised to "32009L0081".
    """
    extracted_metadata = XMLManifestationMetadataExtractor(
        xml_manifestation=indexed_notice.xml_manifestation).to_metadata()
    extracted_metadata_normaliser = ExtractedMetadataNormaliser(extracted_metadata=extracted_metadata)

    assert extracted_metadata.legal_basis_directive == "2009/81/EC"
    normalised_legal_basis = extracted_metadata_normaliser.normalise_legal_basis_value(
        value=extracted_metadata.legal_basis_directive)
    assert normalised_legal_basis == "32009L0081"


def test_get_map_value(indexed_notice):
    """
    Check that the country code "DE" is resolved through the countries
    mapping to its authority URI.
    """
    extracted_metadata = XMLManifestationMetadataExtractor(
        xml_manifestation=indexed_notice.xml_manifestation).to_metadata()
    extracted_metadata_normaliser = ExtractedMetadataNormaliser(extracted_metadata=extracted_metadata)

    country_uri = extracted_metadata_normaliser.get_map_value(
        mapping=MappingFilesRegistry().countries, value="DE")
    assert country_uri == "http://publications.europa.eu/resource/authority/country/DEU"


def test_filter_df_by_variables():
df = MappingFilesRegistry().ef_notice_df
filtered_df = filter_df_by_variables(df=df, form_type="planning",
Expand All @@ -71,9 +80,9 @@ def test_filter_df_by_variables():
assert len(filtered_df.index) == 3
assert "32014L0024" in filtered_df["eform_legal_basis"].values


def test_get_form_type_and_notice_type(indexed_notice):
extracted_metadata = XMLManifestationMetadataExtractor(xml_manifestation=indexed_notice.xml_manifestation).to_metadata()
extracted_metadata = XMLManifestationMetadataExtractor(
xml_manifestation=indexed_notice.xml_manifestation).to_metadata()
extracted_metadata_normaliser = ExtractedMetadataNormaliser(extracted_metadata=extracted_metadata)
form_type, notice_type, legal_basis, eforms_subtype = extracted_metadata_normaliser.get_form_type_and_notice_type(
ef_map=MappingFilesRegistry().ef_notice_df,
Expand All @@ -86,9 +95,9 @@ def test_get_form_type_and_notice_type(indexed_notice):
assert "32014L0024" == legal_basis
assert "16" == eforms_subtype


def test_get_filter_values(indexed_notice):
extracted_metadata = XMLManifestationMetadataExtractor(xml_manifestation=indexed_notice.xml_manifestation).to_metadata()
extracted_metadata = XMLManifestationMetadataExtractor(
xml_manifestation=indexed_notice.xml_manifestation).to_metadata()
extracted_metadata_normaliser = ExtractedMetadataNormaliser(extracted_metadata=extracted_metadata)
filter_map = MappingFilesRegistry().filter_map_df
filter_variables_dict = extracted_metadata_normaliser.get_filter_variables_values(form_number="F07",
Expand All @@ -109,7 +118,6 @@ def test_get_filter_values(indexed_notice):
document_type_code="7",
legal_basis="legal")


def test_normalising_process_on_failed_notice_in_dag(notice_2021):
extracted_metadata = XMLManifestationMetadataExtractor(
xml_manifestation=notice_2021.xml_manifestation).to_metadata()
Expand Down

0 comments on commit 291cb26

Please sign in to comment.