From 2c030a9c3d542a08f3662f39e79ee34af74ed68d Mon Sep 17 00:00:00 2001 From: raftmsohani <97037188+raftmsohani@users.noreply.github.com> Date: Fri, 28 Jul 2023 10:59:50 -0400 Subject: [PATCH 1/2] 2598-staging-deployment-failure-issue (#2619) * disable migration and added timeout * added migrations to e2e pipeline * changed migration order * corrected directory * changed location of running commands for migration * reverted the migration back for checking the e2e * after waitfor * migration in docker-compose * removed unused jobs * added -run back * check backend * added migration to docker-compose * testing * added populate stts * increased the timeout * timeout * removed migration from Dockerfile * correction --------- Co-authored-by: Andrew <84722778+andrew-jameson@users.noreply.github.com> --- scripts/deploy-backend.sh | 4 ++-- tdrs-backend/Dockerfile | 2 +- tdrs-backend/docker-compose.yml | 5 ++++- tdrs-backend/gunicorn_start.sh | 7 +++---- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/scripts/deploy-backend.sh b/scripts/deploy-backend.sh index b14b8dd10e..f3ed5941d1 100755 --- a/scripts/deploy-backend.sh +++ b/scripts/deploy-backend.sh @@ -89,9 +89,9 @@ update_backend() # Do a zero downtime deploy. This requires enough memory for # two apps to exist in the org/space at one time. 
- cf push "$CGAPPNAME_BACKEND" --no-route -f manifest.buildpack.yml --strategy rolling || exit 1 + cf push "$CGAPPNAME_BACKEND" --no-route -f manifest.buildpack.yml -t 180 --strategy rolling || exit 1 else - cf push "$CGAPPNAME_BACKEND" --no-route -f manifest.buildpack.yml + cf push "$CGAPPNAME_BACKEND" --no-route -f manifest.buildpack.yml -t 180 # set up JWT key if needed if cf e "$CGAPPNAME_BACKEND" | grep -q JWT_KEY ; then echo jwt cert already created diff --git a/tdrs-backend/Dockerfile b/tdrs-backend/Dockerfile index 17eb686554..4ce1470318 100644 --- a/tdrs-backend/Dockerfile +++ b/tdrs-backend/Dockerfile @@ -28,7 +28,7 @@ RUN groupadd -g ${gid} ${group} && useradd -u ${uid} -g ${group} -s /bin/sh ${us RUN chown -R tdpuser /tdpapp && chmod u+x gunicorn_start.sh wait_for_services.sh -CMD ["./gunicorn_start.sh"] +#CMD ["./gunicorn_start.sh"] # if the container crashes/loops, we can shell into it by doing the following: # docker ps -a # to get the container id # docker commit debug/ diff --git a/tdrs-backend/docker-compose.yml b/tdrs-backend/docker-compose.yml index 47de77cdee..e7e9662cf2 100644 --- a/tdrs-backend/docker-compose.yml +++ b/tdrs-backend/docker-compose.yml @@ -93,7 +93,10 @@ services: build: . 
command: > bash -c "./wait_for_services.sh && - ./gunicorn_start.sh && celery -A tdpservice.settings worker -l info" + ./manage.py makemigrations && + ./manage.py migrate && + ./manage.py populate_stts && + ./gunicorn_start.sh && celery -A tdpservice.settings worker -l info" ports: - "5555:5555" depends_on: diff --git a/tdrs-backend/gunicorn_start.sh b/tdrs-backend/gunicorn_start.sh index 563927f00c..8238361490 100755 --- a/tdrs-backend/gunicorn_start.sh +++ b/tdrs-backend/gunicorn_start.sh @@ -14,10 +14,9 @@ fi # echo "Applying database migrations" -python manage.py makemigrations -python manage.py migrate -python manage.py populate_stts -python manage.py collectstatic --noinput +#python manage.py migrate +#python manage.py populate_stts +#python manage.py collectstatic --noinput celery -A tdpservice.settings worker -c 1 & sleep 5 From 51fd924c940834e1fdbda469a0ad25edee4de996 Mon Sep 17 00:00:00 2001 From: Eric Lipe <125676261+elipe17@users.noreply.github.com> Date: Tue, 1 Aug 2023 06:21:23 -0600 Subject: [PATCH 2/2] SSN Encryption/Decryption (#2624) * - Adding t2 schema * - small fixes to naming - added t2.py - Updating var names to match field names * - Added new doc for T2 - Updated model for T2 - Added active parse function * - Added T3 schema defs * - Fixing most lint errors * - Updated T3 to multirow record * - Added unit test and data files - updated field names * - Updating var names to conform to DB max var name length * - Updating based on unit test failure * - adding datafile * - Adding unit tests for t2 and t3 * - Fixed field types - Updated test file * - Removing many migrations to consolidate into one migration * Revert "- Removing many migrations to consolidate into one migration" This reverts commit 1455ae4d334f72e250405803d61d26c0a894e886. 
* - Resolved test issues after merge * - Resolve lint errors * - Merged 1113.2 * - Removed unnecessary file * - Rename model fields - Combined migrations * - fixed spelling in test * - Fixed lint errors * rm commented validators * - Updated schema newlines to be consistent * - Updated field names - Updated tests - Updated migration * - consolidating migrations * - Updated readme and backup script * - Updated parse logic to batch create records * - Fixed lint errors * - Batching record serialization * - Updated parse logic * - Adding big s1 datafile * - fixing lint errors * - Removing test to see if it's causing weird failure * - Updating based on comments - Removing big file since circi can't handle it * - fixing error in update method * - fixing error in update * - Removed extraneous seek * - updating ignore to ignore patch files * - Made evaluate_trailer pure/immutable * Revert "- Removing test to see if it's causing weird failure" This reverts commit 64b78b737c97fb9bfa70217ff70ccffea8d85429. 
* - Use custom transactions while parsing - Handle transaction rollback on parse failure/error - Update tests to use transaction logic - Created new decorator to rollback database when assertion error is hit - Making elastic search log level configurable - Added test for inter parse rollback * - updated store logic to one liner - updated decorator to catch all exceptions - updated tests * - removed function - renamed test * - refactored multirecord schema to be a schema manager instead - updated parse functions to use the new layout - updated bulk create to manage batch size * - Name update for clarity * - Fix lint errors * - Changing batch size to avoid crash in circi * - Updated based on review - Updated tests to reflect line number * - Updated based on review/OH - Added extra transaction safety * - Updated view to show latest datafiles - Added admin filter to show newest or all datafile records - Updated indices to allow easier elastic queries * - Updated based on review comments * - Updated based on feedback for preparse error handling - updated tests * - Updated search indices to have parent FK * - Fix lint errors * - Updated submission tests - Moved create_datafile to util * - fix lint errors * - removing frontend filtering * - adding datafile to admin model * Revert "- adding datafile to admin model" This reverts commit 35a6f24c36c3a4c00ddcfc40f20833530b0199f4. * - Fixed issue where datafile FK wasn't populating - Regenerated migration * - Readding datafile back to admin view now that the error is resolved * - adding datafile back * Revert "- Readding datafile back to admin view now that the error is resolved" This reverts commit 2807425059fd1b5b355edfb16d30d170cf869d7b. * - Removed unnecessary fields - Updated dependencies - Updated filter * - Updated document to include required fields * - Moved datafile reference to avoid confusion * - Updating based on comments. 
- Added base class to keep things a little more DRY * - Refactored parsing to avoid custom transactions by leveraging the FK on the records. Rollbacks are a lot slower now, but hopefully that will happen much less versus successful parses. * - small optimization for the rollback * - Fix lint errors * - Removing commented code * - Installing build essentials * Revert "- Installing build essentials" This reverts commit 782a5f7d687e60965a9100520f027b9678dbac35. * - adding setup tools and wheel * Revert "- adding setup tools and wheel" This reverts commit f529728811fba242132b7c42f9e9e09d6037fa70. * - Updating dependencies to get around pep issue * - Pin factoryboy - fix lint error * - Updating manifest * - Added EncryptedField class - Updated schemas - Updated datafiles since all are encrypted * - Fix lint errors * - Added decryption for ssp * - Making the encrypted check stronger * - Refactored fields and rowschema into separate files - Updated encrypted field to take decryption function * - fix lint errors * - Fix import error * - Removed erroneous update that was causing error - Fixed whitespace in datafiles * - White space error * - Resolved error * - Refactored encryption logic to take the header encryption indicator into account * - Fix lint errors --------- Co-authored-by: Jan Timpe Co-authored-by: Alex P <63075587+ADPennington@users.noreply.github.com> Co-authored-by: Andrew <84722778+andrew-jameson@users.noreply.github.com> --- tdrs-backend/tdpservice/parsers/fields.py | 87 +++++++ tdrs-backend/tdpservice/parsers/parse.py | 8 +- tdrs-backend/tdpservice/parsers/row_schema.py | 167 +++++++++++++ .../tdpservice/parsers/schema_defs/header.py | 5 +- .../tdpservice/parsers/schema_defs/ssp/m1.py | 4 +- .../tdpservice/parsers/schema_defs/ssp/m2.py | 8 +- .../tdpservice/parsers/schema_defs/ssp/m3.py | 12 +- .../tdpservice/parsers/schema_defs/tanf/t1.py | 4 +- .../tdpservice/parsers/schema_defs/tanf/t2.py | 8 +- .../tdpservice/parsers/schema_defs/tanf/t3.py | 12 +- 
.../tdpservice/parsers/schema_defs/trailer.py | 3 +- .../parsers/test/data/ADS.E2J.FTP1.TS06 | 2 +- .../parsers/test/data/ADS.E2J.NDM1.TS53_fake | 2 +- .../test/data/ADS.E2J.NDM1.TS53_fake.rollback | 4 +- .../parsers/test/data/small_tanf_section1.txt | 2 +- .../tdpservice/parsers/test/test_util.py | 4 +- tdrs-backend/tdpservice/parsers/util.py | 222 ++---------------- 17 files changed, 318 insertions(+), 236 deletions(-) create mode 100644 tdrs-backend/tdpservice/parsers/fields.py create mode 100644 tdrs-backend/tdpservice/parsers/row_schema.py diff --git a/tdrs-backend/tdpservice/parsers/fields.py b/tdrs-backend/tdpservice/parsers/fields.py new file mode 100644 index 0000000000..e787e0584d --- /dev/null +++ b/tdrs-backend/tdpservice/parsers/fields.py @@ -0,0 +1,87 @@ +"""Datafile field representations.""" + +def value_is_empty(value, length): + """Handle 'empty' values as field inputs.""" + empty_values = [ + ' '*length, # ' ' + '#'*length, # '#####' + ] + + return value is None or value in empty_values + + +class Field: + """Provides a mapping between a field name and its position.""" + + def __init__(self, item, name, type, startIndex, endIndex, required=True, validators=[]): + self.item = item + self.name = name + self.type = type + self.startIndex = startIndex + self.endIndex = endIndex + self.required = required + self.validators = validators + + def create(self, item, name, length, start, end, type): + """Create a new field.""" + return Field(item, name, type, length, start, end) + + def __repr__(self): + """Return a string representation of the field.""" + return f"{self.name}({self.startIndex}-{self.endIndex})" + + def parse_value(self, line): + """Parse the value for a field given a line, startIndex, endIndex, and field type.""" + value = line[self.startIndex:self.endIndex] + + if value_is_empty(value, self.endIndex-self.startIndex): + return None + + match self.type: + case 'number': + try: + value = int(value) + return value + except ValueError: + 
return None + case 'string': + return value + +def tanf_ssn_decryption_func(value, is_encrypted): + """Decrypt TANF SSN value.""" + if is_encrypted: + decryption_dict = {"@": "1", "9": "2", "Z": "3", "P": "4", "0": "5", + "#": "6", "Y": "7", "B": "8", "W": "9", "T": "0"} + decryption_table = str.maketrans(decryption_dict) + return value.translate(decryption_table) + return value + +def ssp_ssn_decryption_func(value, is_encrypted): + """Decrypt SSP SSN value.""" + if is_encrypted: + decryption_dict = {"@": "1", "9": "2", "Z": "3", "P": "4", "0": "5", + "#": "6", "Y": "7", "B": "8", "W": "9", "T": "0"} + decryption_table = str.maketrans(decryption_dict) + return value.translate(decryption_table) + return value + +class EncryptedField(Field): + """Represents an encrypted field and its position.""" + + def __init__(self, decryption_func, item, name, type, startIndex, endIndex, required=True, validators=[]): + super().__init__(item, name, type, startIndex, endIndex, required, validators) + self.decryption_func = decryption_func + self.is_encrypted = False + + def parse_value(self, line): + """Parse and decrypt the value for a field given a line, startIndex, endIndex, and field type.""" + value = line[self.startIndex:self.endIndex] + + if value_is_empty(value, self.endIndex-self.startIndex): + return None + + match self.type: + case 'string': + return self.decryption_func(value, self.is_encrypted) + case _: + return None diff --git a/tdrs-backend/tdpservice/parsers/parse.py b/tdrs-backend/tdpservice/parsers/parse.py index 27f27748f6..91a25caa19 100644 --- a/tdrs-backend/tdpservice/parsers/parse.py +++ b/tdrs-backend/tdpservice/parsers/parse.py @@ -27,6 +27,8 @@ def parse_datafile(datafile): bulk_create_errors({1: header_errors}, 1, flush=True) return errors + is_encrypted = util.contains_encrypted_indicator(header_line, schema_defs.header.get_field_by_name("encryption")) + # ensure file section matches upload section program_type = header['program_type'] section = 
header['type'] @@ -43,7 +45,7 @@ def parse_datafile(datafile): bulk_create_errors(unsaved_parser_errors, 1, flush=True) return errors - line_errors = parse_datafile_lines(datafile, program_type, section) + line_errors = parse_datafile_lines(datafile, program_type, section, is_encrypted) errors = errors | line_errors @@ -99,7 +101,7 @@ def rollback_parser_errors(datafile): """Delete created errors in the event of a failure.""" ParserError.objects.filter(file=datafile).delete() -def parse_datafile_lines(datafile, program_type, section): +def parse_datafile_lines(datafile, program_type, section, is_encrypted): """Parse lines with appropriate schema and return errors.""" rawfile = datafile.file errors = {} @@ -161,6 +163,8 @@ def parse_datafile_lines(datafile, program_type, section): schema_manager = get_schema_manager(line, section, schema_manager_options) + schema_manager.update_encrypted_fields(is_encrypted) + records = manager_parse_line(line, schema_manager, generate_error) record_number = 0 diff --git a/tdrs-backend/tdpservice/parsers/row_schema.py b/tdrs-backend/tdpservice/parsers/row_schema.py new file mode 100644 index 0000000000..a15b1a6bc7 --- /dev/null +++ b/tdrs-backend/tdpservice/parsers/row_schema.py @@ -0,0 +1,167 @@ +"""Row schema for datafile.""" +from .models import ParserErrorCategoryChoices +from .fields import Field, value_is_empty + + +class RowSchema: + """Maps the schema for data lines.""" + + def __init__( + self, + model=dict, + preparsing_validators=[], + postparsing_validators=[], + fields=[], + quiet_preparser_errors=False + ): + self.model = model + self.preparsing_validators = preparsing_validators + self.postparsing_validators = postparsing_validators + self.fields = fields + self.quiet_preparser_errors = quiet_preparser_errors + + def _add_field(self, item, name, length, start, end, type): + """Add a field to the schema.""" + self.fields.append( + Field(item, name, type, start, end) + ) + + def add_fields(self, fields: list): + """Add 
multiple fields to the schema.""" + for field, length, start, end, type in fields: + self._add_field(field, length, start, end, type) + + def get_all_fields(self): + """Get all fields from the schema.""" + return self.fields + + def parse_and_validate(self, line, generate_error): + """Run all validation steps in order, and parse the given line into a record.""" + errors = [] + + # run preparsing validators + preparsing_is_valid, preparsing_errors = self.run_preparsing_validators(line, generate_error) + + if not preparsing_is_valid: + if self.quiet_preparser_errors: + return None, True, [] + return None, False, preparsing_errors + + # parse line to model + record = self.parse_line(line) + + # run field validators + fields_are_valid, field_errors = self.run_field_validators(record, generate_error) + + # run postparsing validators + postparsing_is_valid, postparsing_errors = self.run_postparsing_validators(record, generate_error) + + is_valid = fields_are_valid and postparsing_is_valid + errors = field_errors + postparsing_errors + + return record, is_valid, errors + + def run_preparsing_validators(self, line, generate_error): + """Run each of the `preparsing_validator` functions in the schema against the un-parsed line.""" + is_valid = True + errors = [] + + for validator in self.preparsing_validators: + validator_is_valid, validator_error = validator(line) + is_valid = False if not validator_is_valid else is_valid + + if validator_error and not self.quiet_preparser_errors: + errors.append( + generate_error( + schema=self, + error_category=ParserErrorCategoryChoices.PRE_CHECK, + error_message=validator_error, + record=None, + field=None + ) + ) + + return is_valid, errors + + def parse_line(self, line): + """Create a model for the line based on the schema.""" + record = self.model() + + for field in self.fields: + value = field.parse_value(line) + + if value is not None: + if isinstance(record, dict): + record[field.name] = value + else: + setattr(record, field.name, 
value) + + return record + + def run_field_validators(self, instance, generate_error): + """Run all validators for each field in the parsed model.""" + is_valid = True + errors = [] + + for field in self.fields: + value = None + if isinstance(instance, dict): + value = instance.get(field.name, None) + else: + value = getattr(instance, field.name, None) + + if field.required and not value_is_empty(value, field.endIndex-field.startIndex): + for validator in field.validators: + validator_is_valid, validator_error = validator(value) + is_valid = False if not validator_is_valid else is_valid + if validator_error: + errors.append( + generate_error( + schema=self, + error_category=ParserErrorCategoryChoices.FIELD_VALUE, + error_message=validator_error, + record=instance, + field=field + ) + ) + elif field.required: + is_valid = False + errors.append( + generate_error( + schema=self, + error_category=ParserErrorCategoryChoices.FIELD_VALUE, + error_message=f"{field.name} is required but a value was not provided.", + record=instance, + field=field + ) + ) + + return is_valid, errors + + def run_postparsing_validators(self, instance, generate_error): + """Run each of the `postparsing_validator` functions against the parsed model.""" + is_valid = True + errors = [] + + for validator in self.postparsing_validators: + validator_is_valid, validator_error = validator(instance) + is_valid = False if not validator_is_valid else is_valid + if validator_error: + errors.append( + generate_error( + schema=self, + error_category=ParserErrorCategoryChoices.VALUE_CONSISTENCY, + error_message=validator_error, + record=instance, + field=None + ) + ) + + return is_valid, errors + + def get_field_by_name(self, name): + """Get field by it's name.""" + for field in self.fields: + if field.name == name: + return field + return None diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/header.py b/tdrs-backend/tdpservice/parsers/schema_defs/header.py index 3183142521..4d2f3da9db 100644 --- 
a/tdrs-backend/tdpservice/parsers/schema_defs/header.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/header.py @@ -1,7 +1,8 @@ """Schema for HEADER row of all submission types.""" -from ..util import RowSchema, Field +from ..fields import Field +from ..row_schema import RowSchema from .. import validators @@ -41,7 +42,7 @@ validators.oneOf(['1', '2']) ]), Field(item="9", name='encryption', type='string', startIndex=21, endIndex=22, required=False, validators=[ - validators.matches('E') + validators.oneOf([' ', 'E']) ]), Field(item="10", name='update', type='string', startIndex=22, endIndex=23, required=True, validators=[ validators.oneOf(['N', 'D', 'U']) diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m1.py b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m1.py index 303bcbf4b4..2336b80474 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m1.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m1.py @@ -1,7 +1,9 @@ """Schema for SSP M1 record type.""" -from ...util import RowSchema, Field, SchemaManager +from ...util import SchemaManager +from ...fields import Field +from ...row_schema import RowSchema from ... import validators from tdpservice.search_indexes.models.ssp import SSP_M1 diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m2.py b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m2.py index 9f0671ac72..067551b284 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m2.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m2.py @@ -1,7 +1,9 @@ """Schema for SSP M1 record type.""" -from ...util import RowSchema, Field, SchemaManager +from ...util import SchemaManager +from ...fields import EncryptedField, Field, ssp_ssn_decryption_func +from ...row_schema import RowSchema from ... 
import validators from tdpservice.search_indexes.models.ssp import SSP_M2 @@ -27,8 +29,8 @@ required=True, validators=[]), Field(item=6, name='DATE_OF_BIRTH', type='string', startIndex=21, endIndex=29, required=True, validators=[]), - Field(item=7, name='SSN', type='string', startIndex=29, endIndex=38, - required=True, validators=[]), + EncryptedField(decryption_func=ssp_ssn_decryption_func, item=7, name='SSN', type='string', startIndex=29, + endIndex=38, required=True, validators=[]), Field(item=8, name='RACE_HISPANIC', type='number', startIndex=38, endIndex=39, required=True, validators=[]), Field(item=9, name='RACE_AMER_INDIAN', type='number', startIndex=39, endIndex=40, diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m3.py b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m3.py index 14b0bb4d12..290c43cd0a 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m3.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m3.py @@ -1,7 +1,9 @@ """Schema for SSP M1 record type.""" -from ...util import SchemaManager, RowSchema, Field +from ...util import SchemaManager +from ...fields import EncryptedField, Field, ssp_ssn_decryption_func +from ...row_schema import RowSchema from ... 
import validators from tdpservice.search_indexes.models.ssp import SSP_M3 @@ -22,8 +24,8 @@ required=True, validators=[]), Field(item="61", name='DATE_OF_BIRTH', type='string', startIndex=20, endIndex=28, required=True, validators=[]), - Field(item="62", name='SSN', type='string', startIndex=28, endIndex=37, - required=True, validators=[]), + EncryptedField(decryption_func=ssp_ssn_decryption_func, item="62", name='SSN', type='string', startIndex=28, + endIndex=37, required=True, validators=[]), Field(item="63A", name='RACE_HISPANIC', type='number', startIndex=37, endIndex=38, required=True, validators=[]), Field(item="63B", name='RACE_AMER_INDIAN', type='number', startIndex=38, endIndex=39, @@ -75,8 +77,8 @@ required=True, validators=[]), Field(item="61", name='DATE_OF_BIRTH', type='string', startIndex=61, endIndex=69, required=True, validators=[]), - Field(item="62", name='SSN', type='string', startIndex=69, endIndex=78, - required=True, validators=[]), + EncryptedField(decryption_func=ssp_ssn_decryption_func, item="62", name='SSN', type='string', startIndex=69, + endIndex=78, required=True, validators=[]), Field(item="63A", name='RACE_HISPANIC', type='number', startIndex=78, endIndex=79, required=True, validators=[]), Field(item="63B", name='RACE_AMER_INDIAN', type='number', startIndex=79, endIndex=80, diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py index c9c11c378b..12784d6dbc 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py @@ -1,7 +1,9 @@ """Schema for HEADER row of all submission types.""" -from ...util import RowSchema, Field, SchemaManager +from ...util import SchemaManager +from ...fields import Field +from ...row_schema import RowSchema from ... 
import validators from tdpservice.search_indexes.models.tanf import TANF_T1 diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t2.py b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t2.py index b901f7b79f..c41f4151d2 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t2.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t2.py @@ -1,7 +1,9 @@ """Schema for HEADER row of all submission types.""" -from ...util import RowSchema, Field, SchemaManager +from ...util import SchemaManager +from ...fields import EncryptedField, Field, tanf_ssn_decryption_func +from ...row_schema import RowSchema from ... import validators from tdpservice.search_indexes.models.tanf import TANF_T2 @@ -27,8 +29,8 @@ required=True, validators=[]), Field(item=6, name='DATE_OF_BIRTH', type='number', startIndex=21, endIndex=29, required=True, validators=[]), - Field(item=7, name='SSN', type='string', startIndex=29, endIndex=38, - required=True, validators=[]), + EncryptedField(decryption_func=tanf_ssn_decryption_func, item=7, name='SSN', type='string', startIndex=29, + endIndex=38, required=True, validators=[]), Field(item=8, name='RACE_HISPANIC', type='string', startIndex=38, endIndex=39, required=True, validators=[]), Field(item=9, name='RACE_AMER_INDIAN', type='string', startIndex=39, endIndex=40, diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t3.py b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t3.py index 99572e79be..779185b01d 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t3.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t3.py @@ -1,7 +1,9 @@ """Schema for HEADER row of all submission types.""" -from ...util import SchemaManager, RowSchema, Field +from ...util import SchemaManager +from ...fields import EncryptedField, Field, tanf_ssn_decryption_func +from ...row_schema import RowSchema from ... 
import validators from tdpservice.search_indexes.models.tanf import TANF_T3 @@ -23,8 +25,8 @@ required=True, validators=[]), Field(item="68", name='DATE_OF_BIRTH', type='number', startIndex=20, endIndex=28, required=True, validators=[]), - Field(item="69", name='SSN', type='string', startIndex=28, endIndex=37, - required=True, validators=[]), + EncryptedField(decryption_func=tanf_ssn_decryption_func, item="69", name='SSN', type='string', startIndex=28, + endIndex=37, required=True, validators=[]), Field(item="70A", name='RACE_HISPANIC', type='string', startIndex=37, endIndex=38, required=True, validators=[]), Field(item="70B", name='RACE_AMER_INDIAN', type='string', startIndex=38, endIndex=39, @@ -76,8 +78,8 @@ required=True, validators=[]), Field(item="68", name='DATE_OF_BIRTH', type='number', startIndex=61, endIndex=69, required=True, validators=[]), - Field(item="69", name='SSN', type='string', startIndex=69, endIndex=78, - required=True, validators=[]), + EncryptedField(decryption_func=tanf_ssn_decryption_func, item="69", name='SSN', type='string', startIndex=69, + endIndex=78, required=True, validators=[]), Field(item="70A", name='RACE_HISPANIC', type='string', startIndex=78, endIndex=79, required=True, validators=[]), Field(item="70B", name='RACE_AMER_INDIAN', type='string', startIndex=79, endIndex=80, diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/trailer.py b/tdrs-backend/tdpservice/parsers/schema_defs/trailer.py index 086c83e85a..46a4405082 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/trailer.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/trailer.py @@ -1,7 +1,8 @@ """Schema for TRAILER row of all submission types.""" -from ..util import RowSchema, Field +from ..fields import Field +from ..row_schema import RowSchema from .. 
import validators diff --git a/tdrs-backend/tdpservice/parsers/test/data/ADS.E2J.FTP1.TS06 b/tdrs-backend/tdpservice/parsers/test/data/ADS.E2J.FTP1.TS06 index db9a8490de..bb3a1bcca9 100644 --- a/tdrs-backend/tdpservice/parsers/test/data/ADS.E2J.FTP1.TS06 +++ b/tdrs-backend/tdpservice/parsers/test/data/ADS.E2J.FTP1.TS06 @@ -1,4 +1,4 @@ -HEADER20204A06 TAN1 N +HEADER20204A06 TAN1EN T12020101111111111223003403361110213120000300000000000008730010000000000000000000000000000000000222222000000002229012 T2202010111111111121219740114WTTTTTY@W2221222222221012212110014722011400000000000000000000000000000000000000000000000000000000000000000000000000000000000291 T320201011111111112120190127WTTTT90W022212222204398100000000 diff --git a/tdrs-backend/tdpservice/parsers/test/data/ADS.E2J.NDM1.TS53_fake b/tdrs-backend/tdpservice/parsers/test/data/ADS.E2J.NDM1.TS53_fake index 9c1e0c2c4d..dc3b095e10 100644 --- a/tdrs-backend/tdpservice/parsers/test/data/ADS.E2J.NDM1.TS53_fake +++ b/tdrs-backend/tdpservice/parsers/test/data/ADS.E2J.NDM1.TS53_fake @@ -1,4 +1,4 @@ -HEADER20231A53000TAN1 N +HEADER20231A53000TAN1EN T12023011111111115424200409731120233120000300000000000004170990000000000000000000000000000000000222222000000002229022 T2202301111111111543219680723WT@BT#ZZZ2222211222222012212110421811069929000000000000000000000000000000000000000000000000000000000000999900000000000000000000 T320230111111111154120060507WT@9YZ9@@21222222206310100000000 diff --git a/tdrs-backend/tdpservice/parsers/test/data/ADS.E2J.NDM1.TS53_fake.rollback b/tdrs-backend/tdpservice/parsers/test/data/ADS.E2J.NDM1.TS53_fake.rollback index ee291b35b6..be3c55ae53 100644 --- a/tdrs-backend/tdpservice/parsers/test/data/ADS.E2J.NDM1.TS53_fake.rollback +++ b/tdrs-backend/tdpservice/parsers/test/data/ADS.E2J.NDM1.TS53_fake.rollback @@ -1,4 +1,4 @@ -HEADER20231A53000TAN1 N +HEADER20231A53000TAN1EN T12023011111111115424200409731120233120000300000000000004170990000000000000000000000000000000000222222000000002229022 
T2202301111111111543219680723WT@BT#ZZZ2222211222222012212110421811069929000000000000000000000000000000000000000000000000000000000000999900000000000000000000 T320230111111111154120060507WT@9YZ9@@21222222206310100000000 @@ -50020,4 +50020,4 @@ T320230111112027110120200723WTTY0WW@T22222122204398200000000120170901WTTTP9YYZ22 T12023011111202717025900402121120213110611300000000080005280070000000000000000000000000000000000222222000000002229012 T2202301111120271701219960227WTTYY00PY1222222222221012212110372323011400000000000000000000000000000000000000000000000000000000000000000000000000000000000000 HEADER20231A53000TAN1 N -TRAILER0326888 +TRAILER0326888 diff --git a/tdrs-backend/tdpservice/parsers/test/data/small_tanf_section1.txt b/tdrs-backend/tdpservice/parsers/test/data/small_tanf_section1.txt index f317ae3be3..e906c2ed36 100644 --- a/tdrs-backend/tdpservice/parsers/test/data/small_tanf_section1.txt +++ b/tdrs-backend/tdpservice/parsers/test/data/small_tanf_section1.txt @@ -1,4 +1,4 @@ -HEADER20204A06 TAN1 N +HEADER20204A06 TAN1EN T12020101111111111223003403361110213120000300000000000008730010000000000000000000000000000000000222222000000002229012 T2202010111111111121219740114WTTTTTY@W2221222222221012212110014722011400000000000000000000000000000000000000000000000000000000000000000000000000000000000291 T320201011111111112120190127WTTTT90W022212222204398100000000 diff --git a/tdrs-backend/tdpservice/parsers/test/test_util.py b/tdrs-backend/tdpservice/parsers/test/test_util.py index 4aa2d30703..b2817174ec 100644 --- a/tdrs-backend/tdpservice/parsers/test/test_util.py +++ b/tdrs-backend/tdpservice/parsers/test/test_util.py @@ -1,7 +1,9 @@ """Test the methods of RowSchema to ensure parsing and validation work in all individual cases.""" import pytest -from ..util import SchemaManager, RowSchema, Field, value_is_empty +from ..fields import Field, value_is_empty +from ..row_schema import RowSchema +from ..util import SchemaManager def passing_validator(): diff --git 
a/tdrs-backend/tdpservice/parsers/util.py b/tdrs-backend/tdpservice/parsers/util.py index f1b67b8e69..7be2e9b5f8 100644 --- a/tdrs-backend/tdpservice/parsers/util.py +++ b/tdrs-backend/tdpservice/parsers/util.py @@ -1,9 +1,9 @@ """Utility file for functions shared between all parsers even preparser.""" -from .models import ParserError, ParserErrorCategoryChoices +from .models import ParserError from django.contrib.contenttypes.models import ContentType from tdpservice.data_files.models import DataFile from pathlib import Path - +from .fields import EncryptedField def create_test_datafile(filename, stt_user, stt, section='Active Case Data'): """Create a test DataFile instance with the given file attached.""" @@ -22,16 +22,6 @@ def create_test_datafile(filename, stt_user, stt, section='Active Case Data'): return datafile -def value_is_empty(value, length): - """Handle 'empty' values as field inputs.""" - empty_values = [ - ' '*length, # ' ' - '#'*length, # '#####' - ] - - return value is None or value in empty_values - - def generate_parser_error(datafile, line_number, schema, error_category, error_message, record=None, field=None): """Create and return a ParserError using args.""" return ParserError( @@ -68,201 +58,6 @@ def generate(schema, error_category, error_message, record=None, field=None): return generate -class Field: - """Provides a mapping between a field name and its position.""" - - def __init__(self, item, name, type, startIndex, endIndex, required=True, validators=[]): - self.item = item - self.name = name - self.type = type - self.startIndex = startIndex - self.endIndex = endIndex - self.required = required - self.validators = validators - - def create(self, item, name, length, start, end, type): - """Create a new field.""" - return Field(item, name, type, length, start, end) - - def __repr__(self): - """Return a string representation of the field.""" - return f"{self.name}({self.startIndex}-{self.endIndex})" - - def parse_value(self, line): - 
"""Parse the value for a field given a line, startIndex, endIndex, and field type.""" - value = line[self.startIndex:self.endIndex] - - if value_is_empty(value, self.endIndex-self.startIndex): - return None - - match self.type: - case 'number': - try: - value = int(value) - return value - except ValueError: - return None - case 'string': - return value - - -class RowSchema: - """Maps the schema for data lines.""" - - def __init__( - self, - model=dict, - preparsing_validators=[], - postparsing_validators=[], - fields=[], - quiet_preparser_errors=False - ): - self.model = model - self.preparsing_validators = preparsing_validators - self.postparsing_validators = postparsing_validators - self.fields = fields - self.quiet_preparser_errors = quiet_preparser_errors - - def _add_field(self, item, name, length, start, end, type): - """Add a field to the schema.""" - self.fields.append( - Field(item, name, type, start, end) - ) - - def add_fields(self, fields: list): - """Add multiple fields to the schema.""" - for field, length, start, end, type in fields: - self._add_field(field, length, start, end, type) - - def get_all_fields(self): - """Get all fields from the schema.""" - return self.fields - - def parse_and_validate(self, line, generate_error): - """Run all validation steps in order, and parse the given line into a record.""" - errors = [] - - # run preparsing validators - preparsing_is_valid, preparsing_errors = self.run_preparsing_validators(line, generate_error) - - if not preparsing_is_valid: - if self.quiet_preparser_errors: - return None, True, [] - return None, False, preparsing_errors - - # parse line to model - record = self.parse_line(line) - - # run field validators - fields_are_valid, field_errors = self.run_field_validators(record, generate_error) - - # run postparsing validators - postparsing_is_valid, postparsing_errors = self.run_postparsing_validators(record, generate_error) - - is_valid = fields_are_valid and postparsing_is_valid - errors = 
field_errors + postparsing_errors - - return record, is_valid, errors - - def run_preparsing_validators(self, line, generate_error): - """Run each of the `preparsing_validator` functions in the schema against the un-parsed line.""" - is_valid = True - errors = [] - - for validator in self.preparsing_validators: - validator_is_valid, validator_error = validator(line) - is_valid = False if not validator_is_valid else is_valid - - if validator_error and not self.quiet_preparser_errors: - errors.append( - generate_error( - schema=self, - error_category=ParserErrorCategoryChoices.PRE_CHECK, - error_message=validator_error, - record=None, - field=None - ) - ) - - return is_valid, errors - - def parse_line(self, line): - """Create a model for the line based on the schema.""" - record = self.model() - - for field in self.fields: - value = field.parse_value(line) - - if value is not None: - if isinstance(record, dict): - record[field.name] = value - else: - setattr(record, field.name, value) - - return record - - def run_field_validators(self, instance, generate_error): - """Run all validators for each field in the parsed model.""" - is_valid = True - errors = [] - - for field in self.fields: - value = None - if isinstance(instance, dict): - value = instance.get(field.name, None) - else: - value = getattr(instance, field.name, None) - - if field.required and not value_is_empty(value, field.endIndex-field.startIndex): - for validator in field.validators: - validator_is_valid, validator_error = validator(value) - is_valid = False if not validator_is_valid else is_valid - if validator_error: - errors.append( - generate_error( - schema=self, - error_category=ParserErrorCategoryChoices.FIELD_VALUE, - error_message=validator_error, - record=instance, - field=field - ) - ) - elif field.required: - is_valid = False - errors.append( - generate_error( - schema=self, - error_category=ParserErrorCategoryChoices.FIELD_VALUE, - error_message=f"{field.name} is required but a value was not 
provided.",
-                        record=instance,
-                        field=field
-                    )
-                )
-
-        return is_valid, errors
-
-    def run_postparsing_validators(self, instance, generate_error):
-        """Run each of the `postparsing_validator` functions against the parsed model."""
-        is_valid = True
-        errors = []
-
-        for validator in self.postparsing_validators:
-            validator_is_valid, validator_error = validator(instance)
-            is_valid = False if not validator_is_valid else is_valid
-            if validator_error:
-                errors.append(
-                    generate_error(
-                        schema=self,
-                        error_category=ParserErrorCategoryChoices.VALUE_CONSISTENCY,
-                        error_message=validator_error,
-                        record=instance,
-                        field=None
-                    )
-                )
-
-        return is_valid, errors
-
-
 class SchemaManager:
     """Manages one or more RowSchema's and runs all parsers and validators."""
 
@@ -278,3 +73,16 @@ def parse_and_validate(self, line, generate_error):
             records.append((record, is_valid, errors))
 
         return records
+
+    def update_encrypted_fields(self, is_encrypted):
+        """Set `is_encrypted` on every EncryptedField (or subclass) in the managed schemas."""
+        for schema in self.schemas:
+            for field in schema.fields:
+                if isinstance(field, EncryptedField):
+                    field.is_encrypted = is_encrypted
+
+def contains_encrypted_indicator(line, encryption_field):
+    """Return True when `encryption_field` is given and its parsed value in `line` is "E"."""
+    if encryption_field is not None:
+        return encryption_field.parse_value(line) == "E"
+    return False