Merge branch 'develop' into ghudson/reduce-workflows
andrew-jameson authored Aug 1, 2023
2 parents 04eedc9 + 51fd924 commit 8af6b10
Showing 21 changed files with 328 additions and 244 deletions.
4 changes: 2 additions & 2 deletions scripts/deploy-backend.sh
@@ -89,9 +89,9 @@ update_backend()

# Do a zero downtime deploy. This requires enough memory for
# two apps to exist in the org/space at one time.
cf push "$CGAPPNAME_BACKEND" --no-route -f manifest.buildpack.yml --strategy rolling || exit 1
cf push "$CGAPPNAME_BACKEND" --no-route -f manifest.buildpack.yml -t 180 --strategy rolling || exit 1
else
cf push "$CGAPPNAME_BACKEND" --no-route -f manifest.buildpack.yml
cf push "$CGAPPNAME_BACKEND" --no-route -f manifest.buildpack.yml -t 180
# set up JWT key if needed
if cf e "$CGAPPNAME_BACKEND" | grep -q JWT_KEY ; then
echo jwt cert already created
2 changes: 1 addition & 1 deletion tdrs-backend/Dockerfile
@@ -28,7 +28,7 @@ RUN groupadd -g ${gid} ${group} && useradd -u ${uid} -g ${group} -s /bin/sh ${us

RUN chown -R tdpuser /tdpapp && chmod u+x gunicorn_start.sh wait_for_services.sh

CMD ["./gunicorn_start.sh"]
#CMD ["./gunicorn_start.sh"]
# if the container crashes/loops, we can shell into it by doing the following:
# docker ps -a # to get the container id
# docker commit <container id> debug/<new image name>
5 changes: 4 additions & 1 deletion tdrs-backend/docker-compose.yml
@@ -93,7 +93,10 @@ services:
build: .
command: >
bash -c "./wait_for_services.sh &&
./gunicorn_start.sh && celery -A tdpservice.settings worker -l info"
./manage.py makemigrations &&
./manage.py migrate &&
./manage.py populate_stts &&
./gunicorn_start.sh && celery -A tdpservice.settings worker -l info"
ports:
- "5555:5555"
depends_on:
7 changes: 3 additions & 4 deletions tdrs-backend/gunicorn_start.sh
@@ -14,10 +14,9 @@ fi

#
echo "Applying database migrations"
python manage.py makemigrations
python manage.py migrate
python manage.py populate_stts
python manage.py collectstatic --noinput
#python manage.py migrate
#python manage.py populate_stts
#python manage.py collectstatic --noinput

celery -A tdpservice.settings worker -c 1 &
sleep 5
87 changes: 87 additions & 0 deletions tdrs-backend/tdpservice/parsers/fields.py
@@ -0,0 +1,87 @@
"""Datafile field representations."""

def value_is_empty(value, length):
"""Handle 'empty' values as field inputs."""
empty_values = [
' '*length, # ' '
'#'*length, # '#####'
]

return value is None or value in empty_values


class Field:
"""Provides a mapping between a field name and its position."""

def __init__(self, item, name, type, startIndex, endIndex, required=True, validators=[]):
self.item = item
self.name = name
self.type = type
self.startIndex = startIndex
self.endIndex = endIndex
self.required = required
self.validators = validators

def create(self, item, name, length, start, end, type):
"""Create a new field."""
return Field(item, name, type, length, start, end)

def __repr__(self):
"""Return a string representation of the field."""
return f"{self.name}({self.startIndex}-{self.endIndex})"

def parse_value(self, line):
"""Parse the value for a field given a line, startIndex, endIndex, and field type."""
value = line[self.startIndex:self.endIndex]

if value_is_empty(value, self.endIndex-self.startIndex):
return None

match self.type:
case 'number':
try:
value = int(value)
return value
except ValueError:
return None
case 'string':
return value

def tanf_ssn_decryption_func(value, is_encrypted):
"""Decrypt TANF SSN value."""
if is_encrypted:
decryption_dict = {"@": "1", "9": "2", "Z": "3", "P": "4", "0": "5",
"#": "6", "Y": "7", "B": "8", "W": "9", "T": "0"}
decryption_table = str.maketrans(decryption_dict)
return value.translate(decryption_table)
return value

def ssp_ssn_decryption_func(value, is_encrypted):
"""Decrypt SSP SSN value."""
if is_encrypted:
decryption_dict = {"@": "1", "9": "2", "Z": "3", "P": "4", "0": "5",
"#": "6", "Y": "7", "B": "8", "W": "9", "T": "0"}
decryption_table = str.maketrans(decryption_dict)
return value.translate(decryption_table)
return value

class EncryptedField(Field):
"""Represents an encrypted field and its position."""

def __init__(self, decryption_func, item, name, type, startIndex, endIndex, required=True, validators=[]):
super().__init__(item, name, type, startIndex, endIndex, required, validators)
self.decryption_func = decryption_func
self.is_encrypted = False

def parse_value(self, line):
"""Parse and decrypt the value for a field given a line, startIndex, endIndex, and field type."""
value = line[self.startIndex:self.endIndex]

if value_is_empty(value, self.endIndex-self.startIndex):
return None

match self.type:
case 'string':
return self.decryption_func(value, self.is_encrypted)
case _:
return None
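
The Field/EncryptedField split above is the core of the new decryption path: an EncryptedField only decrypts when its is_encrypted flag has been switched on for the file. A minimal usage sketch, assuming the tdpservice.parsers.fields module is importable; the record line and field positions below are made up for illustration.

from tdpservice.parsers.fields import EncryptedField, Field, tanf_ssn_decryption_func

# Hypothetical 12-character record: record type in columns 0-2, encrypted SSN in columns 2-11.
line = "T1@9ZP0#YBW "

record_type = Field(item=1, name='RecordType', type='string', startIndex=0, endIndex=2)
ssn = EncryptedField(decryption_func=tanf_ssn_decryption_func, item=2, name='SSN',
                     type='string', startIndex=2, endIndex=11)

print(record_type.parse_value(line))  # 'T1'
print(ssn.parse_value(line))          # '@9ZP0#YBW' -- is_encrypted defaults to False, so no decryption
ssn.is_encrypted = True               # parse.py toggles this per file via the schema manager's update_encrypted_fields
print(ssn.parse_value(line))          # '123456789' -- the substitution table maps @9ZP0#YBW back to digits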
8 changes: 6 additions & 2 deletions tdrs-backend/tdpservice/parsers/parse.py
@@ -27,6 +27,8 @@ def parse_datafile(datafile):
bulk_create_errors({1: header_errors}, 1, flush=True)
return errors

is_encrypted = util.contains_encrypted_indicator(header_line, schema_defs.header.get_field_by_name("encryption"))

# ensure file section matches upload section
program_type = header['program_type']
section = header['type']
@@ -43,7 +45,7 @@
bulk_create_errors(unsaved_parser_errors, 1, flush=True)
return errors

line_errors = parse_datafile_lines(datafile, program_type, section)
line_errors = parse_datafile_lines(datafile, program_type, section, is_encrypted)

errors = errors | line_errors

@@ -99,7 +101,7 @@ def rollback_parser_errors(datafile):
"""Delete created errors in the event of a failure."""
ParserError.objects.filter(file=datafile).delete()

def parse_datafile_lines(datafile, program_type, section):
def parse_datafile_lines(datafile, program_type, section, is_encrypted):
"""Parse lines with appropriate schema and return errors."""
rawfile = datafile.file
errors = {}
@@ -161,6 +163,8 @@ def parse_datafile_lines(datafile, program_type, section):

schema_manager = get_schema_manager(line, section, schema_manager_options)

schema_manager.update_encrypted_fields(is_encrypted)

records = manager_parse_line(line, schema_manager, generate_error)

record_number = 0
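
parse_datafile now reads the header's encryption indicator once and threads is_encrypted down to parse_datafile_lines, where each line's schema manager is told whether its EncryptedField instances should decrypt. The body of update_encrypted_fields is not part of this diff; the sketch below is only a hedged guess at the kind of propagation it performs, built from the field classes added above.

from tdpservice.parsers.fields import EncryptedField

def update_encrypted_fields_sketch(schemas, is_encrypted):
    """Hypothetical helper: mark every EncryptedField in a set of row schemas as encrypted or not."""
    for schema in schemas:
        for field in schema.get_all_fields():
            if isinstance(field, EncryptedField):
                field.is_encrypted = is_encrypted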
167 changes: 167 additions & 0 deletions tdrs-backend/tdpservice/parsers/row_schema.py
@@ -0,0 +1,167 @@
"""Row schema for datafile."""
from .models import ParserErrorCategoryChoices
from .fields import Field, value_is_empty


class RowSchema:
"""Maps the schema for data lines."""

def __init__(
self,
model=dict,
preparsing_validators=[],
postparsing_validators=[],
fields=[],
quiet_preparser_errors=False
):
self.model = model
self.preparsing_validators = preparsing_validators
self.postparsing_validators = postparsing_validators
self.fields = fields
self.quiet_preparser_errors = quiet_preparser_errors

def _add_field(self, item, name, length, start, end, type):
"""Add a field to the schema."""
self.fields.append(
Field(item, name, type, start, end)
)

def add_fields(self, fields: list):
"""Add multiple fields to the schema."""
for field, length, start, end, type in fields:
self._add_field(field, length, start, end, type)

def get_all_fields(self):
"""Get all fields from the schema."""
return self.fields

def parse_and_validate(self, line, generate_error):
"""Run all validation steps in order, and parse the given line into a record."""
errors = []

# run preparsing validators
preparsing_is_valid, preparsing_errors = self.run_preparsing_validators(line, generate_error)

if not preparsing_is_valid:
if self.quiet_preparser_errors:
return None, True, []
return None, False, preparsing_errors

# parse line to model
record = self.parse_line(line)

# run field validators
fields_are_valid, field_errors = self.run_field_validators(record, generate_error)

# run postparsing validators
postparsing_is_valid, postparsing_errors = self.run_postparsing_validators(record, generate_error)

is_valid = fields_are_valid and postparsing_is_valid
errors = field_errors + postparsing_errors

return record, is_valid, errors

def run_preparsing_validators(self, line, generate_error):
"""Run each of the `preparsing_validator` functions in the schema against the un-parsed line."""
is_valid = True
errors = []

for validator in self.preparsing_validators:
validator_is_valid, validator_error = validator(line)
is_valid = False if not validator_is_valid else is_valid

if validator_error and not self.quiet_preparser_errors:
errors.append(
generate_error(
schema=self,
error_category=ParserErrorCategoryChoices.PRE_CHECK,
error_message=validator_error,
record=None,
field=None
)
)

return is_valid, errors

def parse_line(self, line):
"""Create a model for the line based on the schema."""
record = self.model()

for field in self.fields:
value = field.parse_value(line)

if value is not None:
if isinstance(record, dict):
record[field.name] = value
else:
setattr(record, field.name, value)

return record

def run_field_validators(self, instance, generate_error):
"""Run all validators for each field in the parsed model."""
is_valid = True
errors = []

for field in self.fields:
value = None
if isinstance(instance, dict):
value = instance.get(field.name, None)
else:
value = getattr(instance, field.name, None)

if field.required and not value_is_empty(value, field.endIndex-field.startIndex):
for validator in field.validators:
validator_is_valid, validator_error = validator(value)
is_valid = False if not validator_is_valid else is_valid
if validator_error:
errors.append(
generate_error(
schema=self,
error_category=ParserErrorCategoryChoices.FIELD_VALUE,
error_message=validator_error,
record=instance,
field=field
)
)
elif field.required:
is_valid = False
errors.append(
generate_error(
schema=self,
error_category=ParserErrorCategoryChoices.FIELD_VALUE,
error_message=f"{field.name} is required but a value was not provided.",
record=instance,
field=field
)
)

return is_valid, errors

def run_postparsing_validators(self, instance, generate_error):
"""Run each of the `postparsing_validator` functions against the parsed model."""
is_valid = True
errors = []

for validator in self.postparsing_validators:
validator_is_valid, validator_error = validator(instance)
is_valid = False if not validator_is_valid else is_valid
if validator_error:
errors.append(
generate_error(
schema=self,
error_category=ParserErrorCategoryChoices.VALUE_CONSISTENCY,
error_message=validator_error,
record=instance,
field=None
)
)

return is_valid, errors

def get_field_by_name(self, name):
"""Get field by it's name."""
for field in self.fields:
if field.name == name:
return field
return None
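
RowSchema ties the pieces together: preparsing validators run against the raw line, fields are parsed into the model, then field and postparsing validators run. A toy end-to-end sketch, with a made-up two-field layout and a stub error generator standing in for the factory that parse.py normally supplies.

from tdpservice.parsers.fields import Field
from tdpservice.parsers.row_schema import RowSchema

def generate_error(schema, error_category, error_message, record, field):
    """Stub for the per-line error factory that parse.py builds for each datafile line."""
    return {"category": error_category, "message": error_message}

schema = RowSchema(
    model=dict,
    preparsing_validators=[lambda line: (len(line) == 6, None if len(line) == 6 else "Line is not 6 characters.")],
    fields=[
        Field(item=1, name='RecordType', type='string', startIndex=0, endIndex=2),
        Field(item=2, name='MONTHS', type='number', startIndex=2, endIndex=6,
              validators=[lambda v: (v > 0, None if v > 0 else "MONTHS must be positive.")]),
    ],
)

record, is_valid, errors = schema.parse_and_validate("T10012", generate_error)
print(record, is_valid, errors)  # {'RecordType': 'T1', 'MONTHS': 12} True []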
5 changes: 3 additions & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/header.py
@@ -1,7 +1,8 @@
"""Schema for HEADER row of all submission types."""


from ..util import RowSchema, Field
from ..fields import Field
from ..row_schema import RowSchema
from .. import validators


@@ -41,7 +42,7 @@
validators.oneOf(['1', '2'])
]),
Field(item="9", name='encryption', type='string', startIndex=21, endIndex=22, required=False, validators=[
validators.matches('E')
validators.oneOf([' ', 'E'])
]),
Field(item="10", name='update', type='string', startIndex=22, endIndex=23, required=True, validators=[
validators.oneOf(['N', 'D', 'U'])
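
Loosening the encryption validator from matches('E') to oneOf([' ', 'E']) lets unencrypted submissions (a blank indicator) pass header validation instead of erroring. A hedged sketch of how that one-character field reads out of a header line, using the Field class added in this commit; the header text below is made up for illustration.

from tdpservice.parsers.fields import Field

header_line = "HEADER20231A06   TAN1ED"  # hypothetical header; column 21 holds the encryption indicator
encryption = Field(item="9", name='encryption', type='string', startIndex=21, endIndex=22, required=False)
print(encryption.parse_value(header_line))  # 'E' -> SSNs are encrypted; a space here would mean plaintext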
4 changes: 3 additions & 1 deletion tdrs-backend/tdpservice/parsers/schema_defs/ssp/m1.py
@@ -1,7 +1,9 @@
"""Schema for SSP M1 record type."""


from ...util import RowSchema, Field, SchemaManager
from ...util import SchemaManager
from ...fields import Field
from ...row_schema import RowSchema
from ... import validators
from tdpservice.search_indexes.models.ssp import SSP_M1

8 changes: 5 additions & 3 deletions tdrs-backend/tdpservice/parsers/schema_defs/ssp/m2.py
@@ -1,7 +1,9 @@
"""Schema for SSP M1 record type."""


from ...util import RowSchema, Field, SchemaManager
from ...util import SchemaManager
from ...fields import EncryptedField, Field, ssp_ssn_decryption_func
from ...row_schema import RowSchema
from ... import validators
from tdpservice.search_indexes.models.ssp import SSP_M2

@@ -27,8 +29,8 @@
required=True, validators=[]),
Field(item=6, name='DATE_OF_BIRTH', type='string', startIndex=21, endIndex=29,
required=True, validators=[]),
Field(item=7, name='SSN', type='string', startIndex=29, endIndex=38,
required=True, validators=[]),
EncryptedField(decryption_func=ssp_ssn_decryption_func, item=7, name='SSN', type='string', startIndex=29,
endIndex=38, required=True, validators=[]),
Field(item=8, name='RACE_HISPANIC', type='number', startIndex=38, endIndex=39,
required=True, validators=[]),
Field(item=9, name='RACE_AMER_INDIAN', type='number', startIndex=39, endIndex=40,
