Merge branch 'develop' into ghudson/reduce-workflows
andrew-jameson authored Aug 1, 2023
2 parents 04eedc9 + 51fd924 commit 8af6b10
Showing 21 changed files with 328 additions and 244 deletions.
4 changes: 2 additions & 2 deletions scripts/deploy-backend.sh
@@ -89,9 +89,9 @@ update_backend()

# Do a zero downtime deploy. This requires enough memory for
# two apps to exist in the org/space at one time.
cf push "$CGAPPNAME_BACKEND" --no-route -f manifest.buildpack.yml --strategy rolling || exit 1
cf push "$CGAPPNAME_BACKEND" --no-route -f manifest.buildpack.yml -t 180 --strategy rolling || exit 1
else
cf push "$CGAPPNAME_BACKEND" --no-route -f manifest.buildpack.yml
cf push "$CGAPPNAME_BACKEND" --no-route -f manifest.buildpack.yml -t 180
# set up JWT key if needed
if cf e "$CGAPPNAME_BACKEND" | grep -q JWT_KEY ; then
echo jwt cert already created
2 changes: 1 addition & 1 deletion tdrs-backend/Dockerfile
@@ -28,7 +28,7 @@ RUN groupadd -g ${gid} ${group} && useradd -u ${uid} -g ${group} -s /bin/sh ${us

RUN chown -R tdpuser /tdpapp && chmod u+x gunicorn_start.sh wait_for_services.sh

CMD ["./gunicorn_start.sh"]
#CMD ["./gunicorn_start.sh"]
# if the container crashes/loops, we can shell into it by doing the following:
# docker ps -a # to get the container id
# docker commit <container id> debug/<new image name>
5 changes: 4 additions & 1 deletion tdrs-backend/docker-compose.yml
@@ -93,7 +93,10 @@ services:
build: .
command: >
bash -c "./wait_for_services.sh &&
./gunicorn_start.sh && celery -A tdpservice.settings worker -l info"
./manage.py makemigrations &&
./manage.py migrate &&
./manage.py populate_stts &&
./gunicorn_start.sh && celery -A tdpservice.settings worker -l info"
ports:
- "5555:5555"
depends_on:
7 changes: 3 additions & 4 deletions tdrs-backend/gunicorn_start.sh
@@ -14,10 +14,9 @@ fi

#
echo "Applying database migrations"
python manage.py makemigrations
python manage.py migrate
python manage.py populate_stts
python manage.py collectstatic --noinput
#python manage.py migrate
#python manage.py populate_stts
#python manage.py collectstatic --noinput

celery -A tdpservice.settings worker -c 1 &
sleep 5
87 changes: 87 additions & 0 deletions tdrs-backend/tdpservice/parsers/fields.py
@@ -0,0 +1,87 @@
"""Datafile field representations."""

def value_is_empty(value, length):
"""Handle 'empty' values as field inputs."""
empty_values = [
' '*length, # ' '
'#'*length, # '#####'
]

return value is None or value in empty_values


class Field:
"""Provides a mapping between a field name and its position."""

def __init__(self, item, name, type, startIndex, endIndex, required=True, validators=[]):
self.item = item
self.name = name
self.type = type
self.startIndex = startIndex
self.endIndex = endIndex
self.required = required
self.validators = validators

def create(self, item, name, length, start, end, type):
"""Create a new field."""
return Field(item, name, type, length, start, end)

def __repr__(self):
"""Return a string representation of the field."""
return f"{self.name}({self.startIndex}-{self.endIndex})"

def parse_value(self, line):
"""Parse the value for a field given a line, startIndex, endIndex, and field type."""
value = line[self.startIndex:self.endIndex]

if value_is_empty(value, self.endIndex-self.startIndex):
return None

match self.type:
case 'number':
try:
value = int(value)
return value
except ValueError:
return None
case 'string':
return value

def tanf_ssn_decryption_func(value, is_encrypted):
"""Decrypt TANF SSN value."""
if is_encrypted:
decryption_dict = {"@": "1", "9": "2", "Z": "3", "P": "4", "0": "5",
"#": "6", "Y": "7", "B": "8", "W": "9", "T": "0"}
decryption_table = str.maketrans(decryption_dict)
return value.translate(decryption_table)
return value

def ssp_ssn_decryption_func(value, is_encrypted):
"""Decrypt SSP SSN value."""
if is_encrypted:
decryption_dict = {"@": "1", "9": "2", "Z": "3", "P": "4", "0": "5",
"#": "6", "Y": "7", "B": "8", "W": "9", "T": "0"}
decryption_table = str.maketrans(decryption_dict)
return value.translate(decryption_table)
return value

class EncryptedField(Field):
"""Represents an encrypted field and its position."""

def __init__(self, decryption_func, item, name, type, startIndex, endIndex, required=True, validators=[]):
super().__init__(item, name, type, startIndex, endIndex, required, validators)
self.decryption_func = decryption_func
self.is_encrypted = False

def parse_value(self, line):
"""Parse and decrypt the value for a field given a line, startIndex, endIndex, and field type."""
value = line[self.startIndex:self.endIndex]

if value_is_empty(value, self.endIndex-self.startIndex):
return None

match self.type:
case 'string':
return self.decryption_func(value, self.is_encrypted)
case _:
return None
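
The Field/EncryptedField split above is the core of the new decryption path: an EncryptedField only decrypts when its is_encrypted flag has been switched on for the file. A minimal usage sketch, assuming the tdpservice.parsers.fields module is importable; the record line and field positions below are made up for illustration.

from tdpservice.parsers.fields import EncryptedField, Field, tanf_ssn_decryption_func

# Hypothetical 12-character record: record type in columns 0-2, encrypted SSN in columns 2-11.
line = "T1@9ZP0#YBW "

record_type = Field(item=1, name='RecordType', type='string', startIndex=0, endIndex=2)
ssn = EncryptedField(decryption_func=tanf_ssn_decryption_func, item=2, name='SSN',
                     type='string', startIndex=2, endIndex=11)

print(record_type.parse_value(line))  # 'T1'
print(ssn.parse_value(line))          # '@9ZP0#YBW' -- is_encrypted defaults to False, so no decryption
ssn.is_encrypted = True               # parse.py toggles this per file via the schema manager's update_encrypted_fields
print(ssn.parse_value(line))          # '123456789' -- the substitution table maps @9ZP0#YBW back to digits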
8 changes: 6 additions & 2 deletions tdrs-backend/tdpservice/parsers/parse.py
@@ -27,6 +27,8 @@ def parse_datafile(datafile):
bulk_create_errors({1: header_errors}, 1, flush=True)
return errors

is_encrypted = util.contains_encrypted_indicator(header_line, schema_defs.header.get_field_by_name("encryption"))

# ensure file section matches upload section
program_type = header['program_type']
section = header['type']
@@ -43,7 +45,7 @@
bulk_create_errors(unsaved_parser_errors, 1, flush=True)
return errors

line_errors = parse_datafile_lines(datafile, program_type, section)
line_errors = parse_datafile_lines(datafile, program_type, section, is_encrypted)

errors = errors | line_errors

@@ -99,7 +101,7 @@ def rollback_parser_errors(datafile):
"""Delete created errors in the event of a failure."""
ParserError.objects.filter(file=datafile).delete()

def parse_datafile_lines(datafile, program_type, section):
def parse_datafile_lines(datafile, program_type, section, is_encrypted):
"""Parse lines with appropriate schema and return errors."""
rawfile = datafile.file
errors = {}
@@ -161,6 +163,8 @@ def parse_datafile_lines(datafile, program_type, section):

schema_manager = get_schema_manager(line, section, schema_manager_options)

schema_manager.update_encrypted_fields(is_encrypted)

records = manager_parse_line(line, schema_manager, generate_error)

record_number = 0
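
parse_datafile now reads the header's encryption indicator once and threads is_encrypted down to parse_datafile_lines, where each line's schema manager is told whether its EncryptedField instances should decrypt. The body of update_encrypted_fields is not part of this diff; the sketch below is only a hedged guess at the kind of propagation it performs, built from the field classes added above.

from tdpservice.parsers.fields import EncryptedField

def update_encrypted_fields_sketch(schemas, is_encrypted):
    """Hypothetical helper: mark every EncryptedField in a set of row schemas as encrypted or not."""
    for schema in schemas:
        for field in schema.get_all_fields():
            if isinstance(field, EncryptedField):
                field.is_encrypted = is_encrypted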
167 changes: 167 additions & 0 deletions tdrs-backend/tdpservice/parsers/row_schema.py
@@ -0,0 +1,167 @@
"""Row schema for datafile."""
from .models import ParserErrorCategoryChoices
from .fields import Field, value_is_empty


class RowSchema:
"""Maps the schema for data lines."""

def __init__(
self,
model=dict,
preparsing_validators=[],
postparsing_validators=[],
fields=[],
quiet_preparser_errors=False
):
self.model = model
self.preparsing_validators = preparsing_validators
self.postparsing_validators = postparsing_validators
self.fields = fields
self.quiet_preparser_errors = quiet_preparser_errors

def _add_field(self, item, name, length, start, end, type):
"""Add a field to the schema."""
self.fields.append(
Field(item, name, type, start, end)
)

def add_fields(self, fields: list):
"""Add multiple fields to the schema."""
for field, length, start, end, type in fields:
self._add_field(field, length, start, end, type)

def get_all_fields(self):
"""Get all fields from the schema."""
return self.fields

def parse_and_validate(self, line, generate_error):
"""Run all validation steps in order, and parse the given line into a record."""
errors = []

# run preparsing validators
preparsing_is_valid, preparsing_errors = self.run_preparsing_validators(line, generate_error)

if not preparsing_is_valid:
if self.quiet_preparser_errors:
return None, True, []
return None, False, preparsing_errors

# parse line to model
record = self.parse_line(line)

# run field validators
fields_are_valid, field_errors = self.run_field_validators(record, generate_error)

# run postparsing validators
postparsing_is_valid, postparsing_errors = self.run_postparsing_validators(record, generate_error)

is_valid = fields_are_valid and postparsing_is_valid
errors = field_errors + postparsing_errors

return record, is_valid, errors

def run_preparsing_validators(self, line, generate_error):
"""Run each of the `preparsing_validator` functions in the schema against the un-parsed line."""
is_valid = True
errors = []

for validator in self.preparsing_validators:
validator_is_valid, validator_error = validator(line)
is_valid = False if not validator_is_valid else is_valid

if validator_error and not self.quiet_preparser_errors:
errors.append(
generate_error(
schema=self,
error_category=ParserErrorCategoryChoices.PRE_CHECK,
error_message=validator_error,
record=None,
field=None
)
)

return is_valid, errors

def parse_line(self, line):
"""Create a model for the line based on the schema."""
record = self.model()

for field in self.fields:
value = field.parse_value(line)

if value is not None:
if isinstance(record, dict):
record[field.name] = value
else:
setattr(record, field.name, value)

return record

def run_field_validators(self, instance, generate_error):
"""Run all validators for each field in the parsed model."""
is_valid = True
errors = []

for field in self.fields:
value = None
if isinstance(instance, dict):
value = instance.get(field.name, None)
else:
value = getattr(instance, field.name, None)

if field.required and not value_is_empty(value, field.endIndex-field.startIndex):
for validator in field.validators:
validator_is_valid, validator_error = validator(value)
is_valid = False if not validator_is_valid else is_valid
if validator_error:
errors.append(
generate_error(
schema=self,
error_category=ParserErrorCategoryChoices.FIELD_VALUE,
error_message=validator_error,
record=instance,
field=field
)
)
elif field.required:
is_valid = False
errors.append(
generate_error(
schema=self,
error_category=ParserErrorCategoryChoices.FIELD_VALUE,
error_message=f"{field.name} is required but a value was not provided.",
record=instance,
field=field
)
)

return is_valid, errors

def run_postparsing_validators(self, instance, generate_error):
"""Run each of the `postparsing_validator` functions against the parsed model."""
is_valid = True
errors = []

for validator in self.postparsing_validators:
validator_is_valid, validator_error = validator(instance)
is_valid = False if not validator_is_valid else is_valid
if validator_error:
errors.append(
generate_error(
schema=self,
error_category=ParserErrorCategoryChoices.VALUE_CONSISTENCY,
error_message=validator_error,
record=instance,
field=None
)
)

return is_valid, errors

def get_field_by_name(self, name):
"""Get field by it's name."""
for field in self.fields:
if field.name == name:
return field
return None
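
RowSchema ties the pieces together: preparsing validators run against the raw line, fields are parsed into the model, then field and postparsing validators run. A toy end-to-end sketch, with a made-up two-field layout and a stub error generator standing in for the factory that parse.py normally supplies.

from tdpservice.parsers.fields import Field
from tdpservice.parsers.row_schema import RowSchema

def generate_error(schema, error_category, error_message, record, field):
    """Stub for the per-line error factory that parse.py builds for each datafile line."""
    return {"category": error_category, "message": error_message}

schema = RowSchema(
    model=dict,
    preparsing_validators=[lambda line: (len(line) == 6, None if len(line) == 6 else "Line is not 6 characters.")],
    fields=[
        Field(item=1, name='RecordType', type='string', startIndex=0, endIndex=2),
        Field(item=2, name='MONTHS', type='number', startIndex=2, endIndex=6,
              validators=[lambda v: (v > 0, None if v > 0 else "MONTHS must be positive.")]),
    ],
)

record, is_valid, errors = schema.parse_and_validate("T10012", generate_error)
print(record, is_valid, errors)  # {'RecordType': 'T1', 'MONTHS': 12} True []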
5 changes: 3 additions & 2 deletions tdrs-backend/tdpservice/parsers/schema_defs/header.py
@@ -1,7 +1,8 @@
"""Schema for HEADER row of all submission types."""


from ..util import RowSchema, Field
from ..fields import Field
from ..row_schema import RowSchema
from .. import validators


@@ -41,7 +42,7 @@
validators.oneOf(['1', '2'])
]),
Field(item="9", name='encryption', type='string', startIndex=21, endIndex=22, required=False, validators=[
validators.matches('E')
validators.oneOf([' ', 'E'])
]),
Field(item="10", name='update', type='string', startIndex=22, endIndex=23, required=True, validators=[
validators.oneOf(['N', 'D', 'U'])
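
Loosening the encryption validator from matches('E') to oneOf([' ', 'E']) lets unencrypted submissions (a blank indicator) pass header validation instead of erroring. A hedged sketch of how that one-character field reads out of a header line, using the Field class added in this commit; the header text below is made up for illustration.

from tdpservice.parsers.fields import Field

header_line = "HEADER20231A06   TAN1ED"  # hypothetical header; column 21 holds the encryption indicator
encryption = Field(item="9", name='encryption', type='string', startIndex=21, endIndex=22, required=False)
print(encryption.parse_value(header_line))  # 'E' -> SSNs are encrypted; a space here would mean plaintext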
4 changes: 3 additions & 1 deletion tdrs-backend/tdpservice/parsers/schema_defs/ssp/m1.py
@@ -1,7 +1,9 @@
"""Schema for SSP M1 record type."""


from ...util import RowSchema, Field, SchemaManager
from ...util import SchemaManager
from ...fields import Field
from ...row_schema import RowSchema
from ... import validators
from tdpservice.search_indexes.models.ssp import SSP_M1

8 changes: 5 additions & 3 deletions tdrs-backend/tdpservice/parsers/schema_defs/ssp/m2.py
@@ -1,7 +1,9 @@
"""Schema for SSP M1 record type."""


from ...util import RowSchema, Field, SchemaManager
from ...util import SchemaManager
from ...fields import EncryptedField, Field, ssp_ssn_decryption_func
from ...row_schema import RowSchema
from ... import validators
from tdpservice.search_indexes.models.ssp import SSP_M2

@@ -27,8 +29,8 @@
required=True, validators=[]),
Field(item=6, name='DATE_OF_BIRTH', type='string', startIndex=21, endIndex=29,
required=True, validators=[]),
Field(item=7, name='SSN', type='string', startIndex=29, endIndex=38,
required=True, validators=[]),
EncryptedField(decryption_func=ssp_ssn_decryption_func, item=7, name='SSN', type='string', startIndex=29,
endIndex=38, required=True, validators=[]),
Field(item=8, name='RACE_HISPANIC', type='number', startIndex=38, endIndex=39,
required=True, validators=[]),
Field(item=9, name='RACE_AMER_INDIAN', type='number', startIndex=39, endIndex=40,
