Skip to content

Commit

Permalink
Merge pull request #701 from dssg/kit_caps_test
Browse files Browse the repository at this point in the history
The pull request changes the functionality of the string_is_tablesafe validation primitive to only allow lowercase letters (or numbers, underscores) in strings it checks, as well as adding additional tests for feature aggregation prefixes and subset names, both of which will be used for table names.

As described in #632, uppercase letters in these experiment config values end up getting lowercased on table creation by referenced using their uppercase forms (with quotes) at various places in the code, causing postgres to return a "table does not exist" error.

This PR also removes a redundant/conflicting dev.txt requirement of different versions of black, keeping the newer version.
  • Loading branch information
thcrock authored May 24, 2019
2 parents 5342254 + f763558 commit 3638f49
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 7 deletions.
1 change: 0 additions & 1 deletion requirement/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,5 @@
bumpversion==0.5.3
mkdocs==1.0.4
pymdown-extensions==6.0
black==18.9b0
mkdocs-material
black==19.3b0
7 changes: 4 additions & 3 deletions src/tests/test_validation_primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@

# test with a variety of strings based on letters and numbers auto-generated by hypothesis
# and also add a hardcoded example that includes underscores because those are fine
@given(text(alphabet=characters(whitelist_categories=('Lu', 'Ll', 'Nd')), min_size=1))
@given(text(alphabet=characters(whitelist_categories=('Ll', 'Nd')), min_size=1))
@example('a_valid_name')
def test_string_is_tablesafe(s):
assert string_is_tablesafe(s)


# test with a variety of strings based on unsafe characters auto-generated by hypothesis
# and also add a hardcoded example that should be bad because it has spaces
@given(text(alphabet='/ "'))
@example('Spaces are not valid')
@given(text(alphabet='/ "A'))
@example('spaces are not valid')
@example('Neither_are_CAPITAL_letters')
def test_string_is_not_tablesafe(s):
assert not string_is_tablesafe(s)
23 changes: 21 additions & 2 deletions src/triage/experiments/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,16 @@ def _validate_keys(self, aggregation_config):
)
)
)
if not string_is_tablesafe(aggregation_config['prefix']):
raise ValueError(
dedent(
f"""Section: feature_aggregations -
Feature aggregation prefix should only contain
lowercase letters, numbers, and underscores.
Aggregation config: {aggregation_config}
"""
)
)

def _validate_aggregates(self, aggregation_config):
if (
Expand Down Expand Up @@ -453,7 +463,7 @@ def _run(self, label_config):
)
if 'name' in label_config and not string_is_tablesafe(label_config['name']):
raise ValueError("Section: label_config - "
"name should only contain letters, numbers, and underscores")
"name should only contain lowercase letters, numbers, and underscores")
self._validate_query(label_config["query"])
self._validate_include_missing_labels_in_train_as(
label_config.get("include_missing_labels_in_train_as", None)
Expand Down Expand Up @@ -482,7 +492,7 @@ def _run(self, cohort_config):
)
if 'name' in cohort_config and not string_is_tablesafe(cohort_config['name']):
raise ValueError("Section: cohort_config - "
"name should only contain letters, numbers, and underscores")
"name should only contain lowercase letters, numbers, and underscores")
dated_query = query.replace("{as_of_date}", "2016-01-01")
logging.info("Validating cohort query")
try:
Expand Down Expand Up @@ -823,6 +833,15 @@ def _run(self, scoring_config):
"""
)
)
if not string_is_tablesafe(subset['name']):
raise ValueError(
dedent(
f"""Section: subsets -
The subset {subset} name should only contain
lowercase letters, numbers, and underscores
"""
)
)

# 2. Validate that query conforms to the expectations
if "{as_of_date}" not in subset["query"]:
Expand Down
2 changes: 1 addition & 1 deletion src/triage/validation_primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,4 +154,4 @@ def column_should_be_stringlike(table_name, column, db_engine):
def string_is_tablesafe(string):
if not string:
return False
return all(c.isalpha() or c.isdigit() or c == '_' for c in string)
return all((c.isalpha() and c.islower()) or c.isdigit() or c == '_' for c in string)

0 comments on commit 3638f49

Please sign in to comment.