Skip to content

Commit

Permalink
Added an integration test for extract_values -- it needs to wait for …
Browse files Browse the repository at this point in the history
…match and revise to be fully implementable, so it is a scaffold for now until #119 goes in
  • Loading branch information
mmcdermott committed Aug 7, 2024
1 parent 10fedf0 commit c463548
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 0 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ MEDS_transform-filter_patients = "MEDS_transforms.filters.filter_patients:main"
## Transforms
MEDS_transform-reorder_measurements = "MEDS_transforms.transforms.reorder_measurements:main"
MEDS_transform-add_time_derived_measurements = "MEDS_transforms.transforms.add_time_derived_measurements:main"
MEDS_transform-extract_values = "MEDS_transforms.transforms.extract_values:main"
MEDS_transform-normalization = "MEDS_transforms.transforms.normalization:main"
MEDS_transform-occlude_outliers = "MEDS_transforms.transforms.occlude_outliers:main"
MEDS_transform-tensorization = "MEDS_transforms.transforms.tensorization:main"
Expand Down
123 changes: 123 additions & 0 deletions tests/test_extract_values.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""Tests the extract values script.
Set the bash env variable `DO_USE_LOCAL_SCRIPTS=1` to use the local py files, rather than the installed
scripts.
"""

from .transform_tester_base import EXTRACT_VALUES_SCRIPT, single_stage_transform_tester
from .utils import parse_meds_csvs

# Raw input CSV for the "train/0" shard. BP and TEMP rows carry their readings
# as strings in `text_value` (e.g. "120/80", "100F"); the HR row is already
# numeric. Rows with an empty `time` field are the EYE_COLOR entries.
MEDS_TRAIN_0 = """
patient_id,time,code,numeric_value,text_value
239684,,EYE_COLOR//BROWN,,
239684,"12/28/1980, 00:00:00",DOB,,
239684,"05/11/2010, 17:41:51",BP,,"120/80"
1195293,,EYE_COLOR//BLUE,,
1195293,"06/20/1978, 00:00:00",DOB,,
1195293,"06/20/2010, 19:23:52",BP,,"144/96"
1195293,"06/20/2010, 19:23:52",HR,80,
1195293,"06/20/2010, 19:23:52",TEMP,,"100F"
"""

# Raw input CSV for the "train/1" shard.
# NOTE(review): the 814703 HR row appears twice (once before that patient's
# EYE_COLOR/DOB rows), and a TEMP row for patient 1195293 is interleaved here
# even though that patient's other rows live in the train/0 fixture --
# presumably placeholder scaffold data; confirm before relying on it.
MEDS_TRAIN_1 = """
patient_id,time,code,numeric_value,text_value
68729,,EYE_COLOR//HAZEL,,
68729,"03/09/1978, 00:00:00",DOB,,
814703,"02/05/2010, 05:55:39",HR,170.2,
1195293,"06/20/2010, 19:23:52",TEMP,,"37C"
814703,,EYE_COLOR//HAZEL,,
814703,"03/28/1976, 00:00:00",DOB,,
814703,"02/05/2010, 05:55:39",HR,170.2,
"""

# Raw input CSV for the "tuning/0" shard: one patient with a numeric HR row
# plus BP ("134/76") and TEMP ("36.2C") readings stored in `text_value`.
MEDS_TUNING_0 = """
patient_id,time,code,numeric_value,text_value
754281,,EYE_COLOR//BROWN,,
754281,"12/19/1988, 00:00:00",DOB,,
754281,"01/03/2010, 06:27:59",HR,142.0,
754281,"06/20/2010, 20:23:50",BP,,"134/76"
754281,"06/20/2010, 21:00:02",TEMP,,"36.2C"
"""

# Raw input CSV for the "held_out/0" shard: one patient with a numeric HR row
# and a BP reading stored as a string in `text_value`.
# Every data row must supply all five header columns; the HR row carries an
# explicit trailing comma so its empty `text_value` field is present rather
# than leaving the row one field short.
MEDS_HELD_OUT_0 = """
patient_id,time,code,numeric_value,text_value
1500733,,EYE_COLOR//BROWN,,
1500733,"07/20/1986, 00:00:00",DOB,,
1500733,"06/03/2010, 14:54:38",HR,91.4,
1500733,"06/03/2010, 14:54:38",BP,,"123/82"
"""

# Parsed input shards, keyed by "<split>/<shard index>"; each raw CSV string
# above is handed to parse_meds_csvs under its shard name.
INPUT_SHARDS = parse_meds_csvs(
{
"train/0": MEDS_TRAIN_0,
"train/1": MEDS_TRAIN_1,
"tuning/0": MEDS_TUNING_0,
"held_out/0": MEDS_HELD_OUT_0,
}
)

# Expected output CSV for the "train/0" shard. Relative to the input, each BP
# row becomes a BP//SYSTOLIC and a BP//DIASTOLIC row with numeric values, and
# the TEMP "100F" row becomes TEMP//F with numeric_value 100; `text_value` is
# empty on every row.
WANT_TRAIN_0 = """
patient_id,time,code,numeric_value,text_value
239684,,EYE_COLOR//BROWN,,
239684,"12/28/1980, 00:00:00",DOB,,
239684,"05/11/2010, 17:41:51",BP//SYSTOLIC,120,
239684,"05/11/2010, 17:41:51",BP//DIASTOLIC,80,
1195293,,EYE_COLOR//BLUE,,
1195293,"06/20/1978, 00:00:00",DOB,,
1195293,"06/20/2010, 19:23:52",BP//SYSTOLIC,144,
1195293,"06/20/2010, 19:23:52",BP//DIASTOLIC,96,
1195293,"06/20/2010, 19:23:52",HR,80,
1195293,"06/20/2010, 19:23:52",TEMP//F,100,
"""

# Expected output CSV for the "train/1" shard: identical to the input rows
# except that the TEMP "37C" row becomes TEMP//C with numeric_value 37.
# NOTE(review): mirrors the input fixture's row order, including the repeated
# 814703 HR row -- confirm this is the intended expectation.
WANT_TRAIN_1 = """
patient_id,time,code,numeric_value,text_value
68729,,EYE_COLOR//HAZEL,,
68729,"03/09/1978, 00:00:00",DOB,,
814703,"02/05/2010, 05:55:39",HR,170.2,
1195293,"06/20/2010, 19:23:52",TEMP//C,37,
814703,,EYE_COLOR//HAZEL,,
814703,"03/28/1976, 00:00:00",DOB,,
814703,"02/05/2010, 05:55:39",HR,170.2,
"""

# Expected output CSV for the "tuning/0" shard: BP "134/76" is split into
# BP//SYSTOLIC (134) and BP//DIASTOLIC (76), and TEMP "36.2C" becomes TEMP//C
# with numeric_value 36.2.
WANT_TUNING_0 = """
patient_id,time,code,numeric_value,text_value
754281,,EYE_COLOR//BROWN,,
754281,"12/19/1988, 00:00:00",DOB,,
754281,"01/03/2010, 06:27:59",HR,142.0,
754281,"06/20/2010, 20:23:50",BP//SYSTOLIC,134,
754281,"06/20/2010, 20:23:50",BP//DIASTOLIC,76,
754281,"06/20/2010, 21:00:02",TEMP//C,36.2,
"""

# Expected output CSV for the "held_out/0" shard: BP "123/82" is split into
# BP//SYSTOLIC (123) and BP//DIASTOLIC (82); the HR row is unchanged.
# Every data row must supply all five header columns; the HR row carries an
# explicit trailing comma so its empty `text_value` field is present rather
# than leaving the row one field short.
WANT_HELD_OUT_0 = """
patient_id,time,code,numeric_value,text_value
1500733,,EYE_COLOR//BROWN,,
1500733,"07/20/1986, 00:00:00",DOB,,
1500733,"06/03/2010, 14:54:38",HR,91.4,
1500733,"06/03/2010, 14:54:38",BP//SYSTOLIC,123,
1500733,"06/03/2010, 14:54:38",BP//DIASTOLIC,82,
"""

# Parsed expected-output shards, keyed by the same "<split>/<shard index>"
# names used for the input shards.
WANT_SHARDS = parse_meds_csvs(
{
"train/0": WANT_TRAIN_0,
"train/1": WANT_TRAIN_1,
"tuning/0": WANT_TUNING_0,
"held_out/0": WANT_HELD_OUT_0,
}
)


def test_extract_values():
    """Run the ``extract_values`` stage on the input shards and compare to the wanted shards.

    Delegates to ``single_stage_transform_tester`` with ``EXTRACT_VALUES_SCRIPT``, feeding it
    ``INPUT_SHARDS`` and expecting ``WANT_SHARDS``, using a config file for the run.

    NOTE(review): the stage-specific keyword arguments are still an empty placeholder, so this
    test is a scaffold until the stage configuration is filled in.
    """
    stage_kwargs = {
        # TODO
    }
    single_stage_transform_tester(
        transform_script=EXTRACT_VALUES_SCRIPT,
        stage_name="extract_values",
        transform_stage_kwargs=stage_kwargs,
        input_shards=INPUT_SHARDS,
        want_outputs=WANT_SHARDS,
        do_use_config_file=True,
    )
2 changes: 2 additions & 0 deletions tests/transform_tester_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
# Transforms
ADD_TIME_DERIVED_MEASUREMENTS_SCRIPT = transforms_root / "add_time_derived_measurements.py"
REORDER_MEASUREMENTS_SCRIPT = transforms_root / "reorder_measurements.py"
EXTRACT_VALUES_SCRIPT = transforms_root / "extract_values.py"
NORMALIZATION_SCRIPT = transforms_root / "normalization.py"
OCCLUDE_OUTLIERS_SCRIPT = transforms_root / "occlude_outliers.py"
TENSORIZATION_SCRIPT = transforms_root / "tensorization.py"
Expand All @@ -49,6 +50,7 @@
# Transforms
ADD_TIME_DERIVED_MEASUREMENTS_SCRIPT = "MEDS_transform-add_time_derived_measurements"
REORDER_MEASUREMENTS_SCRIPT = "MEDS_transform-reorder_measurements"
EXTRACT_VALUES_SCRIPT = "MEDS_transform-extract_values"
NORMALIZATION_SCRIPT = "MEDS_transform-normalization"
OCCLUDE_OUTLIERS_SCRIPT = "MEDS_transform-occlude_outliers"
TENSORIZATION_SCRIPT = "MEDS_transform-tensorization"
Expand Down

0 comments on commit c463548

Please sign in to comment.