From dc30f461ea979cc2bdc12802e56d26c9013e7eaf Mon Sep 17 00:00:00 2001
From: rvandewater
Date: Wed, 18 Sep 2024 13:07:28 +0200
Subject: [PATCH] MIMIC changes

---
 MIMIC-IV_Example/README.md | 12 +++++++-----
 MIMIC-IV_Example/run.sh | 6 ++++++
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/MIMIC-IV_Example/README.md b/MIMIC-IV_Example/README.md
index dbfebf9..516a16d 100644
--- a/MIMIC-IV_Example/README.md
+++ b/MIMIC-IV_Example/README.md
@@ -18,8 +18,8 @@ If you want to profile the time and memory costs of your ETL, also install: `pip
 Set some environment variables and download the necessary files:
 ```bash
 export MIMICIV_RAW_DIR=??? # set to the directory in which you want to store the raw MIMIC-IV data
-export MIMICIV_PRE_MEDS_DIR=??? # set to the directory in which you want to store the raw MIMIC-IV data
-export MIMICIV_MEDS_COHORT_DIR=??? # set to the directory in which you want to store the raw MIMIC-IV data
+export MIMICIV_PRE_MEDS_DIR=??? # set to the directory in which you want to store the intermediate MEDS MIMIC-IV data
+export MIMICIV_MEDS_COHORT_DIR=??? # set to the directory in which you want to store the final MEDS MIMIC-IV data
 export VERSION=0.0.6 # or whatever version you want
 export URL="https://raw.githubusercontent.com/mmcdermott/MEDS_transforms/$VERSION/MIMIC-IV_Example"
 
@@ -46,7 +46,7 @@ the root directory of where the resulting _core data files_ are stored -- e.g.,
 ## Step 1.5: Download MIMIC-IV Metadata files
 
 ```bash
-cd $MIMIC_RAW_DIR
+cd $MIMICIV_RAW_DIR
 export MIMIC_URL=https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map
 wget $MIMIC_URL/d_labitems_to_loinc.csv
 wget $MIMIC_URL/inputevents_to_rxnorm.csv
@@ -65,9 +65,11 @@ wget $MIMIC_URL/waveforms-summary.csv
 To run the MEDS ETL, run the following command:
 
 ```bash
-./run.sh $MIMICIV_RAW_DIR $MIMICIV_PRE_MEDS_DIR $MIMICIV_MEDS_DIR do_unzip=true
+./run.sh $MIMICIV_RAW_DIR $MIMICIV_PRE_MEDS_DIR $MIMICIV_MEDS_COHORT_DIR do_unzip=true
 ```
-
+> [!NOTE]
+> This can take up large amounts of memory if not parallelized. You can reduce the shard size to reduce memory usage by setting the `shard_size` parameter in the `extract_MIMIC.yaml` file.
+> Check that your environment variables are set correctly.
 To not unzip the `.csv.gz` files, set `do_unzip=false` instead of `do_unzip=true`.
 
 To use a specific stage runner file (e.g., to set different parallelism options), you can specify it as an
diff --git a/MIMIC-IV_Example/run.sh b/MIMIC-IV_Example/run.sh
index 9c06c7e..e5c0513 100755
--- a/MIMIC-IV_Example/run.sh
+++ b/MIMIC-IV_Example/run.sh
@@ -35,6 +35,12 @@ if [ "$#" -lt 3 ]; then
     display_help
 fi
 
+# Check that the do_unzip flag is not set as a positional argument
+if [[ "$1" == "do_unzip=true" || "$1" == "do_unzip=false" || "$2" == "do_unzip=true" || "$2" == "do_unzip=false" || "$3" == "do_unzip=true" || "$3" == "do_unzip=false" ]]; then
+    echo "Error: Incorrect number of arguments provided. Check if your environment variables are set correctly."
+    display_help
+fi
+
 export MIMICIV_RAW_DIR=$1
 export MIMICIV_PRE_MEDS_DIR=$2
 export MIMICIV_MEDS_COHORT_DIR=$3