diff --git a/MIMIC-IV_Example/configs/event_configs.yaml b/MIMIC-IV_Example/configs/event_configs.yaml index 666bdd3..9d23bb6 100644 --- a/MIMIC-IV_Example/configs/event_configs.yaml +++ b/MIMIC-IV_Example/configs/event_configs.yaml @@ -165,8 +165,8 @@ hosp/procedures_icd: hosp/d_icd_procedures: description: "long_title" parent_codes: # List of objects are string labels mapping to filters to be evaluated. - - "ICD{icd_version}Proc/{icd_code}": { icd_version: 9 } - - "ICD{icd_version}PCS/{icd_code}": { icd_version: 10 } + - "ICD{icd_version}Proc/{icd_code}": { icd_version: "9" } + - "ICD{icd_version}PCS/{icd_code}": { icd_version: "10" } hosp/transfers: transfer: diff --git a/pyproject.toml b/pyproject.toml index a9b3292..ef35299 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "polars~=1.1.0", "pyarrow", "nested_ragged_tensors", "loguru", "hydra-core", "numpy", "meds==0.3.3", + "polars~=1.6.0", "pyarrow", "nested_ragged_tensors", "loguru", "hydra-core", "numpy", "meds==0.3.3", ] [tool.setuptools_scm] diff --git a/src/MEDS_transforms/extract/extract_code_metadata.py b/src/MEDS_transforms/extract/extract_code_metadata.py index 31d883c..3460cfb 100644 --- a/src/MEDS_transforms/extract/extract_code_metadata.py +++ b/src/MEDS_transforms/extract/extract_code_metadata.py @@ -386,7 +386,7 @@ def main(cfg: DictConfig): metadata_fp, read_fn = get_supported_fp(raw_input_dir, input_prefix) if metadata_fp.suffix != ".parquet": - read_fn = partial(read_fn, infer_schema_length=999999999) + read_fn = partial(read_fn, infer_schema=False) out_fp = partial_metadata_dir / f"{input_prefix}.parquet" logger.info(f"Extracting metadata from {metadata_fp} and saving to {out_fp}") diff --git a/src/MEDS_transforms/parser.py b/src/MEDS_transforms/parser.py index 948ca00..3b663f7 100644 --- a/src/MEDS_transforms/parser.py +++ b/src/MEDS_transforms/parser.py @@ -596,18 +596,6 @@ def cfg_to_expr(cfg: str | ListConfig | DictConfig) -> tuple[pl.Expr, set[str]]: ['34.2', 'bar//2', '34.2'] >>> sorted(cols) ['baz'] - - Note that sometimes coalescing can lead to unexpected results. For example, if the first expression is of - a different type than the second, the second expression may have its type coerced to match the first, - potentially in an unexpected manner. This is also related to some polars, bugs, such as - https://github.com/pola-rs/polars/issues/17773 - >>> cfg = [ - ... {"matcher": {"baz": 2}, "output": {"str": "bar//{baz}"}}, - ... {"literal": 34.8218}, - ... ] - >>> expr, cols = cfg_to_expr(cfg) - >>> data.select(expr.alias("out"))["out"].to_list() - ['34', 'bar//2', '34'] """ structured_expr = parse_col_expr(cfg) return structured_expr_to_pl(structured_expr)