Skip to content

Commit

Permalink
Merge pull request #191 from mmcdermott/190_retype_extracted_metadata
Browse files Browse the repository at this point in the history
Upgraded polars and set infer schema to false for metadata extraction.
  • Loading branch information
mmcdermott committed Aug 30, 2024
2 parents fa686e7 + a069965 commit 8def928
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 16 deletions.
4 changes: 2 additions & 2 deletions MIMIC-IV_Example/configs/event_configs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,8 @@ hosp/procedures_icd:
hosp/d_icd_procedures:
description: "long_title"
parent_codes: # List of objects are string labels mapping to filters to be evaluated.
-  - "ICD{icd_version}Proc/{icd_code}": { icd_version: 9 }
-  - "ICD{icd_version}PCS/{icd_code}": { icd_version: 10 }
+  - "ICD{icd_version}Proc/{icd_code}": { icd_version: "9" }
+  - "ICD{icd_version}PCS/{icd_code}": { icd_version: "10" }

hosp/transfers:
transfer:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ classifiers = [
"Operating System :: OS Independent",
]
dependencies = [
-  "polars~=1.1.0", "pyarrow", "nested_ragged_tensors", "loguru", "hydra-core", "numpy", "meds==0.3.3",
+  "polars~=1.6.0", "pyarrow", "nested_ragged_tensors", "loguru", "hydra-core", "numpy", "meds==0.3.3",
]

[tool.setuptools_scm]
Expand Down
2 changes: 1 addition & 1 deletion src/MEDS_transforms/extract/extract_code_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ def main(cfg: DictConfig):

metadata_fp, read_fn = get_supported_fp(raw_input_dir, input_prefix)
if metadata_fp.suffix != ".parquet":
-    read_fn = partial(read_fn, infer_schema_length=999999999)
+    read_fn = partial(read_fn, infer_schema=False)
out_fp = partial_metadata_dir / f"{input_prefix}.parquet"
logger.info(f"Extracting metadata from {metadata_fp} and saving to {out_fp}")

Expand Down
12 changes: 0 additions & 12 deletions src/MEDS_transforms/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,18 +596,6 @@ def cfg_to_expr(cfg: str | ListConfig | DictConfig) -> tuple[pl.Expr, set[str]]:
['34.2', 'bar//2', '34.2']
>>> sorted(cols)
['baz']
- Note that sometimes coalescing can lead to unexpected results. For example, if the first expression is of
- a different type than the second, the second expression may have its type coerced to match the first,
- potentially in an unexpected manner. This is also related to some polars, bugs, such as
- https://github.com/pola-rs/polars/issues/17773
- >>> cfg = [
- ... {"matcher": {"baz": 2}, "output": {"str": "bar//{baz}"}},
- ... {"literal": 34.8218},
- ... ]
- >>> expr, cols = cfg_to_expr(cfg)
- >>> data.select(expr.alias("out"))["out"].to_list()
- ['34', 'bar//2', '34']
"""
structured_expr = parse_col_expr(cfg)
return structured_expr_to_pl(structured_expr)

0 comments on commit 8def928

Please sign in to comment.