Skip to content

Commit

Permalink
feat(task): modify default behaviour for task compression argument.
Browse files Browse the repository at this point in the history
The expected behaviour of the compression argument was to overwrite
compression args if a dict with those args was provided, to disable
compression if something which evaluates to False was provided, and use
the default parameters for the container otherwise. The behaviour
previously would instead disable compression if no args were provided.
  • Loading branch information
ljgray committed Mar 27, 2023
1 parent 64f9f1a commit 83d4a55
Showing 1 changed file with 20 additions and 12 deletions.
32 changes: 20 additions & 12 deletions draco/core/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,10 +260,13 @@ class SingleTask(MPILoggedTask, pipeline.BasicContMixin):
compression : dict or bool, optional
Set compression options for each dataset. Provided as a dict with the dataset
names as keys and values for `chunks`, `compression`, and `compression_opts`.
If set to `False`, chunks and compression will be disabled for all datasets.
Otherwise, the default parameters set in the dataset spec are used.
Note that this will modify these parameters on the container itself, such that
if it is written out again downstream in the pipeline these will be used.
Any datasets not included in the dict (including if the dict is empty), will use
the default parameters set in the dataset spec. If set to `False` (or anything
that evaluates to `False`, other than an empty dict) chunks and compression will
be disabled for all datasets. If no argument in provided, the default parameters
set in the dataset spec are used. Note that this will modify these parameters on
the container itself, such that if it is written out again downstream in the
pipeline these will be used.
output_root : string
Pipeline settable parameter giving the first part of the output path.
Deprecated in favour of `output_name`.
Expand Down Expand Up @@ -292,7 +295,7 @@ class SingleTask(MPILoggedTask, pipeline.BasicContMixin):
output_name = config.Property(default="{output_root}{tag}.h5", proptype=str)
output_format = config.file_format()
compression = config.Property(
default={}, proptype=lambda x: x if isinstance(x, dict) else bool(x)
default=True, proptype=lambda x: x if isinstance(x, dict) else bool(x)
)

nan_check = config.Property(default=True, proptype=bool)
Expand Down Expand Up @@ -446,12 +449,8 @@ def walk_dset_tree(grp, root=""):
datasets.append(root + key)
return datasets

if not self.compression:
for ds in walk_dset_tree(output):
output._data._storage_root[ds].chunks = None
output._data._storage_root[ds].compression = None
output._data._storage_root[ds].compression_opts = None
else:
if isinstance(self.compression, dict):
# We want to overwrite some compression settings
datasets = walk_dset_tree(output)
for ds in self.compression:
if ds in datasets:
Expand All @@ -461,7 +460,10 @@ def walk_dset_tree(grp, root=""):
)
setattr(output._data._storage_root[ds], key, val)
# shorthand for bitshuffle
if output[ds].compression in ("bitshuffle", fileformats.H5FILTER):
if output[ds].compression in (
"bitshuffle",
fileformats.H5FILTER,
):
output[ds].compression = fileformats.H5FILTER
if output[ds].compression_opts is None:
output._data._storage_root[ds].compression_opts = (
Expand All @@ -472,6 +474,12 @@ def walk_dset_tree(grp, root=""):
self.log.warning(
f"Ignoring config entry in `compression` for non-existing dataset `{ds}`"
)
elif not self.compression:
# Disable compression
for ds in walk_dset_tree(output):
output._data._storage_root[ds].chunks = None
output._data._storage_root[ds].compression = None
output._data._storage_root[ds].compression_opts = None

# Routine to write output if needed.
if self.save:
Expand Down

0 comments on commit 83d4a55

Please sign in to comment.